Diffstat (limited to 'arch/arm/mm')
34 files changed, 2509 insertions, 1020 deletions
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index cd2c88e7a8f..c348eaee7ee 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -264,7 +264,7 @@ config CPU_ARM1026  # SA110  config CPU_SA110 -	bool "Support StrongARM(R) SA-110 processor" if ARCH_RPC +	bool  	select CPU_32v3 if ARCH_RPC  	select CPU_32v4 if !ARCH_RPC  	select CPU_ABRT_EV4 @@ -420,33 +420,32 @@ config CPU_32v3  	bool  	select CPU_USE_DOMAINS if MMU  	select NEEDS_SYSCALL_FOR_CMPXCHG if SMP -	select TLS_REG_EMUL if SMP || !MMU  	select NEED_KUSER_HELPERS +	select TLS_REG_EMUL if SMP || !MMU  config CPU_32v4  	bool  	select CPU_USE_DOMAINS if MMU  	select NEEDS_SYSCALL_FOR_CMPXCHG if SMP -	select TLS_REG_EMUL if SMP || !MMU  	select NEED_KUSER_HELPERS +	select TLS_REG_EMUL if SMP || !MMU  config CPU_32v4T  	bool  	select CPU_USE_DOMAINS if MMU  	select NEEDS_SYSCALL_FOR_CMPXCHG if SMP -	select TLS_REG_EMUL if SMP || !MMU  	select NEED_KUSER_HELPERS +	select TLS_REG_EMUL if SMP || !MMU  config CPU_32v5  	bool  	select CPU_USE_DOMAINS if MMU  	select NEEDS_SYSCALL_FOR_CMPXCHG if SMP -	select TLS_REG_EMUL if SMP || !MMU  	select NEED_KUSER_HELPERS +	select TLS_REG_EMUL if SMP || !MMU  config CPU_32v6  	bool -	select CPU_USE_DOMAINS if CPU_V6 && MMU  	select TLS_REG_EMUL if !CPU_32v6K && !MMU  config CPU_32v6K @@ -671,7 +670,7 @@ config ARM_VIRT_EXT  config SWP_EMULATE  	bool "Emulate SWP/SWPB instructions" -	depends on !CPU_USE_DOMAINS && CPU_V7 +	depends on CPU_V7  	default y if SMP  	select HAVE_PROC_CPU if PROC_FS  	help @@ -855,7 +854,7 @@ config OUTER_CACHE_SYNC  config CACHE_FEROCEON_L2  	bool "Enable the Feroceon L2 cache controller" -	depends on ARCH_KIRKWOOD || ARCH_MV78XX0 +	depends on ARCH_KIRKWOOD || ARCH_MV78XX0 || ARCH_MVEBU  	default y  	select OUTER_CACHE  	help @@ -890,14 +889,64 @@ config CACHE_L2X0  	help  	  This option enables the L2x0 PrimeCell. +if CACHE_L2X0 +  config CACHE_PL310  	bool -	depends on CACHE_L2X0  	default y if CPU_V7 && !(CPU_V6 || CPU_V6K)  	help  	  This option enables optimisations for the PL310 cache  	  controller. +config PL310_ERRATA_588369 +	bool "PL310 errata: Clean & Invalidate maintenance operations do not invalidate clean lines" +	help +	   The PL310 L2 cache controller implements three types of Clean & +	   Invalidate maintenance operations: by Physical Address +	   (offset 0x7F0), by Index/Way (0x7F8) and by Way (0x7FC). +	   They are architecturally defined to behave as the execution of a +	   clean operation followed immediately by an invalidate operation, +	   both performing to the same memory location. This functionality +	   is not correctly implemented in PL310 as clean lines are not +	   invalidated as a result of these operations. + +config PL310_ERRATA_727915 +	bool "PL310 errata: Background Clean & Invalidate by Way operation can cause data corruption" +	help +	  PL310 implements the Clean & Invalidate by Way L2 cache maintenance +	  operation (offset 0x7FC). This operation runs in background so that +	  PL310 can handle normal accesses while it is in progress. Under very +	  rare circumstances, due to this erratum, write data can be lost when +	  PL310 treats a cacheable write transaction during a Clean & +	  Invalidate by Way operation. + +config PL310_ERRATA_753970 +	bool "PL310 errata: cache sync operation may be faulty" +	help +	  This option enables the workaround for the 753970 PL310 (r3p0) erratum. + +	  Under some condition the effect of cache sync operation on +	  the store buffer still remains when the operation completes. 
+	  This means that the store buffer is always asked to drain and +	  this prevents it from merging any further writes. The workaround +	  is to replace the normal offset of cache sync operation (0x730) +	  by another offset targeting an unmapped PL310 register 0x740. +	  This has the same effect as the cache sync operation: store buffer +	  drain and waiting for all buffers empty. + +config PL310_ERRATA_769419 +	bool "PL310 errata: no automatic Store Buffer drain" +	help +	  On revisions of the PL310 prior to r3p2, the Store Buffer does +	  not automatically drain. This can cause normal, non-cacheable +	  writes to be retained when the memory system is idle, leading +	  to suboptimal I/O performance for drivers using coherent DMA. +	  This option adds a write barrier to the cpu_idle loop so that, +	  on systems with an outer cache, the store buffer is drained +	  explicitly. + +endif +  config CACHE_TAUROS2  	bool "Enable the Tauros2 L2 cache controller"  	depends on (ARCH_DOVE || ARCH_MMP || CPU_PJ4) @@ -952,3 +1001,9 @@ config ARCH_HAS_BARRIERS  	help  	  This option allows the use of custom mandatory barriers  	  included via the mach/barriers.h file. + +config ARCH_SUPPORTS_BIG_ENDIAN +	bool +	help +	  This option specifies the architecture can support big endian +	  operation. diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile index ecfe6e53f6e..91da64de440 100644 --- a/arch/arm/mm/Makefile +++ b/arch/arm/mm/Makefile @@ -12,6 +12,7 @@ ifneq ($(CONFIG_MMU),y)  obj-y				+= nommu.o  endif +obj-$(CONFIG_ARM_PTDUMP)	+= dump.o  obj-$(CONFIG_MODULES)		+= proc-syms.o  obj-$(CONFIG_ALIGNMENT_TRAP)	+= alignment.o @@ -94,7 +95,8 @@ obj-$(CONFIG_CPU_V7M)		+= proc-v7m.o  AFLAGS_proc-v6.o	:=-Wa,-march=armv6  AFLAGS_proc-v7.o	:=-Wa,-march=armv7-a +obj-$(CONFIG_OUTER_CACHE)	+= l2c-common.o  obj-$(CONFIG_CACHE_FEROCEON_L2)	+= cache-feroceon-l2.o -obj-$(CONFIG_CACHE_L2X0)	+= cache-l2x0.o +obj-$(CONFIG_CACHE_L2X0)	+= cache-l2x0.o l2c-l2x0-resume.o  obj-$(CONFIG_CACHE_XSC3L2)	+= cache-xsc3l2.o  obj-$(CONFIG_CACHE_TAUROS2)	+= cache-tauros2.o diff --git a/arch/arm/mm/abort-ev6.S b/arch/arm/mm/abort-ev6.S index 80741992a9f..3815a8262af 100644 --- a/arch/arm/mm/abort-ev6.S +++ b/arch/arm/mm/abort-ev6.S @@ -38,9 +38,8 @@ ENTRY(v6_early_abort)  	bne	do_DataAbort  	bic	r1, r1, #1 << 11		@ clear bit 11 of FSR  	ldr	r3, [r4]			@ read aborted ARM instruction -#ifdef CONFIG_CPU_ENDIAN_BE8 -	rev	r3, r3 -#endif + ARM_BE8(rev	r3, r3) +  	do_ldrd_abort tmp=ip, insn=r3  	tst	r3, #1 << 20			@ L = 0 -> write  	orreq	r1, r1, #1 << 11		@ yes. diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index 6f4585b8907..b8cb1a2688a 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -25,8 +25,10 @@  #include <asm/cp15.h>  #include <asm/system_info.h>  #include <asm/unaligned.h> +#include <asm/opcodes.h>  #include "fault.h" +#include "mm.h"  /*   * 32-bit misaligned trap handler (c) 1998 San Mehat (CCC) -July 1998 @@ -80,6 +82,7 @@ static unsigned long ai_word;  static unsigned long ai_dword;  static unsigned long ai_multi;  static int ai_usermode; +static unsigned long cr_no_alignment;  core_param(alignment, ai_usermode, int, 0600); @@ -90,7 +93,7 @@ core_param(alignment, ai_usermode, int, 0600);  /* Return true if and only if the ARMv6 unaligned access model is in use. 
*/  static bool cpu_is_v6_unaligned(void)  { -	return cpu_architecture() >= CPU_ARCH_ARMv6 && (cr_alignment & CR_U); +	return cpu_architecture() >= CPU_ARCH_ARMv6 && get_cr() & CR_U;  }  static int safe_usermode(int new_usermode, bool warn) @@ -762,21 +765,25 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)  	if (thumb_mode(regs)) {  		u16 *ptr = (u16 *)(instrptr & ~1);  		fault = probe_kernel_address(ptr, tinstr); +		tinstr = __mem_to_opcode_thumb16(tinstr);  		if (!fault) {  			if (cpu_architecture() >= CPU_ARCH_ARMv7 &&  			    IS_T32(tinstr)) {  				/* Thumb-2 32-bit */  				u16 tinst2 = 0;  				fault = probe_kernel_address(ptr + 1, tinst2); -				instr = (tinstr << 16) | tinst2; +				tinst2 = __mem_to_opcode_thumb16(tinst2); +				instr = __opcode_thumb32_compose(tinstr, tinst2);  				thumb2_32b = 1;  			} else {  				isize = 2;  				instr = thumb2arm(tinstr);  			}  		} -	} else +	} else {  		fault = probe_kernel_address(instrptr, instr); +		instr = __mem_to_opcode_arm(instr); +	}  	if (fault) {  		type = TYPE_FAULT; @@ -944,6 +951,13 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)  	return 0;  } +static int __init noalign_setup(char *__unused) +{ +	set_cr(__clear_cr(CR_A)); +	return 1; +} +__setup("noalign", noalign_setup); +  /*   * This needs to be done after sysctl_init, otherwise sys/ will be   * overwritten.  Actually, this shouldn't be in sys/ at all since @@ -961,14 +975,12 @@ static int __init alignment_init(void)  		return -ENOMEM;  #endif -#ifdef CONFIG_CPU_CP15  	if (cpu_is_v6_unaligned()) { -		cr_alignment &= ~CR_A; -		cr_no_alignment &= ~CR_A; -		set_cr(cr_alignment); +		set_cr(__clear_cr(CR_A));  		ai_usermode = safe_usermode(ai_usermode, false);  	} -#endif + +	cr_no_alignment = get_cr() & ~CR_A;  	hook_fault_code(FAULT_CODE_ALIGNMENT, do_alignment, SIGBUS, BUS_ADRALN,  			"alignment exception"); diff --git a/arch/arm/mm/cache-feroceon-l2.c b/arch/arm/mm/cache-feroceon-l2.c index 48bc3c0a87c..e028a7f2ebc 100644 --- a/arch/arm/mm/cache-feroceon-l2.c +++ b/arch/arm/mm/cache-feroceon-l2.c @@ -13,10 +13,15 @@   */  #include <linux/init.h> +#include <linux/of.h> +#include <linux/of_address.h>  #include <linux/highmem.h> +#include <linux/io.h>  #include <asm/cacheflush.h>  #include <asm/cp15.h> -#include <plat/cache-feroceon-l2.h> +#include <asm/hardware/cache-feroceon-l2.h> + +#define L2_WRITETHROUGH_KIRKWOOD	BIT(4)  /*   * Low-level cache maintenance operations. @@ -331,7 +336,9 @@ static void __init enable_l2(void)  			enable_icache();  		if (d)  			enable_dcache(); -	} +	} else +		pr_err(FW_BUG +		       "Feroceon L2: bootloader left the L2 cache on!\n");  }  void __init feroceon_l2_init(int __l2_wt_override) @@ -343,10 +350,47 @@ void __init feroceon_l2_init(int __l2_wt_override)  	outer_cache.inv_range = feroceon_l2_inv_range;  	outer_cache.clean_range = feroceon_l2_clean_range;  	outer_cache.flush_range = feroceon_l2_flush_range; -	outer_cache.inv_all = l2_inv_all;  	enable_l2();  	printk(KERN_INFO "Feroceon L2: Cache support initialised%s.\n",  			 l2_wt_override ? 
", in WT override mode" : "");  } +#ifdef CONFIG_OF +static const struct of_device_id feroceon_ids[] __initconst = { +	{ .compatible = "marvell,kirkwood-cache"}, +	{ .compatible = "marvell,feroceon-cache"}, +	{} +}; + +int __init feroceon_of_init(void) +{ +	struct device_node *node; +	void __iomem *base; +	bool l2_wt_override = false; +	struct resource res; + +#if defined(CONFIG_CACHE_FEROCEON_L2_WRITETHROUGH) +	l2_wt_override = true; +#endif + +	node = of_find_matching_node(NULL, feroceon_ids); +	if (node && of_device_is_compatible(node, "marvell,kirkwood-cache")) { +		if (of_address_to_resource(node, 0, &res)) +			return -ENODEV; + +		base = ioremap(res.start, resource_size(&res)); +		if (!base) +			return -ENOMEM; + +		if (l2_wt_override) +			writel(readl(base) | L2_WRITETHROUGH_KIRKWOOD, base); +		else +			writel(readl(base) & ~L2_WRITETHROUGH_KIRKWOOD, base); +	} + +	feroceon_l2_init(l2_wt_override); + +	return 0; +} +#endif diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c index 447da6ffadd..7c3fb41a462 100644 --- a/arch/arm/mm/cache-l2x0.c +++ b/arch/arm/mm/cache-l2x0.c @@ -16,17 +16,33 @@   * along with this program; if not, write to the Free Software   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA   */ +#include <linux/cpu.h>  #include <linux/err.h>  #include <linux/init.h> +#include <linux/smp.h>  #include <linux/spinlock.h>  #include <linux/io.h>  #include <linux/of.h>  #include <linux/of_address.h>  #include <asm/cacheflush.h> +#include <asm/cp15.h> +#include <asm/cputype.h>  #include <asm/hardware/cache-l2x0.h> +#include "cache-tauros3.h"  #include "cache-aurora-l2.h" +struct l2c_init_data { +	const char *type; +	unsigned way_size_0; +	unsigned num_lock; +	void (*of_parse)(const struct device_node *, u32 *, u32 *); +	void (*enable)(void __iomem *, u32, unsigned); +	void (*fixup)(void __iomem *, u32, struct outer_cache_fns *); +	void (*save)(void __iomem *); +	struct outer_cache_fns outer_cache; +}; +  #define CACHE_LINE_SIZE		32  static void __iomem *l2x0_base; @@ -35,96 +51,116 @@ static u32 l2x0_way_mask;	/* Bitmask of active ways */  static u32 l2x0_size;  static unsigned long sync_reg_offset = L2X0_CACHE_SYNC; -/* Aurora don't have the cache ID register available, so we have to - * pass it though the device tree */ -static u32  cache_id_part_number_from_dt; -  struct l2x0_regs l2x0_saved_regs; -struct l2x0_of_data { -	void (*setup)(const struct device_node *, u32 *, u32 *); -	void (*save)(void); -	struct outer_cache_fns outer_cache; -}; - -static bool of_init = false; - -static inline void cache_wait_way(void __iomem *reg, unsigned long mask) +/* + * Common code for all cache controllers. + */ +static inline void l2c_wait_mask(void __iomem *reg, unsigned long mask)  {  	/* wait for cache operation by line or way to complete */  	while (readl_relaxed(reg) & mask)  		cpu_relax();  } -#ifdef CONFIG_CACHE_PL310 -static inline void cache_wait(void __iomem *reg, unsigned long mask) +/* + * By default, we write directly to secure registers.  Platforms must + * override this if they are running non-secure. 
+ */ +static void l2c_write_sec(unsigned long val, void __iomem *base, unsigned reg)  { -	/* cache operations by line are atomic on PL310 */ +	if (val == readl_relaxed(base + reg)) +		return; +	if (outer_cache.write_sec) +		outer_cache.write_sec(val, reg); +	else +		writel_relaxed(val, base + reg);  } -#else -#define cache_wait	cache_wait_way -#endif -static inline void cache_sync(void) +/* + * This should only be called when we have a requirement that the + * register be written due to a work-around, as platforms running + * in non-secure mode may not be able to access this register. + */ +static inline void l2c_set_debug(void __iomem *base, unsigned long val)  { -	void __iomem *base = l2x0_base; - -	writel_relaxed(0, base + sync_reg_offset); -	cache_wait(base + L2X0_CACHE_SYNC, 1); +	l2c_write_sec(val, base, L2X0_DEBUG_CTRL);  } -static inline void l2x0_clean_line(unsigned long addr) +static void __l2c_op_way(void __iomem *reg)  { -	void __iomem *base = l2x0_base; -	cache_wait(base + L2X0_CLEAN_LINE_PA, 1); -	writel_relaxed(addr, base + L2X0_CLEAN_LINE_PA); +	writel_relaxed(l2x0_way_mask, reg); +	l2c_wait_mask(reg, l2x0_way_mask);  } -static inline void l2x0_inv_line(unsigned long addr) +static inline void l2c_unlock(void __iomem *base, unsigned num)  { -	void __iomem *base = l2x0_base; -	cache_wait(base + L2X0_INV_LINE_PA, 1); -	writel_relaxed(addr, base + L2X0_INV_LINE_PA); +	unsigned i; + +	for (i = 0; i < num; i++) { +		writel_relaxed(0, base + L2X0_LOCKDOWN_WAY_D_BASE + +			       i * L2X0_LOCKDOWN_STRIDE); +		writel_relaxed(0, base + L2X0_LOCKDOWN_WAY_I_BASE + +			       i * L2X0_LOCKDOWN_STRIDE); +	}  } -#if defined(CONFIG_PL310_ERRATA_588369) || defined(CONFIG_PL310_ERRATA_727915) -static inline void debug_writel(unsigned long val) +/* + * Enable the L2 cache controller.  This function must only be + * called when the cache controller is known to be disabled. 
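 *
 * (Editorial aside, not part of this patch.)  A platform that boots in the
 * non-secure world might install the write_sec hook along these lines
 * before the L2C initialisation runs, so that l2c_write_sec() above routes
 * secure-only register writes through firmware.  This is only a sketch:
 * my_soc_l2c_write_sec() and secure_fw_l2c_write() are hypothetical names
 * standing in for whatever SMC/firmware service the platform actually
 * provides.
 *
 *	static void my_soc_l2c_write_sec(unsigned long val, unsigned reg)
 *	{
 *		secure_fw_l2c_write(reg, val);
 *	}
 *
 *	outer_cache.write_sec = my_soc_l2c_write_sec;
 *	l2x0_of_init(0, ~0UL);
 *
 * __l2c_init() preserves a previously set outer_cache.write_sec when it
 * installs the per-controller outer_cache_fns, which is what makes this
 * ordering work.
 *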
+ */ +static void l2c_enable(void __iomem *base, u32 aux, unsigned num_lock)  { -	if (outer_cache.set_debug) -		outer_cache.set_debug(val); +	unsigned long flags; + +	l2c_write_sec(aux, base, L2X0_AUX_CTRL); + +	l2c_unlock(base, num_lock); + +	local_irq_save(flags); +	__l2c_op_way(base + L2X0_INV_WAY); +	writel_relaxed(0, base + sync_reg_offset); +	l2c_wait_mask(base + sync_reg_offset, 1); +	local_irq_restore(flags); + +	l2c_write_sec(L2X0_CTRL_EN, base, L2X0_CTRL);  } -static void pl310_set_debug(unsigned long val) +static void l2c_disable(void)  { -	writel_relaxed(val, l2x0_base + L2X0_DEBUG_CTRL); +	void __iomem *base = l2x0_base; + +	outer_cache.flush_all(); +	l2c_write_sec(0, base, L2X0_CTRL); +	dsb(st);  } -#else -/* Optimised out for non-errata case */ -static inline void debug_writel(unsigned long val) + +#ifdef CONFIG_CACHE_PL310 +static inline void cache_wait(void __iomem *reg, unsigned long mask)  { +	/* cache operations by line are atomic on PL310 */  } - -#define pl310_set_debug	NULL +#else +#define cache_wait	l2c_wait_mask  #endif -#ifdef CONFIG_PL310_ERRATA_588369 -static inline void l2x0_flush_line(unsigned long addr) +static inline void cache_sync(void)  {  	void __iomem *base = l2x0_base; -	/* Clean by PA followed by Invalidate by PA */ -	cache_wait(base + L2X0_CLEAN_LINE_PA, 1); -	writel_relaxed(addr, base + L2X0_CLEAN_LINE_PA); -	cache_wait(base + L2X0_INV_LINE_PA, 1); -	writel_relaxed(addr, base + L2X0_INV_LINE_PA); +	writel_relaxed(0, base + sync_reg_offset); +	cache_wait(base + L2X0_CACHE_SYNC, 1);  } -#else -static inline void l2x0_flush_line(unsigned long addr) +#if defined(CONFIG_PL310_ERRATA_588369) || defined(CONFIG_PL310_ERRATA_727915) +static inline void debug_writel(unsigned long val) +{ +	l2c_set_debug(l2x0_base, val); +} +#else +/* Optimised out for non-errata case */ +static inline void debug_writel(unsigned long val)  { -	void __iomem *base = l2x0_base; -	cache_wait(base + L2X0_CLEAN_INV_LINE_PA, 1); -	writel_relaxed(addr, base + L2X0_CLEAN_INV_LINE_PA);  }  #endif @@ -140,8 +176,7 @@ static void l2x0_cache_sync(void)  static void __l2x0_flush_all(void)  {  	debug_writel(0x03); -	writel_relaxed(l2x0_way_mask, l2x0_base + L2X0_CLEAN_INV_WAY); -	cache_wait_way(l2x0_base + L2X0_CLEAN_INV_WAY, l2x0_way_mask); +	__l2c_op_way(l2x0_base + L2X0_CLEAN_INV_WAY);  	cache_sync();  	debug_writel(0x00);  } @@ -156,275 +191,910 @@ static void l2x0_flush_all(void)  	raw_spin_unlock_irqrestore(&l2x0_lock, flags);  } -static void l2x0_clean_all(void) +static void l2x0_disable(void)  {  	unsigned long flags; -	/* clean all ways */  	raw_spin_lock_irqsave(&l2x0_lock, flags); -	writel_relaxed(l2x0_way_mask, l2x0_base + L2X0_CLEAN_WAY); -	cache_wait_way(l2x0_base + L2X0_CLEAN_WAY, l2x0_way_mask); -	cache_sync(); +	__l2x0_flush_all(); +	l2c_write_sec(0, l2x0_base, L2X0_CTRL); +	dsb(st);  	raw_spin_unlock_irqrestore(&l2x0_lock, flags);  } -static void l2x0_inv_all(void) +static void l2c_save(void __iomem *base)  { -	unsigned long flags; +	l2x0_saved_regs.aux_ctrl = readl_relaxed(l2x0_base + L2X0_AUX_CTRL); +} -	/* invalidate all ways */ -	raw_spin_lock_irqsave(&l2x0_lock, flags); -	/* Invalidating when L2 is enabled is a nono */ -	BUG_ON(readl(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN); -	writel_relaxed(l2x0_way_mask, l2x0_base + L2X0_INV_WAY); -	cache_wait_way(l2x0_base + L2X0_INV_WAY, l2x0_way_mask); -	cache_sync(); -	raw_spin_unlock_irqrestore(&l2x0_lock, flags); +/* + * L2C-210 specific code. 
+ * + * The L2C-2x0 PA, set/way and sync operations are atomic, but we must + * ensure that no background operation is running.  The way operations + * are all background tasks. + * + * While a background operation is in progress, any new operation is + * ignored (unspecified whether this causes an error.)  Thankfully, not + * used on SMP. + * + * Never has a different sync register other than L2X0_CACHE_SYNC, but + * we use sync_reg_offset here so we can share some of this with L2C-310. + */ +static void __l2c210_cache_sync(void __iomem *base) +{ +	writel_relaxed(0, base + sync_reg_offset); +} + +static void __l2c210_op_pa_range(void __iomem *reg, unsigned long start, +	unsigned long end) +{ +	while (start < end) { +		writel_relaxed(start, reg); +		start += CACHE_LINE_SIZE; +	}  } -static void l2x0_inv_range(unsigned long start, unsigned long end) +static void l2c210_inv_range(unsigned long start, unsigned long end)  {  	void __iomem *base = l2x0_base; -	unsigned long flags; -	raw_spin_lock_irqsave(&l2x0_lock, flags);  	if (start & (CACHE_LINE_SIZE - 1)) {  		start &= ~(CACHE_LINE_SIZE - 1); -		debug_writel(0x03); -		l2x0_flush_line(start); -		debug_writel(0x00); +		writel_relaxed(start, base + L2X0_CLEAN_INV_LINE_PA);  		start += CACHE_LINE_SIZE;  	}  	if (end & (CACHE_LINE_SIZE - 1)) {  		end &= ~(CACHE_LINE_SIZE - 1); -		debug_writel(0x03); -		l2x0_flush_line(end); -		debug_writel(0x00); +		writel_relaxed(end, base + L2X0_CLEAN_INV_LINE_PA);  	} +	__l2c210_op_pa_range(base + L2X0_INV_LINE_PA, start, end); +	__l2c210_cache_sync(base); +} + +static void l2c210_clean_range(unsigned long start, unsigned long end) +{ +	void __iomem *base = l2x0_base; + +	start &= ~(CACHE_LINE_SIZE - 1); +	__l2c210_op_pa_range(base + L2X0_CLEAN_LINE_PA, start, end); +	__l2c210_cache_sync(base); +} + +static void l2c210_flush_range(unsigned long start, unsigned long end) +{ +	void __iomem *base = l2x0_base; + +	start &= ~(CACHE_LINE_SIZE - 1); +	__l2c210_op_pa_range(base + L2X0_CLEAN_INV_LINE_PA, start, end); +	__l2c210_cache_sync(base); +} + +static void l2c210_flush_all(void) +{ +	void __iomem *base = l2x0_base; + +	BUG_ON(!irqs_disabled()); + +	__l2c_op_way(base + L2X0_CLEAN_INV_WAY); +	__l2c210_cache_sync(base); +} + +static void l2c210_sync(void) +{ +	__l2c210_cache_sync(l2x0_base); +} + +static void l2c210_resume(void) +{ +	void __iomem *base = l2x0_base; + +	if (!(readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN)) +		l2c_enable(base, l2x0_saved_regs.aux_ctrl, 1); +} + +static const struct l2c_init_data l2c210_data __initconst = { +	.type = "L2C-210", +	.way_size_0 = SZ_8K, +	.num_lock = 1, +	.enable = l2c_enable, +	.save = l2c_save, +	.outer_cache = { +		.inv_range = l2c210_inv_range, +		.clean_range = l2c210_clean_range, +		.flush_range = l2c210_flush_range, +		.flush_all = l2c210_flush_all, +		.disable = l2c_disable, +		.sync = l2c210_sync, +		.resume = l2c210_resume, +	}, +}; + +/* + * L2C-220 specific code. + * + * All operations are background operations: they have to be waited for. + * Conflicting requests generate a slave error (which will cause an + * imprecise abort.)  Never uses sync_reg_offset, so we hard-code the + * sync register here. + * + * However, we can re-use the l2c210_resume call. 
+ */ +static inline void __l2c220_cache_sync(void __iomem *base) +{ +	writel_relaxed(0, base + L2X0_CACHE_SYNC); +	l2c_wait_mask(base + L2X0_CACHE_SYNC, 1); +} + +static void l2c220_op_way(void __iomem *base, unsigned reg) +{ +	unsigned long flags; + +	raw_spin_lock_irqsave(&l2x0_lock, flags); +	__l2c_op_way(base + reg); +	__l2c220_cache_sync(base); +	raw_spin_unlock_irqrestore(&l2x0_lock, flags); +} + +static unsigned long l2c220_op_pa_range(void __iomem *reg, unsigned long start, +	unsigned long end, unsigned long flags) +{ +	raw_spinlock_t *lock = &l2x0_lock; +  	while (start < end) {  		unsigned long blk_end = start + min(end - start, 4096UL);  		while (start < blk_end) { -			l2x0_inv_line(start); +			l2c_wait_mask(reg, 1); +			writel_relaxed(start, reg);  			start += CACHE_LINE_SIZE;  		}  		if (blk_end < end) { -			raw_spin_unlock_irqrestore(&l2x0_lock, flags); -			raw_spin_lock_irqsave(&l2x0_lock, flags); +			raw_spin_unlock_irqrestore(lock, flags); +			raw_spin_lock_irqsave(lock, flags);  		}  	} -	cache_wait(base + L2X0_INV_LINE_PA, 1); -	cache_sync(); -	raw_spin_unlock_irqrestore(&l2x0_lock, flags); + +	return flags;  } -static void l2x0_clean_range(unsigned long start, unsigned long end) +static void l2c220_inv_range(unsigned long start, unsigned long end)  {  	void __iomem *base = l2x0_base;  	unsigned long flags; -	if ((end - start) >= l2x0_size) { -		l2x0_clean_all(); -		return; -	} -  	raw_spin_lock_irqsave(&l2x0_lock, flags); -	start &= ~(CACHE_LINE_SIZE - 1); -	while (start < end) { -		unsigned long blk_end = start + min(end - start, 4096UL); - -		while (start < blk_end) { -			l2x0_clean_line(start); +	if ((start | end) & (CACHE_LINE_SIZE - 1)) { +		if (start & (CACHE_LINE_SIZE - 1)) { +			start &= ~(CACHE_LINE_SIZE - 1); +			writel_relaxed(start, base + L2X0_CLEAN_INV_LINE_PA);  			start += CACHE_LINE_SIZE;  		} -		if (blk_end < end) { -			raw_spin_unlock_irqrestore(&l2x0_lock, flags); -			raw_spin_lock_irqsave(&l2x0_lock, flags); +		if (end & (CACHE_LINE_SIZE - 1)) { +			end &= ~(CACHE_LINE_SIZE - 1); +			l2c_wait_mask(base + L2X0_CLEAN_INV_LINE_PA, 1); +			writel_relaxed(end, base + L2X0_CLEAN_INV_LINE_PA);  		}  	} -	cache_wait(base + L2X0_CLEAN_LINE_PA, 1); -	cache_sync(); + +	flags = l2c220_op_pa_range(base + L2X0_INV_LINE_PA, +				   start, end, flags); +	l2c_wait_mask(base + L2X0_INV_LINE_PA, 1); +	__l2c220_cache_sync(base);  	raw_spin_unlock_irqrestore(&l2x0_lock, flags);  } -static void l2x0_flush_range(unsigned long start, unsigned long end) +static void l2c220_clean_range(unsigned long start, unsigned long end)  {  	void __iomem *base = l2x0_base;  	unsigned long flags; +	start &= ~(CACHE_LINE_SIZE - 1);  	if ((end - start) >= l2x0_size) { -		l2x0_flush_all(); +		l2c220_op_way(base, L2X0_CLEAN_WAY);  		return;  	}  	raw_spin_lock_irqsave(&l2x0_lock, flags); +	flags = l2c220_op_pa_range(base + L2X0_CLEAN_LINE_PA, +				   start, end, flags); +	l2c_wait_mask(base + L2X0_CLEAN_INV_LINE_PA, 1); +	__l2c220_cache_sync(base); +	raw_spin_unlock_irqrestore(&l2x0_lock, flags); +} + +static void l2c220_flush_range(unsigned long start, unsigned long end) +{ +	void __iomem *base = l2x0_base; +	unsigned long flags; +  	start &= ~(CACHE_LINE_SIZE - 1); +	if ((end - start) >= l2x0_size) { +		l2c220_op_way(base, L2X0_CLEAN_INV_WAY); +		return; +	} + +	raw_spin_lock_irqsave(&l2x0_lock, flags); +	flags = l2c220_op_pa_range(base + L2X0_CLEAN_INV_LINE_PA, +				   start, end, flags); +	l2c_wait_mask(base + L2X0_CLEAN_INV_LINE_PA, 1); +	__l2c220_cache_sync(base); +	
raw_spin_unlock_irqrestore(&l2x0_lock, flags); +} + +static void l2c220_flush_all(void) +{ +	l2c220_op_way(l2x0_base, L2X0_CLEAN_INV_WAY); +} + +static void l2c220_sync(void) +{ +	unsigned long flags; + +	raw_spin_lock_irqsave(&l2x0_lock, flags); +	__l2c220_cache_sync(l2x0_base); +	raw_spin_unlock_irqrestore(&l2x0_lock, flags); +} + +static void l2c220_enable(void __iomem *base, u32 aux, unsigned num_lock) +{ +	/* +	 * Always enable non-secure access to the lockdown registers - +	 * we write to them as part of the L2C enable sequence so they +	 * need to be accessible. +	 */ +	aux |= L220_AUX_CTRL_NS_LOCKDOWN; + +	l2c_enable(base, aux, num_lock); +} + +static const struct l2c_init_data l2c220_data = { +	.type = "L2C-220", +	.way_size_0 = SZ_8K, +	.num_lock = 1, +	.enable = l2c220_enable, +	.save = l2c_save, +	.outer_cache = { +		.inv_range = l2c220_inv_range, +		.clean_range = l2c220_clean_range, +		.flush_range = l2c220_flush_range, +		.flush_all = l2c220_flush_all, +		.disable = l2c_disable, +		.sync = l2c220_sync, +		.resume = l2c210_resume, +	}, +}; + +/* + * L2C-310 specific code. + * + * Very similar to L2C-210, the PA, set/way and sync operations are atomic, + * and the way operations are all background tasks.  However, issuing an + * operation while a background operation is in progress results in a + * SLVERR response.  We can reuse: + * + *  __l2c210_cache_sync (using sync_reg_offset) + *  l2c210_sync + *  l2c210_inv_range (if 588369 is not applicable) + *  l2c210_clean_range + *  l2c210_flush_range (if 588369 is not applicable) + *  l2c210_flush_all (if 727915 is not applicable) + * + * Errata: + * 588369: PL310 R0P0->R1P0, fixed R2P0. + *	Affects: all clean+invalidate operations + *	clean and invalidate skips the invalidate step, so we need to issue + *	separate operations.  We also require the above debug workaround + *	enclosing this code fragment on affected parts.  On unaffected parts, + *	we must not use this workaround without the debug register writes + *	to avoid exposing a problem similar to 727915. + * + * 727915: PL310 R2P0->R3P0, fixed R3P1. + *	Affects: clean+invalidate by way + *	clean and invalidate by way runs in the background, and a store can + *	hit the line between the clean operation and invalidate operation, + *	resulting in the store being lost. + * + * 752271: PL310 R3P0->R3P1-50REL0, fixed R3P2. + *	Affects: 8x64-bit (double fill) line fetches + *	double fill line fetches can fail to cause dirty data to be evicted + *	from the cache before the new data overwrites the second line. + * + * 753970: PL310 R3P0, fixed R3P1. + *	Affects: sync + *	prevents merging writes after the sync operation, until another L2C + *	operation is performed (or a number of other conditions.) + * + * 769419: PL310 R0P0->R3P1, fixed R3P2. + *	Affects: store buffer + *	store buffer is not automatically drained. 
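 *
 * (Editorial aside, not part of this patch.)  Pulling the revision ranges
 * above together: the RTL revision is read as
 *
 *	rev = readl_relaxed(base + L2X0_CACHE_ID) & L2X0_CACHE_ID_RTL_MASK;
 *
 * and the errata apply roughly as
 *
 *	rev <  L310_CACHE_ID_RTL_R2P0                        -> 588369
 *	rev >= L310_CACHE_ID_RTL_R2P0 && rev < ..._RTL_R3P1  -> 727915
 *	rev == L310_CACHE_ID_RTL_R3P0                        -> 753970
 *	rev >= L310_CACHE_ID_RTL_R3P0 && rev < ..._RTL_R3P2  -> 752271
 *	rev <  L310_CACHE_ID_RTL_R3P2                        -> 769419
 *
 * l2c310_fixup() below installs each workaround only where it is needed
 * (and, for most of them, only when the matching CONFIG_PL310_ERRATA_*
 * option is enabled).
 *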
+ */ +static void l2c310_inv_range_erratum(unsigned long start, unsigned long end) +{ +	void __iomem *base = l2x0_base; + +	if ((start | end) & (CACHE_LINE_SIZE - 1)) { +		unsigned long flags; + +		/* Erratum 588369 for both clean+invalidate operations */ +		raw_spin_lock_irqsave(&l2x0_lock, flags); +		l2c_set_debug(base, 0x03); + +		if (start & (CACHE_LINE_SIZE - 1)) { +			start &= ~(CACHE_LINE_SIZE - 1); +			writel_relaxed(start, base + L2X0_CLEAN_LINE_PA); +			writel_relaxed(start, base + L2X0_INV_LINE_PA); +			start += CACHE_LINE_SIZE; +		} + +		if (end & (CACHE_LINE_SIZE - 1)) { +			end &= ~(CACHE_LINE_SIZE - 1); +			writel_relaxed(end, base + L2X0_CLEAN_LINE_PA); +			writel_relaxed(end, base + L2X0_INV_LINE_PA); +		} + +		l2c_set_debug(base, 0x00); +		raw_spin_unlock_irqrestore(&l2x0_lock, flags); +	} + +	__l2c210_op_pa_range(base + L2X0_INV_LINE_PA, start, end); +	__l2c210_cache_sync(base); +} + +static void l2c310_flush_range_erratum(unsigned long start, unsigned long end) +{ +	raw_spinlock_t *lock = &l2x0_lock; +	unsigned long flags; +	void __iomem *base = l2x0_base; + +	raw_spin_lock_irqsave(lock, flags);  	while (start < end) {  		unsigned long blk_end = start + min(end - start, 4096UL); -		debug_writel(0x03); +		l2c_set_debug(base, 0x03);  		while (start < blk_end) { -			l2x0_flush_line(start); +			writel_relaxed(start, base + L2X0_CLEAN_LINE_PA); +			writel_relaxed(start, base + L2X0_INV_LINE_PA);  			start += CACHE_LINE_SIZE;  		} -		debug_writel(0x00); +		l2c_set_debug(base, 0x00);  		if (blk_end < end) { -			raw_spin_unlock_irqrestore(&l2x0_lock, flags); -			raw_spin_lock_irqsave(&l2x0_lock, flags); +			raw_spin_unlock_irqrestore(lock, flags); +			raw_spin_lock_irqsave(lock, flags);  		}  	} -	cache_wait(base + L2X0_CLEAN_INV_LINE_PA, 1); -	cache_sync(); -	raw_spin_unlock_irqrestore(&l2x0_lock, flags); +	raw_spin_unlock_irqrestore(lock, flags); +	__l2c210_cache_sync(base);  } -static void l2x0_disable(void) +static void l2c310_flush_all_erratum(void)  { +	void __iomem *base = l2x0_base;  	unsigned long flags;  	raw_spin_lock_irqsave(&l2x0_lock, flags); -	__l2x0_flush_all(); -	writel_relaxed(0, l2x0_base + L2X0_CTRL); -	dsb(st); +	l2c_set_debug(base, 0x03); +	__l2c_op_way(base + L2X0_CLEAN_INV_WAY); +	l2c_set_debug(base, 0x00); +	__l2c210_cache_sync(base);  	raw_spin_unlock_irqrestore(&l2x0_lock, flags);  } -static void l2x0_unlock(u32 cache_id) +static void __init l2c310_save(void __iomem *base)  { -	int lockregs; -	int i; +	unsigned revision; -	switch (cache_id & L2X0_CACHE_ID_PART_MASK) { -	case L2X0_CACHE_ID_PART_L310: -		lockregs = 8; -		break; -	case AURORA_CACHE_ID: -		lockregs = 4; +	l2c_save(base); + +	l2x0_saved_regs.tag_latency = readl_relaxed(base + +		L310_TAG_LATENCY_CTRL); +	l2x0_saved_regs.data_latency = readl_relaxed(base + +		L310_DATA_LATENCY_CTRL); +	l2x0_saved_regs.filter_end = readl_relaxed(base + +		L310_ADDR_FILTER_END); +	l2x0_saved_regs.filter_start = readl_relaxed(base + +		L310_ADDR_FILTER_START); + +	revision = readl_relaxed(base + L2X0_CACHE_ID) & +			L2X0_CACHE_ID_RTL_MASK; + +	/* From r2p0, there is Prefetch offset/control register */ +	if (revision >= L310_CACHE_ID_RTL_R2P0) +		l2x0_saved_regs.prefetch_ctrl = readl_relaxed(base + +							L310_PREFETCH_CTRL); + +	/* From r3p0, there is Power control register */ +	if (revision >= L310_CACHE_ID_RTL_R3P0) +		l2x0_saved_regs.pwr_ctrl = readl_relaxed(base + +							L310_POWER_CTRL); +} + +static void l2c310_resume(void) +{ +	void __iomem *base = l2x0_base; + +	if (!(readl_relaxed(base + L2X0_CTRL) 
& L2X0_CTRL_EN)) { +		unsigned revision; + +		/* restore pl310 setup */ +		writel_relaxed(l2x0_saved_regs.tag_latency, +			       base + L310_TAG_LATENCY_CTRL); +		writel_relaxed(l2x0_saved_regs.data_latency, +			       base + L310_DATA_LATENCY_CTRL); +		writel_relaxed(l2x0_saved_regs.filter_end, +			       base + L310_ADDR_FILTER_END); +		writel_relaxed(l2x0_saved_regs.filter_start, +			       base + L310_ADDR_FILTER_START); + +		revision = readl_relaxed(base + L2X0_CACHE_ID) & +				L2X0_CACHE_ID_RTL_MASK; + +		if (revision >= L310_CACHE_ID_RTL_R2P0) +			l2c_write_sec(l2x0_saved_regs.prefetch_ctrl, base, +				      L310_PREFETCH_CTRL); +		if (revision >= L310_CACHE_ID_RTL_R3P0) +			l2c_write_sec(l2x0_saved_regs.pwr_ctrl, base, +				      L310_POWER_CTRL); + +		l2c_enable(base, l2x0_saved_regs.aux_ctrl, 8); + +		/* Re-enable full-line-of-zeros for Cortex-A9 */ +		if (l2x0_saved_regs.aux_ctrl & L310_AUX_CTRL_FULL_LINE_ZERO) +			set_auxcr(get_auxcr() | BIT(3) | BIT(2) | BIT(1)); +	} +} + +static int l2c310_cpu_enable_flz(struct notifier_block *nb, unsigned long act, void *data) +{ +	switch (act & ~CPU_TASKS_FROZEN) { +	case CPU_STARTING: +		set_auxcr(get_auxcr() | BIT(3) | BIT(2) | BIT(1));  		break; -	default: -		/* L210 and unknown types */ -		lockregs = 1; +	case CPU_DYING: +		set_auxcr(get_auxcr() & ~(BIT(3) | BIT(2) | BIT(1)));  		break;  	} +	return NOTIFY_OK; +} -	for (i = 0; i < lockregs; i++) { -		writel_relaxed(0x0, l2x0_base + L2X0_LOCKDOWN_WAY_D_BASE + -			       i * L2X0_LOCKDOWN_STRIDE); -		writel_relaxed(0x0, l2x0_base + L2X0_LOCKDOWN_WAY_I_BASE + -			       i * L2X0_LOCKDOWN_STRIDE); +static void __init l2c310_enable(void __iomem *base, u32 aux, unsigned num_lock) +{ +	unsigned rev = readl_relaxed(base + L2X0_CACHE_ID) & L2X0_CACHE_ID_RTL_MASK; +	bool cortex_a9 = read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9; + +	if (rev >= L310_CACHE_ID_RTL_R2P0) { +		if (cortex_a9) { +			aux |= L310_AUX_CTRL_EARLY_BRESP; +			pr_info("L2C-310 enabling early BRESP for Cortex-A9\n"); +		} else if (aux & L310_AUX_CTRL_EARLY_BRESP) { +			pr_warn("L2C-310 early BRESP only supported with Cortex-A9\n"); +			aux &= ~L310_AUX_CTRL_EARLY_BRESP; +		} +	} + +	if (cortex_a9) { +		u32 aux_cur = readl_relaxed(base + L2X0_AUX_CTRL); +		u32 acr = get_auxcr(); + +		pr_debug("Cortex-A9 ACR=0x%08x\n", acr); + +		if (acr & BIT(3) && !(aux_cur & L310_AUX_CTRL_FULL_LINE_ZERO)) +			pr_err("L2C-310: full line of zeros enabled in Cortex-A9 but not L2C-310 - invalid\n"); + +		if (aux & L310_AUX_CTRL_FULL_LINE_ZERO && !(acr & BIT(3))) +			pr_err("L2C-310: enabling full line of zeros but not enabled in Cortex-A9\n"); + +		if (!(aux & L310_AUX_CTRL_FULL_LINE_ZERO) && !outer_cache.write_sec) { +			aux |= L310_AUX_CTRL_FULL_LINE_ZERO; +			pr_info("L2C-310 full line of zeros enabled for Cortex-A9\n"); +		} +	} else if (aux & (L310_AUX_CTRL_FULL_LINE_ZERO | L310_AUX_CTRL_EARLY_BRESP)) { +		pr_err("L2C-310: disabling Cortex-A9 specific feature bits\n"); +		aux &= ~(L310_AUX_CTRL_FULL_LINE_ZERO | L310_AUX_CTRL_EARLY_BRESP); +	} + +	if (aux & (L310_AUX_CTRL_DATA_PREFETCH | L310_AUX_CTRL_INSTR_PREFETCH)) { +		u32 prefetch = readl_relaxed(base + L310_PREFETCH_CTRL); + +		pr_info("L2C-310 %s%s prefetch enabled, offset %u lines\n", +			aux & L310_AUX_CTRL_INSTR_PREFETCH ? "I" : "", +			aux & L310_AUX_CTRL_DATA_PREFETCH ? 
"D" : "", +			1 + (prefetch & L310_PREFETCH_CTRL_OFFSET_MASK)); +	} + +	/* r3p0 or later has power control register */ +	if (rev >= L310_CACHE_ID_RTL_R3P0) { +		u32 power_ctrl; + +		l2c_write_sec(L310_DYNAMIC_CLK_GATING_EN | L310_STNDBY_MODE_EN, +			      base, L310_POWER_CTRL); +		power_ctrl = readl_relaxed(base + L310_POWER_CTRL); +		pr_info("L2C-310 dynamic clock gating %sabled, standby mode %sabled\n", +			power_ctrl & L310_DYNAMIC_CLK_GATING_EN ? "en" : "dis", +			power_ctrl & L310_STNDBY_MODE_EN ? "en" : "dis"); +	} + +	/* +	 * Always enable non-secure access to the lockdown registers - +	 * we write to them as part of the L2C enable sequence so they +	 * need to be accessible. +	 */ +	aux |= L310_AUX_CTRL_NS_LOCKDOWN; + +	l2c_enable(base, aux, num_lock); + +	if (aux & L310_AUX_CTRL_FULL_LINE_ZERO) { +		set_auxcr(get_auxcr() | BIT(3) | BIT(2) | BIT(1)); +		cpu_notifier(l2c310_cpu_enable_flz, 0);  	}  } -void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask) +static void __init l2c310_fixup(void __iomem *base, u32 cache_id, +	struct outer_cache_fns *fns)  { -	u32 aux; -	u32 cache_id; -	u32 way_size = 0; -	int ways; -	int way_size_shift = L2X0_WAY_SIZE_SHIFT; -	const char *type; +	unsigned revision = cache_id & L2X0_CACHE_ID_RTL_MASK; +	const char *errata[8]; +	unsigned n = 0; + +	if (IS_ENABLED(CONFIG_PL310_ERRATA_588369) && +	    revision < L310_CACHE_ID_RTL_R2P0 && +	    /* For bcm compatibility */ +	    fns->inv_range == l2c210_inv_range) { +		fns->inv_range = l2c310_inv_range_erratum; +		fns->flush_range = l2c310_flush_range_erratum; +		errata[n++] = "588369"; +	} -	l2x0_base = base; -	if (cache_id_part_number_from_dt) -		cache_id = cache_id_part_number_from_dt; -	else -		cache_id = readl_relaxed(l2x0_base + L2X0_CACHE_ID); -	aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL); +	if (IS_ENABLED(CONFIG_PL310_ERRATA_727915) && +	    revision >= L310_CACHE_ID_RTL_R2P0 && +	    revision < L310_CACHE_ID_RTL_R3P1) { +		fns->flush_all = l2c310_flush_all_erratum; +		errata[n++] = "727915"; +	} + +	if (revision >= L310_CACHE_ID_RTL_R3P0 && +	    revision < L310_CACHE_ID_RTL_R3P2) { +		u32 val = readl_relaxed(base + L310_PREFETCH_CTRL); +		/* I don't think bit23 is required here... but iMX6 does so */ +		if (val & (BIT(30) | BIT(23))) { +			val &= ~(BIT(30) | BIT(23)); +			l2c_write_sec(val, base, L310_PREFETCH_CTRL); +			errata[n++] = "752271"; +		} +	} + +	if (IS_ENABLED(CONFIG_PL310_ERRATA_753970) && +	    revision == L310_CACHE_ID_RTL_R3P0) { +		sync_reg_offset = L2X0_DUMMY_REG; +		errata[n++] = "753970"; +	} + +	if (IS_ENABLED(CONFIG_PL310_ERRATA_769419)) +		errata[n++] = "769419"; + +	if (n) { +		unsigned i; + +		pr_info("L2C-310 errat%s", n > 1 ? "a" : "um"); +		for (i = 0; i < n; i++) +			pr_cont(" %s", errata[i]); +		pr_cont(" enabled\n"); +	} +} + +static void l2c310_disable(void) +{ +	/* +	 * If full-line-of-zeros is enabled, we must first disable it in the +	 * Cortex-A9 auxiliary control register before disabling the L2 cache. 
+	 */ +	if (l2x0_saved_regs.aux_ctrl & L310_AUX_CTRL_FULL_LINE_ZERO) +		set_auxcr(get_auxcr() & ~(BIT(3) | BIT(2) | BIT(1))); + +	l2c_disable(); +} + +static const struct l2c_init_data l2c310_init_fns __initconst = { +	.type = "L2C-310", +	.way_size_0 = SZ_8K, +	.num_lock = 8, +	.enable = l2c310_enable, +	.fixup = l2c310_fixup, +	.save = l2c310_save, +	.outer_cache = { +		.inv_range = l2c210_inv_range, +		.clean_range = l2c210_clean_range, +		.flush_range = l2c210_flush_range, +		.flush_all = l2c210_flush_all, +		.disable = l2c310_disable, +		.sync = l2c210_sync, +		.resume = l2c310_resume, +	}, +}; + +static void __init __l2c_init(const struct l2c_init_data *data, +	u32 aux_val, u32 aux_mask, u32 cache_id) +{ +	struct outer_cache_fns fns; +	unsigned way_size_bits, ways; +	u32 aux, old_aux; + +	/* +	 * Sanity check the aux values.  aux_mask is the bits we preserve +	 * from reading the hardware register, and aux_val is the bits we +	 * set. +	 */ +	if (aux_val & aux_mask) +		pr_alert("L2C: platform provided aux values permit register corruption.\n"); +	old_aux = aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);  	aux &= aux_mask;  	aux |= aux_val; +	if (old_aux != aux) +		pr_warn("L2C: DT/platform modifies aux control register: 0x%08x -> 0x%08x\n", +		        old_aux, aux); +  	/* Determine the number of ways */  	switch (cache_id & L2X0_CACHE_ID_PART_MASK) {  	case L2X0_CACHE_ID_PART_L310: +		if ((aux_val | ~aux_mask) & (L2C_AUX_CTRL_WAY_SIZE_MASK | L310_AUX_CTRL_ASSOCIATIVITY_16)) +			pr_warn("L2C: DT/platform tries to modify or specify cache size\n");  		if (aux & (1 << 16))  			ways = 16;  		else  			ways = 8; -		type = "L310"; -#ifdef CONFIG_PL310_ERRATA_753970 -		/* Unmapped register. */ -		sync_reg_offset = L2X0_DUMMY_REG; -#endif -		if ((cache_id & L2X0_CACHE_ID_RTL_MASK) <= L2X0_CACHE_ID_RTL_R3P0) -			outer_cache.set_debug = pl310_set_debug;  		break; +  	case L2X0_CACHE_ID_PART_L210: +	case L2X0_CACHE_ID_PART_L220:  		ways = (aux >> 13) & 0xf; -		type = "L210";  		break;  	case AURORA_CACHE_ID: -		sync_reg_offset = AURORA_SYNC_REG;  		ways = (aux >> 13) & 0xf;  		ways = 2 << ((ways + 1) >> 2); -		way_size_shift = AURORA_WAY_SIZE_SHIFT; -		type = "Aurora";  		break; +  	default:  		/* Assume unknown chips have 8 ways */  		ways = 8; -		type = "L2x0 series";  		break;  	}  	l2x0_way_mask = (1 << ways) - 1;  	/* -	 * L2 cache Size =  Way size * Number of ways +	 * way_size_0 is the size that a way_size value of zero would be +	 * given the calculation: way_size = way_size_0 << way_size_bits. +	 * So, if way_size_bits=0 is reserved, but way_size_bits=1 is 16k, +	 * then way_size_0 would be 8k. +	 * +	 * L2 cache size = number of ways * way size.  	 */ -	way_size = (aux & L2X0_AUX_CTRL_WAY_SIZE_MASK) >> 17; -	way_size = 1 << (way_size + way_size_shift); +	way_size_bits = (aux & L2C_AUX_CTRL_WAY_SIZE_MASK) >> +			L2C_AUX_CTRL_WAY_SIZE_SHIFT; +	l2x0_size = ways * (data->way_size_0 << way_size_bits); -	l2x0_size = ways * way_size * SZ_1K; +	fns = data->outer_cache; +	fns.write_sec = outer_cache.write_sec; +	if (data->fixup) +		data->fixup(l2x0_base, cache_id, &fns);  	/* -	 * Check if l2x0 controller is already enabled. -	 * If you are booting from non-secure mode -	 * accessing the below registers will fault. +	 * Check if l2x0 controller is already enabled.  If we are booting +	 * in non-secure mode accessing the below registers will fault.  	 
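 *
 * (Editorial aside, not part of this patch.)  A worked example of the
 * way-size calculation above: for an L2C-310, way_size_0 is SZ_8K, so a
 * way_size_bits field of 3 read from the auxiliary control register gives
 * a way size of 8K << 3 = 64K, and a 16-way configuration therefore gives
 * l2x0_size = 16 * 64K = 1MB.
 *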
*/ -	if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) { -		/* Make sure that I&D is not locked down when starting */ -		l2x0_unlock(cache_id); - -		/* l2x0 controller is disabled */ -		writel_relaxed(aux, l2x0_base + L2X0_AUX_CTRL); +	if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) +		data->enable(l2x0_base, aux, data->num_lock); -		l2x0_inv_all(); +	outer_cache = fns; -		/* enable L2X0 */ -		writel_relaxed(L2X0_CTRL_EN, l2x0_base + L2X0_CTRL); -	} +	/* +	 * It is strange to save the register state before initialisation, +	 * but hey, this is what the DT implementations decided to do. +	 */ +	if (data->save) +		data->save(l2x0_base);  	/* Re-read it in case some bits are reserved. */  	aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL); -	/* Save the value for resuming. */ -	l2x0_saved_regs.aux_ctrl = aux; +	pr_info("%s cache controller enabled, %d ways, %d kB\n", +		data->type, ways, l2x0_size >> 10); +	pr_info("%s: CACHE_ID 0x%08x, AUX_CTRL 0x%08x\n", +		data->type, cache_id, aux); +} -	if (!of_init) { -		outer_cache.inv_range = l2x0_inv_range; -		outer_cache.clean_range = l2x0_clean_range; -		outer_cache.flush_range = l2x0_flush_range; -		outer_cache.sync = l2x0_cache_sync; -		outer_cache.flush_all = l2x0_flush_all; -		outer_cache.inv_all = l2x0_inv_all; -		outer_cache.disable = l2x0_disable; +void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask) +{ +	const struct l2c_init_data *data; +	u32 cache_id; + +	l2x0_base = base; + +	cache_id = readl_relaxed(base + L2X0_CACHE_ID); + +	switch (cache_id & L2X0_CACHE_ID_PART_MASK) { +	default: +	case L2X0_CACHE_ID_PART_L210: +		data = &l2c210_data; +		break; + +	case L2X0_CACHE_ID_PART_L220: +		data = &l2c220_data; +		break; + +	case L2X0_CACHE_ID_PART_L310: +		data = &l2c310_init_fns; +		break;  	} -	pr_info("%s cache controller enabled\n", type); -	pr_info("l2x0: %d ways, CACHE_ID 0x%08x, AUX_CTRL 0x%08x, Cache size: %d kB\n", -		ways, cache_id, aux, l2x0_size >> 10); +	__l2c_init(data, aux_val, aux_mask, cache_id);  }  #ifdef CONFIG_OF  static int l2_wt_override; +/* Aurora don't have the cache ID register available, so we have to + * pass it though the device tree */ +static u32 cache_id_part_number_from_dt; + +static void __init l2x0_of_parse(const struct device_node *np, +				 u32 *aux_val, u32 *aux_mask) +{ +	u32 data[2] = { 0, 0 }; +	u32 tag = 0; +	u32 dirty = 0; +	u32 val = 0, mask = 0; + +	of_property_read_u32(np, "arm,tag-latency", &tag); +	if (tag) { +		mask |= L2X0_AUX_CTRL_TAG_LATENCY_MASK; +		val |= (tag - 1) << L2X0_AUX_CTRL_TAG_LATENCY_SHIFT; +	} + +	of_property_read_u32_array(np, "arm,data-latency", +				   data, ARRAY_SIZE(data)); +	if (data[0] && data[1]) { +		mask |= L2X0_AUX_CTRL_DATA_RD_LATENCY_MASK | +			L2X0_AUX_CTRL_DATA_WR_LATENCY_MASK; +		val |= ((data[0] - 1) << L2X0_AUX_CTRL_DATA_RD_LATENCY_SHIFT) | +		       ((data[1] - 1) << L2X0_AUX_CTRL_DATA_WR_LATENCY_SHIFT); +	} + +	of_property_read_u32(np, "arm,dirty-latency", &dirty); +	if (dirty) { +		mask |= L2X0_AUX_CTRL_DIRTY_LATENCY_MASK; +		val |= (dirty - 1) << L2X0_AUX_CTRL_DIRTY_LATENCY_SHIFT; +	} + +	*aux_val &= ~mask; +	*aux_val |= val; +	*aux_mask &= ~mask; +} + +static const struct l2c_init_data of_l2c210_data __initconst = { +	.type = "L2C-210", +	.way_size_0 = SZ_8K, +	.num_lock = 1, +	.of_parse = l2x0_of_parse, +	.enable = l2c_enable, +	.save = l2c_save, +	.outer_cache = { +		.inv_range   = l2c210_inv_range, +		.clean_range = l2c210_clean_range, +		.flush_range = l2c210_flush_range, +		.flush_all   = l2c210_flush_all, +		
.disable     = l2c_disable, +		.sync        = l2c210_sync, +		.resume      = l2c210_resume, +	}, +}; + +static const struct l2c_init_data of_l2c220_data __initconst = { +	.type = "L2C-220", +	.way_size_0 = SZ_8K, +	.num_lock = 1, +	.of_parse = l2x0_of_parse, +	.enable = l2c220_enable, +	.save = l2c_save, +	.outer_cache = { +		.inv_range   = l2c220_inv_range, +		.clean_range = l2c220_clean_range, +		.flush_range = l2c220_flush_range, +		.flush_all   = l2c220_flush_all, +		.disable     = l2c_disable, +		.sync        = l2c220_sync, +		.resume      = l2c210_resume, +	}, +}; + +static void __init l2c310_of_parse(const struct device_node *np, +	u32 *aux_val, u32 *aux_mask) +{ +	u32 data[3] = { 0, 0, 0 }; +	u32 tag[3] = { 0, 0, 0 }; +	u32 filter[2] = { 0, 0 }; + +	of_property_read_u32_array(np, "arm,tag-latency", tag, ARRAY_SIZE(tag)); +	if (tag[0] && tag[1] && tag[2]) +		writel_relaxed( +			L310_LATENCY_CTRL_RD(tag[0] - 1) | +			L310_LATENCY_CTRL_WR(tag[1] - 1) | +			L310_LATENCY_CTRL_SETUP(tag[2] - 1), +			l2x0_base + L310_TAG_LATENCY_CTRL); + +	of_property_read_u32_array(np, "arm,data-latency", +				   data, ARRAY_SIZE(data)); +	if (data[0] && data[1] && data[2]) +		writel_relaxed( +			L310_LATENCY_CTRL_RD(data[0] - 1) | +			L310_LATENCY_CTRL_WR(data[1] - 1) | +			L310_LATENCY_CTRL_SETUP(data[2] - 1), +			l2x0_base + L310_DATA_LATENCY_CTRL); + +	of_property_read_u32_array(np, "arm,filter-ranges", +				   filter, ARRAY_SIZE(filter)); +	if (filter[1]) { +		writel_relaxed(ALIGN(filter[0] + filter[1], SZ_1M), +			       l2x0_base + L310_ADDR_FILTER_END); +		writel_relaxed((filter[0] & ~(SZ_1M - 1)) | L310_ADDR_FILTER_EN, +			       l2x0_base + L310_ADDR_FILTER_START); +	} +} + +static const struct l2c_init_data of_l2c310_data __initconst = { +	.type = "L2C-310", +	.way_size_0 = SZ_8K, +	.num_lock = 8, +	.of_parse = l2c310_of_parse, +	.enable = l2c310_enable, +	.fixup = l2c310_fixup, +	.save  = l2c310_save, +	.outer_cache = { +		.inv_range   = l2c210_inv_range, +		.clean_range = l2c210_clean_range, +		.flush_range = l2c210_flush_range, +		.flush_all   = l2c210_flush_all, +		.disable     = l2c310_disable, +		.sync        = l2c210_sync, +		.resume      = l2c310_resume, +	}, +}; + +/* + * This is a variant of the of_l2c310_data with .sync set to + * NULL. Outer sync operations are not needed when the system is I/O + * coherent, and potentially harmful in certain situations (PCIe/PL310 + * deadlock on Armada 375/38x due to hardware I/O coherency). The + * other operations are kept because they are infrequent (therefore do + * not cause the deadlock in practice) and needed for secondary CPU + * boot and other power management activities. 
+ */ +static const struct l2c_init_data of_l2c310_coherent_data __initconst = { +	.type = "L2C-310 Coherent", +	.way_size_0 = SZ_8K, +	.num_lock = 8, +	.of_parse = l2c310_of_parse, +	.enable = l2c310_enable, +	.fixup = l2c310_fixup, +	.save  = l2c310_save, +	.outer_cache = { +		.inv_range   = l2c210_inv_range, +		.clean_range = l2c210_clean_range, +		.flush_range = l2c210_flush_range, +		.flush_all   = l2c210_flush_all, +		.disable     = l2c310_disable, +		.resume      = l2c310_resume, +	}, +}; +  /*   * Note that the end addresses passed to Linux primitives are   * noninclusive, while the hardware cache range operations use @@ -523,6 +1193,100 @@ static void aurora_flush_range(unsigned long start, unsigned long end)  	}  } +static void aurora_save(void __iomem *base) +{ +	l2x0_saved_regs.ctrl = readl_relaxed(base + L2X0_CTRL); +	l2x0_saved_regs.aux_ctrl = readl_relaxed(base + L2X0_AUX_CTRL); +} + +static void aurora_resume(void) +{ +	void __iomem *base = l2x0_base; + +	if (!(readl(base + L2X0_CTRL) & L2X0_CTRL_EN)) { +		writel_relaxed(l2x0_saved_regs.aux_ctrl, base + L2X0_AUX_CTRL); +		writel_relaxed(l2x0_saved_regs.ctrl, base + L2X0_CTRL); +	} +} + +/* + * For Aurora cache in no outer mode, enable via the CP15 coprocessor + * broadcasting of cache commands to L2. + */ +static void __init aurora_enable_no_outer(void __iomem *base, u32 aux, +	unsigned num_lock) +{ +	u32 u; + +	asm volatile("mrc p15, 1, %0, c15, c2, 0" : "=r" (u)); +	u |= AURORA_CTRL_FW;		/* Set the FW bit */ +	asm volatile("mcr p15, 1, %0, c15, c2, 0" : : "r" (u)); + +	isb(); + +	l2c_enable(base, aux, num_lock); +} + +static void __init aurora_fixup(void __iomem *base, u32 cache_id, +	struct outer_cache_fns *fns) +{ +	sync_reg_offset = AURORA_SYNC_REG; +} + +static void __init aurora_of_parse(const struct device_node *np, +				u32 *aux_val, u32 *aux_mask) +{ +	u32 val = AURORA_ACR_REPLACEMENT_TYPE_SEMIPLRU; +	u32 mask =  AURORA_ACR_REPLACEMENT_MASK; + +	of_property_read_u32(np, "cache-id-part", +			&cache_id_part_number_from_dt); + +	/* Determine and save the write policy */ +	l2_wt_override = of_property_read_bool(np, "wt-override"); + +	if (l2_wt_override) { +		val |= AURORA_ACR_FORCE_WRITE_THRO_POLICY; +		mask |= AURORA_ACR_FORCE_WRITE_POLICY_MASK; +	} + +	*aux_val &= ~mask; +	*aux_val |= val; +	*aux_mask &= ~mask; +} + +static const struct l2c_init_data of_aurora_with_outer_data __initconst = { +	.type = "Aurora", +	.way_size_0 = SZ_4K, +	.num_lock = 4, +	.of_parse = aurora_of_parse, +	.enable = l2c_enable, +	.fixup = aurora_fixup, +	.save  = aurora_save, +	.outer_cache = { +		.inv_range   = aurora_inv_range, +		.clean_range = aurora_clean_range, +		.flush_range = aurora_flush_range, +		.flush_all   = l2x0_flush_all, +		.disable     = l2x0_disable, +		.sync        = l2x0_cache_sync, +		.resume      = aurora_resume, +	}, +}; + +static const struct l2c_init_data of_aurora_no_outer_data __initconst = { +	.type = "Aurora", +	.way_size_0 = SZ_4K, +	.num_lock = 4, +	.of_parse = aurora_of_parse, +	.enable = aurora_enable_no_outer, +	.fixup = aurora_fixup, +	.save  = aurora_save, +	.outer_cache = { +		.resume      = aurora_resume, +	}, +}; +  /*   * For certain Broadcom SoCs, depending on the address range, different offsets   * need to be added to the address before passing it to L2 for @@ -587,16 +1351,16 @@ static void bcm_inv_range(unsigned long start, unsigned long end)  	/* normal case, no cross section between start and end */  	if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) { -		
l2x0_inv_range(new_start, new_end); +		l2c210_inv_range(new_start, new_end);  		return;  	}  	/* They cross sections, so it can only be a cross from section  	 * 2 to section 3  	 */ -	l2x0_inv_range(new_start, +	l2c210_inv_range(new_start,  		bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1)); -	l2x0_inv_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR), +	l2c210_inv_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR),  		new_end);  } @@ -609,26 +1373,21 @@ static void bcm_clean_range(unsigned long start, unsigned long end)  	if (unlikely(end <= start))  		return; -	if ((end - start) >= l2x0_size) { -		l2x0_clean_all(); -		return; -	} -  	new_start = bcm_l2_phys_addr(start);  	new_end = bcm_l2_phys_addr(end);  	/* normal case, no cross section between start and end */  	if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) { -		l2x0_clean_range(new_start, new_end); +		l2c210_clean_range(new_start, new_end);  		return;  	}  	/* They cross sections, so it can only be a cross from section  	 * 2 to section 3  	 */ -	l2x0_clean_range(new_start, +	l2c210_clean_range(new_start,  		bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1)); -	l2x0_clean_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR), +	l2c210_clean_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR),  		new_end);  } @@ -642,7 +1401,7 @@ static void bcm_flush_range(unsigned long start, unsigned long end)  		return;  	if ((end - start) >= l2x0_size) { -		l2x0_flush_all(); +		outer_cache.flush_all();  		return;  	} @@ -651,296 +1410,94 @@ static void bcm_flush_range(unsigned long start, unsigned long end)  	/* normal case, no cross section between start and end */  	if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) { -		l2x0_flush_range(new_start, new_end); +		l2c210_flush_range(new_start, new_end);  		return;  	}  	/* They cross sections, so it can only be a cross from section  	 * 2 to section 3  	 */ -	l2x0_flush_range(new_start, +	l2c210_flush_range(new_start,  		bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1)); -	l2x0_flush_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR), +	l2c210_flush_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR),  		new_end);  } -static void __init l2x0_of_setup(const struct device_node *np, -				 u32 *aux_val, u32 *aux_mask) -{ -	u32 data[2] = { 0, 0 }; -	u32 tag = 0; -	u32 dirty = 0; -	u32 val = 0, mask = 0; - -	of_property_read_u32(np, "arm,tag-latency", &tag); -	if (tag) { -		mask |= L2X0_AUX_CTRL_TAG_LATENCY_MASK; -		val |= (tag - 1) << L2X0_AUX_CTRL_TAG_LATENCY_SHIFT; -	} - -	of_property_read_u32_array(np, "arm,data-latency", -				   data, ARRAY_SIZE(data)); -	if (data[0] && data[1]) { -		mask |= L2X0_AUX_CTRL_DATA_RD_LATENCY_MASK | -			L2X0_AUX_CTRL_DATA_WR_LATENCY_MASK; -		val |= ((data[0] - 1) << L2X0_AUX_CTRL_DATA_RD_LATENCY_SHIFT) | -		       ((data[1] - 1) << L2X0_AUX_CTRL_DATA_WR_LATENCY_SHIFT); -	} - -	of_property_read_u32(np, "arm,dirty-latency", &dirty); -	if (dirty) { -		mask |= L2X0_AUX_CTRL_DIRTY_LATENCY_MASK; -		val |= (dirty - 1) << L2X0_AUX_CTRL_DIRTY_LATENCY_SHIFT; -	} - -	*aux_val &= ~mask; -	*aux_val |= val; -	*aux_mask &= ~mask; -} - -static void __init pl310_of_setup(const struct device_node *np, -				  u32 *aux_val, u32 *aux_mask) -{ -	u32 data[3] = { 0, 0, 0 }; -	u32 tag[3] = { 0, 0, 0 }; -	u32 filter[2] = { 0, 0 }; - -	of_property_read_u32_array(np, "arm,tag-latency", tag, ARRAY_SIZE(tag)); -	if (tag[0] && tag[1] && tag[2]) -		writel_relaxed( -			((tag[0] - 1) << L2X0_LATENCY_CTRL_RD_SHIFT) | -			((tag[1] - 1) << L2X0_LATENCY_CTRL_WR_SHIFT) | -			
((tag[2] - 1) << L2X0_LATENCY_CTRL_SETUP_SHIFT), -			l2x0_base + L2X0_TAG_LATENCY_CTRL); - -	of_property_read_u32_array(np, "arm,data-latency", -				   data, ARRAY_SIZE(data)); -	if (data[0] && data[1] && data[2]) -		writel_relaxed( -			((data[0] - 1) << L2X0_LATENCY_CTRL_RD_SHIFT) | -			((data[1] - 1) << L2X0_LATENCY_CTRL_WR_SHIFT) | -			((data[2] - 1) << L2X0_LATENCY_CTRL_SETUP_SHIFT), -			l2x0_base + L2X0_DATA_LATENCY_CTRL); - -	of_property_read_u32_array(np, "arm,filter-ranges", -				   filter, ARRAY_SIZE(filter)); -	if (filter[1]) { -		writel_relaxed(ALIGN(filter[0] + filter[1], SZ_1M), -			       l2x0_base + L2X0_ADDR_FILTER_END); -		writel_relaxed((filter[0] & ~(SZ_1M - 1)) | L2X0_ADDR_FILTER_EN, -			       l2x0_base + L2X0_ADDR_FILTER_START); -	} -} - -static void __init pl310_save(void) -{ -	u32 l2x0_revision = readl_relaxed(l2x0_base + L2X0_CACHE_ID) & -		L2X0_CACHE_ID_RTL_MASK; - -	l2x0_saved_regs.tag_latency = readl_relaxed(l2x0_base + -		L2X0_TAG_LATENCY_CTRL); -	l2x0_saved_regs.data_latency = readl_relaxed(l2x0_base + -		L2X0_DATA_LATENCY_CTRL); -	l2x0_saved_regs.filter_end = readl_relaxed(l2x0_base + -		L2X0_ADDR_FILTER_END); -	l2x0_saved_regs.filter_start = readl_relaxed(l2x0_base + -		L2X0_ADDR_FILTER_START); - -	if (l2x0_revision >= L2X0_CACHE_ID_RTL_R2P0) { -		/* -		 * From r2p0, there is Prefetch offset/control register -		 */ -		l2x0_saved_regs.prefetch_ctrl = readl_relaxed(l2x0_base + -			L2X0_PREFETCH_CTRL); -		/* -		 * From r3p0, there is Power control register -		 */ -		if (l2x0_revision >= L2X0_CACHE_ID_RTL_R3P0) -			l2x0_saved_regs.pwr_ctrl = readl_relaxed(l2x0_base + -				L2X0_POWER_CTRL); -	} -} - -static void aurora_save(void) -{ -	l2x0_saved_regs.ctrl = readl_relaxed(l2x0_base + L2X0_CTRL); -	l2x0_saved_regs.aux_ctrl = readl_relaxed(l2x0_base + L2X0_AUX_CTRL); -} - -static void l2x0_resume(void) -{ -	if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) { -		/* restore aux ctrl and enable l2 */ -		l2x0_unlock(readl_relaxed(l2x0_base + L2X0_CACHE_ID)); - -		writel_relaxed(l2x0_saved_regs.aux_ctrl, l2x0_base + -			L2X0_AUX_CTRL); - -		l2x0_inv_all(); - -		writel_relaxed(L2X0_CTRL_EN, l2x0_base + L2X0_CTRL); -	} -} - -static void pl310_resume(void) -{ -	u32 l2x0_revision; - -	if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) { -		/* restore pl310 setup */ -		writel_relaxed(l2x0_saved_regs.tag_latency, -			l2x0_base + L2X0_TAG_LATENCY_CTRL); -		writel_relaxed(l2x0_saved_regs.data_latency, -			l2x0_base + L2X0_DATA_LATENCY_CTRL); -		writel_relaxed(l2x0_saved_regs.filter_end, -			l2x0_base + L2X0_ADDR_FILTER_END); -		writel_relaxed(l2x0_saved_regs.filter_start, -			l2x0_base + L2X0_ADDR_FILTER_START); - -		l2x0_revision = readl_relaxed(l2x0_base + L2X0_CACHE_ID) & -			L2X0_CACHE_ID_RTL_MASK; - -		if (l2x0_revision >= L2X0_CACHE_ID_RTL_R2P0) { -			writel_relaxed(l2x0_saved_regs.prefetch_ctrl, -				l2x0_base + L2X0_PREFETCH_CTRL); -			if (l2x0_revision >= L2X0_CACHE_ID_RTL_R3P0) -				writel_relaxed(l2x0_saved_regs.pwr_ctrl, -					l2x0_base + L2X0_POWER_CTRL); -		} -	} - -	l2x0_resume(); -} +/* Broadcom L2C-310 start from ARMs R3P2 or later, and require no fixups */ +static const struct l2c_init_data of_bcm_l2x0_data __initconst = { +	.type = "BCM-L2C-310", +	.way_size_0 = SZ_8K, +	.num_lock = 8, +	.of_parse = l2c310_of_parse, +	.enable = l2c310_enable, +	.save  = l2c310_save, +	.outer_cache = { +		.inv_range   = bcm_inv_range, +		.clean_range = bcm_clean_range, +		.flush_range = bcm_flush_range, +		.flush_all   = l2c210_flush_all, +		.disable     = 
l2c310_disable, +		.sync        = l2c210_sync, +		.resume      = l2c310_resume, +	}, +}; -static void aurora_resume(void) +static void __init tauros3_save(void __iomem *base)  { -	if (!(readl(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) { -		writel_relaxed(l2x0_saved_regs.aux_ctrl, -				l2x0_base + L2X0_AUX_CTRL); -		writel_relaxed(l2x0_saved_regs.ctrl, l2x0_base + L2X0_CTRL); -	} -} +	l2c_save(base); -static void __init aurora_broadcast_l2_commands(void) -{ -	__u32 u; -	/* Enable Broadcasting of cache commands to L2*/ -	__asm__ __volatile__("mrc p15, 1, %0, c15, c2, 0" : "=r"(u)); -	u |= AURORA_CTRL_FW;		/* Set the FW bit */ -	__asm__ __volatile__("mcr p15, 1, %0, c15, c2, 0\n" : : "r"(u)); -	isb(); +	l2x0_saved_regs.aux2_ctrl = +		readl_relaxed(base + TAUROS3_AUX2_CTRL); +	l2x0_saved_regs.prefetch_ctrl = +		readl_relaxed(base + L310_PREFETCH_CTRL);  } -static void __init aurora_of_setup(const struct device_node *np, -				u32 *aux_val, u32 *aux_mask) +static void tauros3_resume(void)  { -	u32 val = AURORA_ACR_REPLACEMENT_TYPE_SEMIPLRU; -	u32 mask =  AURORA_ACR_REPLACEMENT_MASK; - -	of_property_read_u32(np, "cache-id-part", -			&cache_id_part_number_from_dt); +	void __iomem *base = l2x0_base; -	/* Determine and save the write policy */ -	l2_wt_override = of_property_read_bool(np, "wt-override"); +	if (!(readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN)) { +		writel_relaxed(l2x0_saved_regs.aux2_ctrl, +			       base + TAUROS3_AUX2_CTRL); +		writel_relaxed(l2x0_saved_regs.prefetch_ctrl, +			       base + L310_PREFETCH_CTRL); -	if (l2_wt_override) { -		val |= AURORA_ACR_FORCE_WRITE_THRO_POLICY; -		mask |= AURORA_ACR_FORCE_WRITE_POLICY_MASK; +		l2c_enable(base, l2x0_saved_regs.aux_ctrl, 8);  	} - -	*aux_val &= ~mask; -	*aux_val |= val; -	*aux_mask &= ~mask;  } -static const struct l2x0_of_data pl310_data = { -	.setup = pl310_of_setup, -	.save  = pl310_save, +static const struct l2c_init_data of_tauros3_data __initconst = { +	.type = "Tauros3", +	.way_size_0 = SZ_8K, +	.num_lock = 8, +	.enable = l2c_enable, +	.save  = tauros3_save, +	/* Tauros3 broadcasts L1 cache operations to L2 */  	.outer_cache = { -		.resume      = pl310_resume, -		.inv_range   = l2x0_inv_range, -		.clean_range = l2x0_clean_range, -		.flush_range = l2x0_flush_range, -		.sync        = l2x0_cache_sync, -		.flush_all   = l2x0_flush_all, -		.inv_all     = l2x0_inv_all, -		.disable     = l2x0_disable, -	}, -}; - -static const struct l2x0_of_data l2x0_data = { -	.setup = l2x0_of_setup, -	.save  = NULL, -	.outer_cache = { -		.resume      = l2x0_resume, -		.inv_range   = l2x0_inv_range, -		.clean_range = l2x0_clean_range, -		.flush_range = l2x0_flush_range, -		.sync        = l2x0_cache_sync, -		.flush_all   = l2x0_flush_all, -		.inv_all     = l2x0_inv_all, -		.disable     = l2x0_disable, -	}, -}; - -static const struct l2x0_of_data aurora_with_outer_data = { -	.setup = aurora_of_setup, -	.save  = aurora_save, -	.outer_cache = { -		.resume      = aurora_resume, -		.inv_range   = aurora_inv_range, -		.clean_range = aurora_clean_range, -		.flush_range = aurora_flush_range, -		.sync        = l2x0_cache_sync, -		.flush_all   = l2x0_flush_all, -		.inv_all     = l2x0_inv_all, -		.disable     = l2x0_disable, -	}, -}; - -static const struct l2x0_of_data aurora_no_outer_data = { -	.setup = aurora_of_setup, -	.save  = aurora_save, -	.outer_cache = { -		.resume      = aurora_resume, -	}, -}; - -static const struct l2x0_of_data bcm_l2x0_data = { -	.setup = pl310_of_setup, -	.save  = pl310_save, -	.outer_cache = { -		.resume      = pl310_resume, -		
.inv_range   = bcm_inv_range, -		.clean_range = bcm_clean_range, -		.flush_range = bcm_flush_range, -		.sync        = l2x0_cache_sync, -		.flush_all   = l2x0_flush_all, -		.inv_all     = l2x0_inv_all, -		.disable     = l2x0_disable, +		.resume      = tauros3_resume,  	},  }; +#define L2C_ID(name, fns) { .compatible = name, .data = (void *)&fns }  static const struct of_device_id l2x0_ids[] __initconst = { -	{ .compatible = "arm,pl310-cache", .data = (void *)&pl310_data }, -	{ .compatible = "arm,l220-cache", .data = (void *)&l2x0_data }, -	{ .compatible = "arm,l210-cache", .data = (void *)&l2x0_data }, -	{ .compatible = "marvell,aurora-system-cache", -	  .data = (void *)&aurora_no_outer_data}, -	{ .compatible = "marvell,aurora-outer-cache", -	  .data = (void *)&aurora_with_outer_data}, -	{ .compatible = "brcm,bcm11351-a2-pl310-cache", -	  .data = (void *)&bcm_l2x0_data}, -	{ .compatible = "bcm,bcm11351-a2-pl310-cache", /* deprecated name */ -	  .data = (void *)&bcm_l2x0_data}, +	L2C_ID("arm,l210-cache", of_l2c210_data), +	L2C_ID("arm,l220-cache", of_l2c220_data), +	L2C_ID("arm,pl310-cache", of_l2c310_data), +	L2C_ID("brcm,bcm11351-a2-pl310-cache", of_bcm_l2x0_data), +	L2C_ID("marvell,aurora-outer-cache", of_aurora_with_outer_data), +	L2C_ID("marvell,aurora-system-cache", of_aurora_no_outer_data), +	L2C_ID("marvell,tauros3-cache", of_tauros3_data), +	/* Deprecated IDs */ +	L2C_ID("bcm,bcm11351-a2-pl310-cache", of_bcm_l2x0_data),  	{}  };  int __init l2x0_of_init(u32 aux_val, u32 aux_mask)  { +	const struct l2c_init_data *data;  	struct device_node *np; -	const struct l2x0_of_data *data;  	struct resource res; +	u32 cache_id, old_aux;  	np = of_find_matching_node(NULL, l2x0_ids);  	if (!np) @@ -957,23 +1514,33 @@ int __init l2x0_of_init(u32 aux_val, u32 aux_mask)  	data = of_match_node(l2x0_ids, np)->data; -	/* L2 configuration can only be changed if the cache is disabled */ -	if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) { -		if (data->setup) -			data->setup(np, &aux_val, &aux_mask); +	if (of_device_is_compatible(np, "arm,pl310-cache") && +	    of_property_read_bool(np, "arm,io-coherent")) +		data = &of_l2c310_coherent_data; -		/* For aurora cache in no outer mode select the -		 * correct mode using the coprocessor*/ -		if (data == &aurora_no_outer_data) -			aurora_broadcast_l2_commands(); +	old_aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL); +	if (old_aux != ((old_aux & aux_mask) | aux_val)) { +		pr_warn("L2C: platform modifies aux control register: 0x%08x -> 0x%08x\n", +		        old_aux, (old_aux & aux_mask) | aux_val); +	} else if (aux_mask != ~0U && aux_val != 0) { +		pr_alert("L2C: platform provided aux values match the hardware, so have no effect.  
Please remove them.\n");  	} -	if (data->save) -		data->save(); +	/* All L2 caches are unified, so this property should be specified */ +	if (!of_property_read_bool(np, "cache-unified")) +		pr_err("L2C: device tree omits to specify unified cache\n"); + +	/* L2 configuration can only be changed if the cache is disabled */ +	if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) +		if (data->of_parse) +			data->of_parse(np, &aux_val, &aux_mask); + +	if (cache_id_part_number_from_dt) +		cache_id = cache_id_part_number_from_dt; +	else +		cache_id = readl_relaxed(l2x0_base + L2X0_CACHE_ID); -	of_init = true; -	memcpy(&outer_cache, &data->outer_cache, sizeof(outer_cache)); -	l2x0_init(l2x0_base, aux_val, aux_mask); +	__l2c_init(data, aux_val, aux_mask, cache_id);  	return 0;  } diff --git a/arch/arm/mm/cache-tauros2.c b/arch/arm/mm/cache-tauros2.c index 1be0f4e5e6e..b273739e635 100644 --- a/arch/arm/mm/cache-tauros2.c +++ b/arch/arm/mm/cache-tauros2.c @@ -33,7 +33,7 @@   * outer cache operations into the kernel image if the kernel has been   * configured to support a pre-v7 CPU.   */ -#if __LINUX_ARM_ARCH__ < 7 +#ifdef CONFIG_CPU_32v5  /*   * Low-level cache maintenance operations.   */ @@ -229,33 +229,6 @@ static void __init tauros2_internal_init(unsigned int features)  	}  #endif -#ifdef CONFIG_CPU_32v6 -	/* -	 * Check whether this CPU lacks support for the v7 hierarchical -	 * cache ops.  (PJ4 is in its v6 personality mode if the MMFR3 -	 * register indicates no support for the v7 hierarchical cache -	 * ops.) -	 */ -	if (cpuid_scheme() && (read_mmfr3() & 0xf) == 0) { -		/* -		 * When Tauros2 is used in an ARMv6 system, the L2 -		 * enable bit is in the ARMv6 ARM-mandated position -		 * (bit [26] of the System Control Register). -		 */ -		if (!(get_cr() & 0x04000000)) { -			printk(KERN_INFO "Tauros2: Enabling L2 cache.\n"); -			adjust_cr(0x04000000, 0x04000000); -		} - -		mode = "ARMv6"; -		outer_cache.inv_range = tauros2_inv_range; -		outer_cache.clean_range = tauros2_clean_range; -		outer_cache.flush_range = tauros2_flush_range; -		outer_cache.disable = tauros2_disable; -		outer_cache.resume = tauros2_resume; -	} -#endif -  #ifdef CONFIG_CPU_32v7  	/*  	 * Check whether this CPU has support for the v7 hierarchical diff --git a/arch/arm/mm/cache-tauros3.h b/arch/arm/mm/cache-tauros3.h new file mode 100644 index 00000000000..02c0a97cbc0 --- /dev/null +++ b/arch/arm/mm/cache-tauros3.h @@ -0,0 +1,41 @@ +/* + * Marvell Tauros3 cache controller includes + * + * Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com> + * + * based on GPL'ed 2.6 kernel sources + *  (c) Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef __ASM_ARM_HARDWARE_TAUROS3_H +#define __ASM_ARM_HARDWARE_TAUROS3_H + +/* + * Marvell Tauros3 L2CC is compatible with PL310 r0p0 + * but with PREFETCH_CTRL (r2p0) and an additional event counter. 
+ * Also, there is AUX2_CTRL for some Marvell specific control. + */ + +#define TAUROS3_EVENT_CNT2_CFG		0x224 +#define TAUROS3_EVENT_CNT2_VAL		0x228 +#define TAUROS3_INV_ALL			0x780 +#define TAUROS3_CLEAN_ALL		0x784 +#define TAUROS3_AUX2_CTRL		0x820 + +/* Registers shifts and masks */ +#define TAUROS3_AUX2_CTRL_LINEFILL_BURST8_EN	(1 << 2) + +#endif diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S index b5c467a65c2..615c99e38ba 100644 --- a/arch/arm/mm/cache-v7.S +++ b/arch/arm/mm/cache-v7.S @@ -59,7 +59,7 @@ ENTRY(v7_invalidate_l1)         bgt     2b         cmp     r2, #0         bgt     1b -       dsb +       dsb     st         isb         mov     pc, lr  ENDPROC(v7_invalidate_l1) @@ -146,18 +146,18 @@ flush_levels:  	ldr	r7, =0x7fff  	ands	r7, r7, r1, lsr #13		@ extract max number of the index size  loop1: -	mov	r9, r4				@ create working copy of max way size +	mov	r9, r7				@ create working copy of max index  loop2: - ARM(	orr	r11, r10, r9, lsl r5	)	@ factor way and cache number into r11 - THUMB(	lsl	r6, r9, r5		) + ARM(	orr	r11, r10, r4, lsl r5	)	@ factor way and cache number into r11 + THUMB(	lsl	r6, r4, r5		)   THUMB(	orr	r11, r10, r6		)	@ factor way and cache number into r11 - ARM(	orr	r11, r11, r7, lsl r2	)	@ factor index number into r11 - THUMB(	lsl	r6, r7, r2		) + ARM(	orr	r11, r11, r9, lsl r2	)	@ factor index number into r11 + THUMB(	lsl	r6, r9, r2		)   THUMB(	orr	r11, r11, r6		)	@ factor index number into r11  	mcr	p15, 0, r11, c7, c14, 2		@ clean & invalidate by set/way -	subs	r9, r9, #1			@ decrement the way +	subs	r9, r9, #1			@ decrement the index  	bge	loop2 -	subs	r7, r7, #1			@ decrement the index +	subs	r4, r4, #1			@ decrement the way  	bge	loop1  skip:  	add	r10, r10, #2			@ increment cache number @@ -166,7 +166,7 @@ skip:  finished:  	mov	r10, #0				@ swith back to cache level 0  	mcr	p15, 2, r10, c0, c0, 0		@ select current cache level in cssr -	dsb +	dsb	st  	isb  	mov	pc, lr  ENDPROC(v7_flush_dcache_all) @@ -335,7 +335,7 @@ ENTRY(v7_flush_kern_dcache_area)  	add	r0, r0, r2  	cmp	r0, r1  	blo	1b -	dsb +	dsb	st  	mov	pc, lr  ENDPROC(v7_flush_kern_dcache_area) @@ -368,7 +368,7 @@ v7_dma_inv_range:  	add	r0, r0, r2  	cmp	r0, r1  	blo	1b -	dsb +	dsb	st  	mov	pc, lr  ENDPROC(v7_dma_inv_range) @@ -390,7 +390,7 @@ v7_dma_clean_range:  	add	r0, r0, r2  	cmp	r0, r1  	blo	1b -	dsb +	dsb	st  	mov	pc, lr  ENDPROC(v7_dma_clean_range) @@ -412,7 +412,7 @@ ENTRY(v7_dma_flush_range)  	add	r0, r0, r2  	cmp	r0, r1  	blo	1b -	dsb +	dsb	st  	mov	pc, lr  ENDPROC(v7_dma_flush_range) diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c index 84e6f772e20..6eb97b3a748 100644 --- a/arch/arm/mm/context.c +++ b/arch/arm/mm/context.c @@ -36,8 +36,8 @@   * The context ID is used by debuggers and trace logic, and   * should be unique within all running processes.   * - * In big endian operation, the two 32 bit words are swapped if accesed by - * non 64-bit operations. + * In big endian operation, the two 32 bit words are swapped if accessed + * by non-64-bit operations.   */  #define ASID_FIRST_VERSION	(1ULL << ASID_BITS)  #define NUM_USER_ASIDS		ASID_FIRST_VERSION @@ -78,20 +78,21 @@ void a15_erratum_get_cpumask(int this_cpu, struct mm_struct *mm,  #endif  #ifdef CONFIG_ARM_LPAE -static void cpu_set_reserved_ttbr0(void) -{ -	/* -	 * Set TTBR0 to swapper_pg_dir which contains only global entries. The -	 * ASID is set to 0. 
-	 */ -	cpu_set_ttbr(0, __pa(swapper_pg_dir)); -	isb(); -} +/* + * With LPAE, the ASID and page tables are updated atomicly, so there is + * no need for a reserved set of tables (the active ASID tracking prevents + * any issues across a rollover). + */ +#define cpu_set_reserved_ttbr0()  #else  static void cpu_set_reserved_ttbr0(void)  {  	u32 ttb; -	/* Copy TTBR1 into TTBR0 */ +	/* +	 * Copy TTBR1 into TTBR0. +	 * This points at swapper_pg_dir, which contains only global +	 * entries so any speculative walks are perfectly safe. +	 */  	asm volatile(  	"	mrc	p15, 0, %0, c2, c0, 1		@ read TTBR1\n"  	"	mcr	p15, 0, %0, c2, c0, 0		@ set TTBR0\n" @@ -179,6 +180,7 @@ static int is_reserved_asid(u64 asid)  static u64 new_context(struct mm_struct *mm, unsigned int cpu)  { +	static u32 cur_idx = 1;  	u64 asid = atomic64_read(&mm->context.id);  	u64 generation = atomic64_read(&asid_generation); @@ -193,10 +195,13 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu)  		 * Allocate a free ASID. If we can't find one, take a  		 * note of the currently active ASIDs and mark the TLBs  		 * as requiring flushes. We always count from ASID #1, -		 * as we reserve ASID #0 to switch via TTBR0 and indicate -		 * rollover events. +		 * as we reserve ASID #0 to switch via TTBR0 and to +		 * avoid speculative page table walks from hitting in +		 * any partial walk caches, which could be populated +		 * from overlapping level-1 descriptors used to map both +		 * the module area and the userspace stack.  		 */ -		asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1); +		asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, cur_idx);  		if (asid == NUM_USER_ASIDS) {  			generation = atomic64_add_return(ASID_FIRST_VERSION,  							 &asid_generation); @@ -204,6 +209,7 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu)  			asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1);  		}  		__set_bit(asid, asid_map); +		cur_idx = asid;  		asid |= generation;  		cpumask_clear(mm_cpumask(mm));  	} @@ -221,8 +227,9 @@ void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk)  		__check_vmalloc_seq(mm);  	/* -	 * Required during context switch to avoid speculative page table -	 * walking with the wrong TTBR. +	 * We cannot update the pgd and the ASID atomicly with classic +	 * MMU, so switch exclusively to global mappings to avoid +	 * speculative page table walking with the wrong TTBR.  	 */  	cpu_set_reserved_ttbr0(); diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index f5e1a847171..1f88db06b13 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -9,6 +9,7 @@   *   *  DMA uncached mapping support.   */ +#include <linux/bootmem.h>  #include <linux/module.h>  #include <linux/mm.h>  #include <linux/gfp.h> @@ -157,9 +158,47 @@ struct dma_map_ops arm_coherent_dma_ops = {  };  EXPORT_SYMBOL(arm_coherent_dma_ops); +static int __dma_supported(struct device *dev, u64 mask, bool warn) +{ +	unsigned long max_dma_pfn; + +	/* +	 * If the mask allows for more memory than we can address, +	 * and we actually have that much memory, then we must +	 * indicate that DMA to this device is not supported. 
+	 */ +	if (sizeof(mask) != sizeof(dma_addr_t) && +	    mask > (dma_addr_t)~0 && +	    dma_to_pfn(dev, ~0) < max_pfn) { +		if (warn) { +			dev_warn(dev, "Coherent DMA mask %#llx is larger than dma_addr_t allows\n", +				 mask); +			dev_warn(dev, "Driver did not use or check the return value from dma_set_coherent_mask()?\n"); +		} +		return 0; +	} + +	max_dma_pfn = min(max_pfn, arm_dma_pfn_limit); + +	/* +	 * Translate the device's DMA mask to a PFN limit.  This +	 * PFN number includes the page which we can DMA to. +	 */ +	if (dma_to_pfn(dev, mask) < max_dma_pfn) { +		if (warn) +			dev_warn(dev, "Coherent DMA mask %#llx (pfn %#lx-%#lx) covers a smaller range of system memory than the DMA zone pfn 0x0-%#lx\n", +				 mask, +				 dma_to_pfn(dev, 0), dma_to_pfn(dev, mask) + 1, +				 max_dma_pfn + 1); +		return 0; +	} + +	return 1; +} +  static u64 get_coherent_dma_mask(struct device *dev)  { -	u64 mask = (u64)arm_dma_limit; +	u64 mask = (u64)DMA_BIT_MASK(32);  	if (dev) {  		mask = dev->coherent_dma_mask; @@ -173,12 +212,8 @@ static u64 get_coherent_dma_mask(struct device *dev)  			return 0;  		} -		if ((~mask) & (u64)arm_dma_limit) { -			dev_warn(dev, "coherent DMA mask %#llx is smaller " -				 "than system GFP_DMA mask %#llx\n", -				 mask, (u64)arm_dma_limit); +		if (!__dma_supported(dev, mask, true))  			return 0; -		}  	}  	return mask; @@ -249,9 +284,6 @@ static void __dma_free_buffer(struct page *page, size_t size)  }  #ifdef CONFIG_MMU -#ifdef CONFIG_HUGETLB_PAGE -#warning ARM Coherent DMA allocator does not (yet) support huge TLB -#endif  static void *__alloc_from_contiguous(struct device *dev, size_t size,  				     pgprot_t prot, struct page **ret_page, @@ -341,7 +373,7 @@ void __init init_dma_coherent_pool_size(unsigned long size)  static int __init atomic_pool_init(void)  {  	struct dma_pool *pool = &atomic_pool; -	pgprot_t prot = pgprot_dmacoherent(pgprot_kernel); +	pgprot_t prot = pgprot_dmacoherent(PAGE_KERNEL);  	gfp_t gfp = GFP_KERNEL | GFP_DMA;  	unsigned long nr_pages = pool->size >> PAGE_SHIFT;  	unsigned long *bitmap; @@ -358,7 +390,7 @@ static int __init atomic_pool_init(void)  	if (!pages)  		goto no_pages; -	if (IS_ENABLED(CONFIG_DMA_CMA)) +	if (dev_get_cma_area(NULL))  		ptr = __alloc_from_contiguous(NULL, pool->size, prot, &page,  					      atomic_pool_init);  	else @@ -429,12 +461,21 @@ void __init dma_contiguous_remap(void)  		map.type = MT_MEMORY_DMA_READY;  		/* -		 * Clear previous low-memory mapping +		 * Clear previous low-memory mapping to ensure that the +		 * TLB does not see any conflicting entries, then flush +		 * the TLB of the old entries before creating new mappings. +		 * +		 * This ensures that any speculatively loaded TLB entries +		 * (even though they may be rare) can not cause any problems, +		 * and ensures that this code is architecturally compliant.  		 
*/  		for (addr = __phys_to_virt(start); addr < __phys_to_virt(end);  		     addr += PMD_SIZE)  			pmd_clear(pmd_off_k(addr)); +		flush_tlb_kernel_range(__phys_to_virt(start), +				       __phys_to_virt(end)); +  		iotable_init(&map, 1);  	}  } @@ -589,7 +630,7 @@ static void __free_from_contiguous(struct device *dev, struct page *page,  	if (PageHighMem(page))  		__dma_free_remap(cpu_addr, size);  	else -		__dma_remap(page, size, pgprot_kernel); +		__dma_remap(page, size, PAGE_KERNEL);  	dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT);  } @@ -669,7 +710,7 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,  		addr = __alloc_simple_buffer(dev, size, gfp, &page);  	else if (!(gfp & __GFP_WAIT))  		addr = __alloc_from_pool(size, &page); -	else if (!IS_ENABLED(CONFIG_DMA_CMA)) +	else if (!dev_get_cma_area(dev))  		addr = __alloc_remap_buffer(dev, size, gfp, prot, &page, caller);  	else  		addr = __alloc_from_contiguous(dev, size, prot, &page, caller); @@ -687,7 +728,7 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,  void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,  		    gfp_t gfp, struct dma_attrs *attrs)  { -	pgprot_t prot = __get_dma_pgprot(attrs, pgprot_kernel); +	pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL);  	void *memory;  	if (dma_alloc_from_coherent(dev, size, handle, &memory)) @@ -700,7 +741,7 @@ void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,  static void *arm_coherent_dma_alloc(struct device *dev, size_t size,  	dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs)  { -	pgprot_t prot = __get_dma_pgprot(attrs, pgprot_kernel); +	pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL);  	void *memory;  	if (dma_alloc_from_coherent(dev, size, handle, &memory)) @@ -758,7 +799,7 @@ static void __arm_dma_free(struct device *dev, size_t size, void *cpu_addr,  		__dma_free_buffer(page, size);  	} else if (__free_from_pool(cpu_addr, size)) {  		return; -	} else if (!IS_ENABLED(CONFIG_DMA_CMA)) { +	} else if (!dev_get_cma_area(dev)) {  		__dma_free_remap(cpu_addr, size);  		__dma_free_buffer(page, size);  	} else { @@ -853,7 +894,7 @@ static void dma_cache_maint_page(struct page *page, unsigned long offset,  static void __dma_page_cpu_to_dev(struct page *page, unsigned long off,  	size_t size, enum dma_data_direction dir)  { -	unsigned long paddr; +	phys_addr_t paddr;  	dma_cache_maint_page(page, off, size, dir, dmac_map_area); @@ -869,14 +910,15 @@ static void __dma_page_cpu_to_dev(struct page *page, unsigned long off,  static void __dma_page_dev_to_cpu(struct page *page, unsigned long off,  	size_t size, enum dma_data_direction dir)  { -	unsigned long paddr = page_to_phys(page) + off; +	phys_addr_t paddr = page_to_phys(page) + off;  	/* FIXME: non-speculating: not required */ -	/* don't bother invalidating if DMA to device */ -	if (dir != DMA_TO_DEVICE) +	/* in any case, don't bother invalidating if DMA to device */ +	if (dir != DMA_TO_DEVICE) {  		outer_inv_range(paddr, paddr + size); -	dma_cache_maint_page(page, off, size, dir, dmac_unmap_area); +		dma_cache_maint_page(page, off, size, dir, dmac_unmap_area); +	}  	/*  	 * Mark the D-cache clean for these pages to avoid extra flushing. 
@@ -1007,9 +1049,7 @@ void arm_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,   */  int dma_supported(struct device *dev, u64 mask)  { -	if (mask < (u64)arm_dma_limit) -		return 0; -	return 1; +	return __dma_supported(dev, mask, false);  }  EXPORT_SYMBOL(dma_supported); @@ -1036,48 +1076,98 @@ fs_initcall(dma_debug_do_init);  /* IOMMU */ +static int extend_iommu_mapping(struct dma_iommu_mapping *mapping); +  static inline dma_addr_t __alloc_iova(struct dma_iommu_mapping *mapping,  				      size_t size)  {  	unsigned int order = get_order(size);  	unsigned int align = 0;  	unsigned int count, start; +	size_t mapping_size = mapping->bits << PAGE_SHIFT;  	unsigned long flags; +	dma_addr_t iova; +	int i;  	if (order > CONFIG_ARM_DMA_IOMMU_ALIGNMENT)  		order = CONFIG_ARM_DMA_IOMMU_ALIGNMENT; -	count = ((PAGE_ALIGN(size) >> PAGE_SHIFT) + -		 (1 << mapping->order) - 1) >> mapping->order; - -	if (order > mapping->order) -		align = (1 << (order - mapping->order)) - 1; +	count = PAGE_ALIGN(size) >> PAGE_SHIFT; +	align = (1 << order) - 1;  	spin_lock_irqsave(&mapping->lock, flags); -	start = bitmap_find_next_zero_area(mapping->bitmap, mapping->bits, 0, -					   count, align); -	if (start > mapping->bits) { -		spin_unlock_irqrestore(&mapping->lock, flags); -		return DMA_ERROR_CODE; +	for (i = 0; i < mapping->nr_bitmaps; i++) { +		start = bitmap_find_next_zero_area(mapping->bitmaps[i], +				mapping->bits, 0, count, align); + +		if (start > mapping->bits) +			continue; + +		bitmap_set(mapping->bitmaps[i], start, count); +		break;  	} -	bitmap_set(mapping->bitmap, start, count); +	/* +	 * No unused range found. Try to extend the existing mapping +	 * and perform a second attempt to reserve an IO virtual +	 * address range of size bytes. +	 */ +	if (i == mapping->nr_bitmaps) { +		if (extend_iommu_mapping(mapping)) { +			spin_unlock_irqrestore(&mapping->lock, flags); +			return DMA_ERROR_CODE; +		} + +		start = bitmap_find_next_zero_area(mapping->bitmaps[i], +				mapping->bits, 0, count, align); + +		if (start > mapping->bits) { +			spin_unlock_irqrestore(&mapping->lock, flags); +			return DMA_ERROR_CODE; +		} + +		bitmap_set(mapping->bitmaps[i], start, count); +	}  	spin_unlock_irqrestore(&mapping->lock, flags); -	return mapping->base + (start << (mapping->order + PAGE_SHIFT)); +	iova = mapping->base + (mapping_size * i); +	iova += start << PAGE_SHIFT; + +	return iova;  }  static inline void __free_iova(struct dma_iommu_mapping *mapping,  			       dma_addr_t addr, size_t size)  { -	unsigned int start = (addr - mapping->base) >> -			     (mapping->order + PAGE_SHIFT); -	unsigned int count = ((size >> PAGE_SHIFT) + -			      (1 << mapping->order) - 1) >> mapping->order; +	unsigned int start, count; +	size_t mapping_size = mapping->bits << PAGE_SHIFT;  	unsigned long flags; +	dma_addr_t bitmap_base; +	u32 bitmap_index; + +	if (!size) +		return; + +	bitmap_index = (u32) (addr - mapping->base) / (u32) mapping_size; +	BUG_ON(addr < mapping->base || bitmap_index > mapping->extensions); + +	bitmap_base = mapping->base + mapping_size * bitmap_index; + +	start = (addr - bitmap_base) >>	PAGE_SHIFT; + +	if (addr + size > bitmap_base + mapping_size) { +		/* +		 * The address range to be freed reaches into the iova +		 * range of the next bitmap. This should not happen as +		 * we don't allow this in __alloc_iova (at the +		 * moment). 
+		 */ +		BUG(); +	} else +		count = size >> PAGE_SHIFT;  	spin_lock_irqsave(&mapping->lock, flags); -	bitmap_clear(mapping->bitmap, start, count); +	bitmap_clear(mapping->bitmaps[bitmap_index], start, count);  	spin_unlock_irqrestore(&mapping->lock, flags);  } @@ -1232,7 +1322,8 @@ __iommu_create_mapping(struct device *dev, struct page **pages, size_t size)  				break;  		len = (j - i) << PAGE_SHIFT; -		ret = iommu_map(mapping->domain, iova, phys, len, 0); +		ret = iommu_map(mapping->domain, iova, phys, len, +				IOMMU_READ|IOMMU_WRITE);  		if (ret < 0)  			goto fail;  		iova += len; @@ -1317,14 +1408,14 @@ static void __iommu_free_atomic(struct device *dev, void *cpu_addr,  static void *arm_iommu_alloc_attrs(struct device *dev, size_t size,  	    dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs)  { -	pgprot_t prot = __get_dma_pgprot(attrs, pgprot_kernel); +	pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL);  	struct page **pages;  	void *addr = NULL;  	*handle = DMA_ERROR_CODE;  	size = PAGE_ALIGN(size); -	if (gfp & GFP_ATOMIC) +	if (!(gfp & __GFP_WAIT))  		return __iommu_alloc_atomic(dev, size, handle);  	/* @@ -1431,6 +1522,27 @@ static int arm_iommu_get_sgtable(struct device *dev, struct sg_table *sgt,  					 GFP_KERNEL);  } +static int __dma_direction_to_prot(enum dma_data_direction dir) +{ +	int prot; + +	switch (dir) { +	case DMA_BIDIRECTIONAL: +		prot = IOMMU_READ | IOMMU_WRITE; +		break; +	case DMA_TO_DEVICE: +		prot = IOMMU_READ; +		break; +	case DMA_FROM_DEVICE: +		prot = IOMMU_WRITE; +		break; +	default: +		prot = 0; +	} + +	return prot; +} +  /*   * Map a part of the scatter-gather list into contiguous io address space   */ @@ -1444,6 +1556,7 @@ static int __map_sg_chunk(struct device *dev, struct scatterlist *sg,  	int ret = 0;  	unsigned int count;  	struct scatterlist *s; +	int prot;  	size = PAGE_ALIGN(size);  	*handle = DMA_ERROR_CODE; @@ -1460,7 +1573,9 @@ static int __map_sg_chunk(struct device *dev, struct scatterlist *sg,  			!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))  			__dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir); -		ret = iommu_map(mapping->domain, iova, phys, len, 0); +		prot = __dma_direction_to_prot(dir); + +		ret = iommu_map(mapping->domain, iova, phys, len, prot);  		if (ret < 0)  			goto fail;  		count += len >> PAGE_SHIFT; @@ -1665,19 +1780,7 @@ static dma_addr_t arm_coherent_iommu_map_page(struct device *dev, struct page *p  	if (dma_addr == DMA_ERROR_CODE)  		return dma_addr; -	switch (dir) { -	case DMA_BIDIRECTIONAL: -		prot = IOMMU_READ | IOMMU_WRITE; -		break; -	case DMA_TO_DEVICE: -		prot = IOMMU_READ; -		break; -	case DMA_FROM_DEVICE: -		prot = IOMMU_WRITE; -		break; -	default: -		prot = 0; -	} +	prot = __dma_direction_to_prot(dir);  	ret = iommu_map(mapping->domain, dma_addr, page_to_phys(page), len, prot);  	if (ret < 0) @@ -1829,8 +1932,7 @@ struct dma_map_ops iommu_coherent_ops = {   * arm_iommu_create_mapping   * @bus: pointer to the bus holding the client device (for IOMMU calls)   * @base: start address of the valid IO address space - * @size: size of the valid IO address space - * @order: accuracy of the IO addresses allocations + * @size: maximum size of the valid IO address space   *   * Creates a mapping structure which holds information about used/unused   * IO address ranges, which is required to perform memory allocation and @@ -1840,38 +1942,53 @@ struct dma_map_ops iommu_coherent_ops = {   * arm_iommu_attach_device function.   
*/  struct dma_iommu_mapping * -arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t size, -			 int order) +arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t size)  { -	unsigned int count = size >> (PAGE_SHIFT + order); -	unsigned int bitmap_size = BITS_TO_LONGS(count) * sizeof(long); +	unsigned int bits = size >> PAGE_SHIFT; +	unsigned int bitmap_size = BITS_TO_LONGS(bits) * sizeof(long);  	struct dma_iommu_mapping *mapping; +	int extensions = 1;  	int err = -ENOMEM; -	if (!count) +	if (!bitmap_size)  		return ERR_PTR(-EINVAL); +	if (bitmap_size > PAGE_SIZE) { +		extensions = bitmap_size / PAGE_SIZE; +		bitmap_size = PAGE_SIZE; +	} +  	mapping = kzalloc(sizeof(struct dma_iommu_mapping), GFP_KERNEL);  	if (!mapping)  		goto err; -	mapping->bitmap = kzalloc(bitmap_size, GFP_KERNEL); -	if (!mapping->bitmap) +	mapping->bitmap_size = bitmap_size; +	mapping->bitmaps = kzalloc(extensions * sizeof(unsigned long *), +				GFP_KERNEL); +	if (!mapping->bitmaps)  		goto err2; +	mapping->bitmaps[0] = kzalloc(bitmap_size, GFP_KERNEL); +	if (!mapping->bitmaps[0]) +		goto err3; + +	mapping->nr_bitmaps = 1; +	mapping->extensions = extensions;  	mapping->base = base;  	mapping->bits = BITS_PER_BYTE * bitmap_size; -	mapping->order = order; +  	spin_lock_init(&mapping->lock);  	mapping->domain = iommu_domain_alloc(bus);  	if (!mapping->domain) -		goto err3; +		goto err4;  	kref_init(&mapping->kref);  	return mapping; +err4: +	kfree(mapping->bitmaps[0]);  err3: -	kfree(mapping->bitmap); +	kfree(mapping->bitmaps);  err2:  	kfree(mapping);  err: @@ -1881,14 +1998,35 @@ EXPORT_SYMBOL_GPL(arm_iommu_create_mapping);  static void release_iommu_mapping(struct kref *kref)  { +	int i;  	struct dma_iommu_mapping *mapping =  		container_of(kref, struct dma_iommu_mapping, kref);  	iommu_domain_free(mapping->domain); -	kfree(mapping->bitmap); +	for (i = 0; i < mapping->nr_bitmaps; i++) +		kfree(mapping->bitmaps[i]); +	kfree(mapping->bitmaps);  	kfree(mapping);  } +static int extend_iommu_mapping(struct dma_iommu_mapping *mapping) +{ +	int next_bitmap; + +	if (mapping->nr_bitmaps > mapping->extensions) +		return -EINVAL; + +	next_bitmap = mapping->nr_bitmaps; +	mapping->bitmaps[next_bitmap] = kzalloc(mapping->bitmap_size, +						GFP_ATOMIC); +	if (!mapping->bitmaps[next_bitmap]) +		return -ENOMEM; + +	mapping->nr_bitmaps++; + +	return 0; +} +  void arm_iommu_release_mapping(struct dma_iommu_mapping *mapping)  {  	if (mapping) diff --git a/arch/arm/mm/dump.c b/arch/arm/mm/dump.c new file mode 100644 index 00000000000..c508f41a43b --- /dev/null +++ b/arch/arm/mm/dump.c @@ -0,0 +1,365 @@ +/* + * Debug helper to dump the current kernel pagetables of the system + * so that we can see what the various memory ranges are set to. + * + * Derived from x86 implementation: + * (C) Copyright 2008 Intel Corporation + * + * Author: Arjan van de Ven <arjan@linux.intel.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. 
+ */ +#include <linux/debugfs.h> +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/seq_file.h> + +#include <asm/fixmap.h> +#include <asm/pgtable.h> + +struct addr_marker { +	unsigned long start_address; +	const char *name; +}; + +static struct addr_marker address_markers[] = { +	{ MODULES_VADDR,	"Modules" }, +	{ PAGE_OFFSET,		"Kernel Mapping" }, +	{ 0,			"vmalloc() Area" }, +	{ VMALLOC_END,		"vmalloc() End" }, +	{ FIXADDR_START,	"Fixmap Area" }, +	{ CONFIG_VECTORS_BASE,	"Vectors" }, +	{ CONFIG_VECTORS_BASE + PAGE_SIZE * 2, "Vectors End" }, +	{ -1,			NULL }, +}; + +struct pg_state { +	struct seq_file *seq; +	const struct addr_marker *marker; +	unsigned long start_address; +	unsigned level; +	u64 current_prot; +}; + +struct prot_bits { +	u64		mask; +	u64		val; +	const char	*set; +	const char	*clear; +}; + +static const struct prot_bits pte_bits[] = { +	{ +		.mask	= L_PTE_USER, +		.val	= L_PTE_USER, +		.set	= "USR", +		.clear	= "   ", +	}, { +		.mask	= L_PTE_RDONLY, +		.val	= L_PTE_RDONLY, +		.set	= "ro", +		.clear	= "RW", +	}, { +		.mask	= L_PTE_XN, +		.val	= L_PTE_XN, +		.set	= "NX", +		.clear	= "x ", +	}, { +		.mask	= L_PTE_SHARED, +		.val	= L_PTE_SHARED, +		.set	= "SHD", +		.clear	= "   ", +	}, { +		.mask	= L_PTE_MT_MASK, +		.val	= L_PTE_MT_UNCACHED, +		.set	= "SO/UNCACHED", +	}, { +		.mask	= L_PTE_MT_MASK, +		.val	= L_PTE_MT_BUFFERABLE, +		.set	= "MEM/BUFFERABLE/WC", +	}, { +		.mask	= L_PTE_MT_MASK, +		.val	= L_PTE_MT_WRITETHROUGH, +		.set	= "MEM/CACHED/WT", +	}, { +		.mask	= L_PTE_MT_MASK, +		.val	= L_PTE_MT_WRITEBACK, +		.set	= "MEM/CACHED/WBRA", +#ifndef CONFIG_ARM_LPAE +	}, { +		.mask	= L_PTE_MT_MASK, +		.val	= L_PTE_MT_MINICACHE, +		.set	= "MEM/MINICACHE", +#endif +	}, { +		.mask	= L_PTE_MT_MASK, +		.val	= L_PTE_MT_WRITEALLOC, +		.set	= "MEM/CACHED/WBWA", +	}, { +		.mask	= L_PTE_MT_MASK, +		.val	= L_PTE_MT_DEV_SHARED, +		.set	= "DEV/SHARED", +#ifndef CONFIG_ARM_LPAE +	}, { +		.mask	= L_PTE_MT_MASK, +		.val	= L_PTE_MT_DEV_NONSHARED, +		.set	= "DEV/NONSHARED", +#endif +	}, { +		.mask	= L_PTE_MT_MASK, +		.val	= L_PTE_MT_DEV_WC, +		.set	= "DEV/WC", +	}, { +		.mask	= L_PTE_MT_MASK, +		.val	= L_PTE_MT_DEV_CACHED, +		.set	= "DEV/CACHED", +	}, +}; + +static const struct prot_bits section_bits[] = { +#ifdef CONFIG_ARM_LPAE +	{ +		.mask	= PMD_SECT_USER, +		.val	= PMD_SECT_USER, +		.set	= "USR", +	}, { +		.mask	= PMD_SECT_RDONLY, +		.val	= PMD_SECT_RDONLY, +		.set	= "ro", +		.clear	= "RW", +#elif __LINUX_ARM_ARCH__ >= 6 +	{ +		.mask	= PMD_SECT_APX | PMD_SECT_AP_READ | PMD_SECT_AP_WRITE, +		.val	= PMD_SECT_APX | PMD_SECT_AP_WRITE, +		.set	= "    ro", +	}, { +		.mask	= PMD_SECT_APX | PMD_SECT_AP_READ | PMD_SECT_AP_WRITE, +		.val	= PMD_SECT_AP_WRITE, +		.set	= "    RW", +	}, { +		.mask	= PMD_SECT_APX | PMD_SECT_AP_READ | PMD_SECT_AP_WRITE, +		.val	= PMD_SECT_AP_READ, +		.set	= "USR ro", +	}, { +		.mask	= PMD_SECT_APX | PMD_SECT_AP_READ | PMD_SECT_AP_WRITE, +		.val	= PMD_SECT_AP_READ | PMD_SECT_AP_WRITE, +		.set	= "USR RW", +#else /* ARMv4/ARMv5  */ +	/* These are approximate */ +	{ +		.mask   = PMD_SECT_AP_READ | PMD_SECT_AP_WRITE, +		.val    = 0, +		.set    = "    ro", +	}, { +		.mask   = PMD_SECT_AP_READ | PMD_SECT_AP_WRITE, +		.val    = PMD_SECT_AP_WRITE, +		.set    = "    RW", +	}, { +		.mask   = PMD_SECT_AP_READ | PMD_SECT_AP_WRITE, +		.val    = PMD_SECT_AP_READ, +		.set    = "USR ro", +	}, { +		.mask   = PMD_SECT_AP_READ | PMD_SECT_AP_WRITE, +		.val    = PMD_SECT_AP_READ | PMD_SECT_AP_WRITE, +		.set    = "USR RW", +#endif +	}, { +		.mask	= PMD_SECT_XN, +		.val	= PMD_SECT_XN, +		
.set	= "NX", +		.clear	= "x ", +	}, { +		.mask	= PMD_SECT_S, +		.val	= PMD_SECT_S, +		.set	= "SHD", +		.clear	= "   ", +	}, +}; + +struct pg_level { +	const struct prot_bits *bits; +	size_t num; +	u64 mask; +}; + +static struct pg_level pg_level[] = { +	{ +	}, { /* pgd */ +	}, { /* pud */ +	}, { /* pmd */ +		.bits	= section_bits, +		.num	= ARRAY_SIZE(section_bits), +	}, { /* pte */ +		.bits	= pte_bits, +		.num	= ARRAY_SIZE(pte_bits), +	}, +}; + +static void dump_prot(struct pg_state *st, const struct prot_bits *bits, size_t num) +{ +	unsigned i; + +	for (i = 0; i < num; i++, bits++) { +		const char *s; + +		if ((st->current_prot & bits->mask) == bits->val) +			s = bits->set; +		else +			s = bits->clear; + +		if (s) +			seq_printf(st->seq, " %s", s); +	} +} + +static void note_page(struct pg_state *st, unsigned long addr, unsigned level, u64 val) +{ +	static const char units[] = "KMGTPE"; +	u64 prot = val & pg_level[level].mask; + +	if (addr < USER_PGTABLES_CEILING) +		return; + +	if (!st->level) { +		st->level = level; +		st->current_prot = prot; +		seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); +	} else if (prot != st->current_prot || level != st->level || +		   addr >= st->marker[1].start_address) { +		const char *unit = units; +		unsigned long delta; + +		if (st->current_prot) { +			seq_printf(st->seq, "0x%08lx-0x%08lx   ", +				   st->start_address, addr); + +			delta = (addr - st->start_address) >> 10; +			while (!(delta & 1023) && unit[1]) { +				delta >>= 10; +				unit++; +			} +			seq_printf(st->seq, "%9lu%c", delta, *unit); +			if (pg_level[st->level].bits) +				dump_prot(st, pg_level[st->level].bits, pg_level[st->level].num); +			seq_printf(st->seq, "\n"); +		} + +		if (addr >= st->marker[1].start_address) { +			st->marker++; +			seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); +		} +		st->start_address = addr; +		st->current_prot = prot; +		st->level = level; +	} +} + +static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start) +{ +	pte_t *pte = pte_offset_kernel(pmd, 0); +	unsigned long addr; +	unsigned i; + +	for (i = 0; i < PTRS_PER_PTE; i++, pte++) { +		addr = start + i * PAGE_SIZE; +		note_page(st, addr, 4, pte_val(*pte)); +	} +} + +static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start) +{ +	pmd_t *pmd = pmd_offset(pud, 0); +	unsigned long addr; +	unsigned i; + +	for (i = 0; i < PTRS_PER_PMD; i++, pmd++) { +		addr = start + i * PMD_SIZE; +		if (pmd_none(*pmd) || pmd_large(*pmd) || !pmd_present(*pmd)) +			note_page(st, addr, 3, pmd_val(*pmd)); +		else +			walk_pte(st, pmd, addr); + +		if (SECTION_SIZE < PMD_SIZE && pmd_large(pmd[1])) +			note_page(st, addr + SECTION_SIZE, 3, pmd_val(pmd[1])); +	} +} + +static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start) +{ +	pud_t *pud = pud_offset(pgd, 0); +	unsigned long addr; +	unsigned i; + +	for (i = 0; i < PTRS_PER_PUD; i++, pud++) { +		addr = start + i * PUD_SIZE; +		if (!pud_none(*pud)) { +			walk_pmd(st, pud, addr); +		} else { +			note_page(st, addr, 2, pud_val(*pud)); +		} +	} +} + +static void walk_pgd(struct seq_file *m) +{ +	pgd_t *pgd = swapper_pg_dir; +	struct pg_state st; +	unsigned long addr; +	unsigned i, pgdoff = USER_PGTABLES_CEILING / PGDIR_SIZE; + +	memset(&st, 0, sizeof(st)); +	st.seq = m; +	st.marker = address_markers; + +	pgd += pgdoff; + +	for (i = pgdoff; i < PTRS_PER_PGD; i++, pgd++) { +		addr = i * PGDIR_SIZE; +		if (!pgd_none(*pgd)) { +			walk_pud(&st, pgd, addr); +		} else { +			note_page(&st, addr, 1, pgd_val(*pgd)); +		} +	} + +	
note_page(&st, 0, 0, 0); +} + +static int ptdump_show(struct seq_file *m, void *v) +{ +	walk_pgd(m); +	return 0; +} + +static int ptdump_open(struct inode *inode, struct file *file) +{ +	return single_open(file, ptdump_show, NULL); +} + +static const struct file_operations ptdump_fops = { +	.open		= ptdump_open, +	.read		= seq_read, +	.llseek		= seq_lseek, +	.release	= single_release, +}; + +static int ptdump_init(void) +{ +	struct dentry *pe; +	unsigned i, j; + +	for (i = 0; i < ARRAY_SIZE(pg_level); i++) +		if (pg_level[i].bits) +			for (j = 0; j < pg_level[i].num; j++) +				pg_level[i].mask |= pg_level[i].bits[j].mask; + +	address_markers[2].start_address = VMALLOC_START; + +	pe = debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, +				 &ptdump_fops); +	return pe ? 0 : -ENOMEM; +} +__initcall(ptdump_init); diff --git a/arch/arm/mm/extable.c b/arch/arm/mm/extable.c index 9d285626bc7..312e15e6d00 100644 --- a/arch/arm/mm/extable.c +++ b/arch/arm/mm/extable.c @@ -9,8 +9,13 @@ int fixup_exception(struct pt_regs *regs)  	const struct exception_table_entry *fixup;  	fixup = search_exception_tables(instruction_pointer(regs)); -	if (fixup) +	if (fixup) {  		regs->ARM_pc = fixup->fixup; +#ifdef CONFIG_THUMB2_KERNEL +		/* Clear the IT state to avoid nasty surprises in the fixup */ +		regs->ARM_cpsr &= ~PSR_IT_MASK; +#endif +	}  	return fixup != NULL;  } diff --git a/arch/arm/mm/fault-armv.c b/arch/arm/mm/fault-armv.c index 2a5907b5c8d..ff379ac115d 100644 --- a/arch/arm/mm/fault-armv.c +++ b/arch/arm/mm/fault-armv.c @@ -65,7 +65,7 @@ static int do_adjust_pte(struct vm_area_struct *vma, unsigned long address,  	return ret;  } -#if USE_SPLIT_PTLOCKS +#if USE_SPLIT_PTE_PTLOCKS  /*   * If we are using split PTE locks, then we need to take the page   * lock here.  
Otherwise we are using shared mm->page_table_lock @@ -84,10 +84,10 @@ static inline void do_pte_unlock(spinlock_t *ptl)  {  	spin_unlock(ptl);  } -#else /* !USE_SPLIT_PTLOCKS */ +#else /* !USE_SPLIT_PTE_PTLOCKS */  static inline void do_pte_lock(spinlock_t *ptl) {}  static inline void do_pte_unlock(spinlock_t *ptl) {} -#endif /* USE_SPLIT_PTLOCKS */ +#endif /* USE_SPLIT_PTE_PTLOCKS */  static int adjust_pte(struct vm_area_struct *vma, unsigned long address,  	unsigned long pfn) diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c index 6d5ba9afb16..43d54f5b26b 100644 --- a/arch/arm/mm/flush.c +++ b/arch/arm/mm/flush.c @@ -104,17 +104,20 @@ void flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr, unsig  #define flush_icache_alias(pfn,vaddr,len)	do { } while (0)  #endif +#define FLAG_PA_IS_EXEC 1 +#define FLAG_PA_CORE_IN_MM 2 +  static void flush_ptrace_access_other(void *args)  {  	__flush_icache_all();  } -static -void flush_ptrace_access(struct vm_area_struct *vma, struct page *page, -			 unsigned long uaddr, void *kaddr, unsigned long len) +static inline +void __flush_ptrace_access(struct page *page, unsigned long uaddr, void *kaddr, +			   unsigned long len, unsigned int flags)  {  	if (cache_is_vivt()) { -		if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm))) { +		if (flags & FLAG_PA_CORE_IN_MM) {  			unsigned long addr = (unsigned long)kaddr;  			__cpuc_coherent_kern_range(addr, addr + len);  		} @@ -128,7 +131,7 @@ void flush_ptrace_access(struct vm_area_struct *vma, struct page *page,  	}  	/* VIPT non-aliasing D-cache */ -	if (vma->vm_flags & VM_EXEC) { +	if (flags & FLAG_PA_IS_EXEC) {  		unsigned long addr = (unsigned long)kaddr;  		if (icache_is_vipt_aliasing())  			flush_icache_alias(page_to_pfn(page), uaddr, len); @@ -140,6 +143,26 @@ void flush_ptrace_access(struct vm_area_struct *vma, struct page *page,  	}  } +static +void flush_ptrace_access(struct vm_area_struct *vma, struct page *page, +			 unsigned long uaddr, void *kaddr, unsigned long len) +{ +	unsigned int flags = 0; +	if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm))) +		flags |= FLAG_PA_CORE_IN_MM; +	if (vma->vm_flags & VM_EXEC) +		flags |= FLAG_PA_IS_EXEC; +	__flush_ptrace_access(page, uaddr, kaddr, len, flags); +} + +void flush_uprobe_xol_access(struct page *page, unsigned long uaddr, +			     void *kaddr, unsigned long len) +{ +	unsigned int flags = FLAG_PA_CORE_IN_MM|FLAG_PA_IS_EXEC; + +	__flush_ptrace_access(page, uaddr, kaddr, len, flags); +} +  /*   * Copy user data from/to a page which is mapped into a different   * processes address space.  
Really, we want to allow our "user @@ -175,16 +198,16 @@ void __flush_dcache_page(struct address_space *mapping, struct page *page)  		unsigned long i;  		if (cache_is_vipt_nonaliasing()) {  			for (i = 0; i < (1 << compound_order(page)); i++) { -				void *addr = kmap_atomic(page); +				void *addr = kmap_atomic(page + i);  				__cpuc_flush_dcache_area(addr, PAGE_SIZE);  				kunmap_atomic(addr);  			}  		} else {  			for (i = 0; i < (1 << compound_order(page)); i++) { -				void *addr = kmap_high_get(page); +				void *addr = kmap_high_get(page + i);  				if (addr) {  					__cpuc_flush_dcache_area(addr, PAGE_SIZE); -					kunmap_high(page); +					kunmap_high(page + i);  				}  			}  		} diff --git a/arch/arm/mm/highmem.c b/arch/arm/mm/highmem.c index 21b9e1bf9b7..45aeaaca905 100644 --- a/arch/arm/mm/highmem.c +++ b/arch/arm/mm/highmem.c @@ -18,6 +18,21 @@  #include <asm/tlbflush.h>  #include "mm.h" +pte_t *fixmap_page_table; + +static inline void set_fixmap_pte(int idx, pte_t pte) +{ +	unsigned long vaddr = __fix_to_virt(idx); +	set_pte_ext(fixmap_page_table + idx, pte, 0); +	local_flush_tlb_kernel_page(vaddr); +} + +static inline pte_t get_fixmap_pte(unsigned long vaddr) +{ +	unsigned long idx = __virt_to_fix(vaddr); +	return *(fixmap_page_table + idx); +} +  void *kmap(struct page *page)  {  	might_sleep(); @@ -63,20 +78,20 @@ void *kmap_atomic(struct page *page)  	type = kmap_atomic_idx_push();  	idx = type + KM_TYPE_NR * smp_processor_id(); -	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); +	vaddr = __fix_to_virt(idx);  #ifdef CONFIG_DEBUG_HIGHMEM  	/*  	 * With debugging enabled, kunmap_atomic forces that entry to 0.  	 * Make sure it was indeed properly unmapped.  	 */ -	BUG_ON(!pte_none(get_top_pte(vaddr))); +	BUG_ON(!pte_none(*(fixmap_page_table + idx)));  #endif  	/*  	 * When debugging is off, kunmap_atomic leaves the previous mapping  	 * in place, so the contained TLB flush ensures the TLB is updated  	 * with the new mapping.  	 
*/ -	set_top_pte(vaddr, mk_pte(page, kmap_prot)); +	set_fixmap_pte(idx, mk_pte(page, kmap_prot));  	return (void *)vaddr;  } @@ -94,8 +109,8 @@ void __kunmap_atomic(void *kvaddr)  		if (cache_is_vivt())  			__cpuc_flush_dcache_area((void *)vaddr, PAGE_SIZE);  #ifdef CONFIG_DEBUG_HIGHMEM -		BUG_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx)); -		set_top_pte(vaddr, __pte(0)); +		BUG_ON(vaddr != __fix_to_virt(idx)); +		set_fixmap_pte(idx, __pte(0));  #else  		(void) idx;  /* to kill a warning */  #endif @@ -117,11 +132,11 @@ void *kmap_atomic_pfn(unsigned long pfn)  	type = kmap_atomic_idx_push();  	idx = type + KM_TYPE_NR * smp_processor_id(); -	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); +	vaddr = __fix_to_virt(idx);  #ifdef CONFIG_DEBUG_HIGHMEM -	BUG_ON(!pte_none(get_top_pte(vaddr))); +	BUG_ON(!pte_none(*(fixmap_page_table + idx)));  #endif -	set_top_pte(vaddr, pfn_pte(pfn, kmap_prot)); +	set_fixmap_pte(idx, pfn_pte(pfn, kmap_prot));  	return (void *)vaddr;  } @@ -133,5 +148,5 @@ struct page *kmap_atomic_to_page(const void *ptr)  	if (vaddr < FIXADDR_START)  		return virt_to_page(ptr); -	return pte_page(get_top_pte(vaddr)); +	return pte_page(get_fixmap_pte(vaddr));  } diff --git a/arch/arm/mm/hugetlbpage.c b/arch/arm/mm/hugetlbpage.c index 54ee6163c18..66781bf3407 100644 --- a/arch/arm/mm/hugetlbpage.c +++ b/arch/arm/mm/hugetlbpage.c @@ -56,8 +56,3 @@ int pmd_huge(pmd_t pmd)  {  	return pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT);  } - -int pmd_huge_support(void) -{ -	return 1; -} diff --git a/arch/arm/mm/idmap.c b/arch/arm/mm/idmap.c index 83cb3ac2709..c447ec70e86 100644 --- a/arch/arm/mm/idmap.c +++ b/arch/arm/mm/idmap.c @@ -9,7 +9,13 @@  #include <asm/sections.h>  #include <asm/system_info.h> +/* + * Note: accesses outside of the kernel image and the identity map area + * are not supported on any CPU using the idmap tables as its current + * page tables. + */  pgd_t *idmap_pgd; +phys_addr_t (*arch_virt_to_idmap) (unsigned long x);  #ifdef CONFIG_ARM_LPAE  static void idmap_add_pmd(pud_t *pud, unsigned long addr, unsigned long end, @@ -24,6 +30,13 @@ static void idmap_add_pmd(pud_t *pud, unsigned long addr, unsigned long end,  			pr_warning("Failed to allocate identity pmd.\n");  			return;  		} +		/* +		 * Copy the original PMD to ensure that the PMD entries for +		 * the kernel image are preserved. 
+		 */ +		if (!pud_none(*pud)) +			memcpy(pmd, pmd_offset(pud, 0), +			       PTRS_PER_PMD * sizeof(pmd_t));  		pud_populate(&init_mm, pud, pmd);  		pmd += pmd_index(addr);  	} else @@ -67,8 +80,9 @@ static void identity_mapping_add(pgd_t *pgd, const char *text_start,  	unsigned long addr, end;  	unsigned long next; -	addr = virt_to_phys(text_start); -	end = virt_to_phys(text_end); +	addr = virt_to_idmap(text_start); +	end = virt_to_idmap(text_end); +	pr_info("Setting up static identity map for 0x%lx - 0x%lx\n", addr, end);  	prot |= PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_AF; @@ -90,8 +104,6 @@ static int __init init_static_idmap(void)  	if (!idmap_pgd)  		return -ENOMEM; -	pr_info("Setting up static identity map for 0x%p - 0x%p\n", -		__idmap_text_start, __idmap_text_end);  	identity_mapping_add(idmap_pgd, __idmap_text_start,  			     __idmap_text_end, 0); diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index febaee7ca57..659c75d808d 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -17,13 +17,13 @@  #include <linux/nodemask.h>  #include <linux/initrd.h>  #include <linux/of_fdt.h> -#include <linux/of_reserved_mem.h>  #include <linux/highmem.h>  #include <linux/gfp.h>  #include <linux/memblock.h>  #include <linux/dma-contiguous.h>  #include <linux/sizes.h> +#include <asm/cp15.h>  #include <asm/mach-types.h>  #include <asm/memblock.h>  #include <asm/prom.h> @@ -37,6 +37,14 @@  #include "mm.h" +#ifdef CONFIG_CPU_CP15_MMU +unsigned long __init __clear_cr(unsigned long mask) +{ +	cr_alignment = cr_alignment & ~mask; +	return cr_alignment; +} +#endif +  static phys_addr_t phys_initrd_start __initdata = 0;  static unsigned long phys_initrd_size __initdata = 0; @@ -77,40 +85,26 @@ static int __init parse_tag_initrd2(const struct tag *tag)  __tagtable(ATAG_INITRD2, parse_tag_initrd2); -#ifdef CONFIG_OF_FLATTREE -void __init early_init_dt_setup_initrd_arch(u64 start, u64 end) -{ -	phys_initrd_start = start; -	phys_initrd_size = end - start; -} -#endif /* CONFIG_OF_FLATTREE */ -  /*   * This keeps memory configuration data used by a couple memory   * initialization functions, as well as show_mem() for the skipping   * of holes in the memory map.  It is populated by arm_add_memory().   
*/ -struct meminfo meminfo; -  void show_mem(unsigned int filter)  {  	int free = 0, total = 0, reserved = 0; -	int shared = 0, cached = 0, slab = 0, i; -	struct meminfo * mi = &meminfo; +	int shared = 0, cached = 0, slab = 0; +	struct memblock_region *reg;  	printk("Mem-info:\n");  	show_free_areas(filter); -	if (filter & SHOW_MEM_FILTER_PAGE_COUNT) -		return; - -	for_each_bank (i, mi) { -		struct membank *bank = &mi->bank[i]; +	for_each_memblock (memory, reg) {  		unsigned int pfn1, pfn2;  		struct page *page, *end; -		pfn1 = bank_pfn_start(bank); -		pfn2 = bank_pfn_end(bank); +		pfn1 = memblock_region_memory_base_pfn(reg); +		pfn2 = memblock_region_memory_end_pfn(reg);  		page = pfn_to_page(pfn1);  		end  = pfn_to_page(pfn2 - 1) + 1; @@ -127,8 +121,9 @@ void show_mem(unsigned int filter)  				free++;  			else  				shared += page_count(page) - 1; -			page++; -		} while (page < end); +			pfn1++; +			page = pfn_to_page(pfn1); +		} while (pfn1 < pfn2);  	}  	printk("%d pages of RAM\n", total); @@ -142,68 +137,9 @@ void show_mem(unsigned int filter)  static void __init find_limits(unsigned long *min, unsigned long *max_low,  			       unsigned long *max_high)  { -	struct meminfo *mi = &meminfo; -	int i; - -	/* This assumes the meminfo array is properly sorted */ -	*min = bank_pfn_start(&mi->bank[0]); -	for_each_bank (i, mi) -		if (mi->bank[i].highmem) -				break; -	*max_low = bank_pfn_end(&mi->bank[i - 1]); -	*max_high = bank_pfn_end(&mi->bank[mi->nr_banks - 1]); -} - -static void __init arm_bootmem_init(unsigned long start_pfn, -	unsigned long end_pfn) -{ -	struct memblock_region *reg; -	unsigned int boot_pages; -	phys_addr_t bitmap; -	pg_data_t *pgdat; - -	/* -	 * Allocate the bootmem bitmap page.  This must be in a region -	 * of memory which has already been mapped. -	 */ -	boot_pages = bootmem_bootmap_pages(end_pfn - start_pfn); -	bitmap = memblock_alloc_base(boot_pages << PAGE_SHIFT, L1_CACHE_BYTES, -				__pfn_to_phys(end_pfn)); - -	/* -	 * Initialise the bootmem allocator, handing the -	 * memory banks over to bootmem. -	 */ -	node_set_online(0); -	pgdat = NODE_DATA(0); -	init_bootmem_node(pgdat, __phys_to_pfn(bitmap), start_pfn, end_pfn); - -	/* Free the lowmem regions from memblock into bootmem. */ -	for_each_memblock(memory, reg) { -		unsigned long start = memblock_region_memory_base_pfn(reg); -		unsigned long end = memblock_region_memory_end_pfn(reg); - -		if (end >= end_pfn) -			end = end_pfn; -		if (start >= end) -			break; - -		free_bootmem(__pfn_to_phys(start), (end - start) << PAGE_SHIFT); -	} - -	/* Reserve the lowmem memblock reserved regions in bootmem. */ -	for_each_memblock(reserved, reg) { -		unsigned long start = memblock_region_reserved_base_pfn(reg); -		unsigned long end = memblock_region_reserved_end_pfn(reg); - -		if (end >= end_pfn) -			end = end_pfn; -		if (start >= end) -			break; - -		reserve_bootmem(__pfn_to_phys(start), -			        (end - start) << PAGE_SHIFT, BOOTMEM_DEFAULT); -	} +	*max_low = PFN_DOWN(memblock_get_current_limit()); +	*min = PFN_UP(memblock_start_of_DRAM()); +	*max_high = PFN_DOWN(memblock_end_of_DRAM());  }  #ifdef CONFIG_ZONE_DMA @@ -218,6 +154,7 @@ EXPORT_SYMBOL(arm_dma_zone_size);   * so a successful GFP_DMA allocation will always satisfy this.   
*/  phys_addr_t arm_dma_limit; +unsigned long arm_dma_pfn_limit;  static void __init arm_adjust_dma_zone(unsigned long *size, unsigned long *hole,  	unsigned long dma_size) @@ -240,10 +177,11 @@ void __init setup_dma_zone(const struct machine_desc *mdesc)  		arm_dma_limit = PHYS_OFFSET + arm_dma_zone_size - 1;  	} else  		arm_dma_limit = 0xffffffff; +	arm_dma_pfn_limit = arm_dma_limit >> PAGE_SHIFT;  #endif  } -static void __init arm_bootmem_free(unsigned long min, unsigned long max_low, +static void __init zone_sizes_init(unsigned long min, unsigned long max_low,  	unsigned long max_high)  {  	unsigned long zone_size[MAX_NR_ZONES], zhole_size[MAX_NR_ZONES]; @@ -336,14 +274,8 @@ phys_addr_t __init arm_memblock_steal(phys_addr_t size, phys_addr_t align)  	return phys;  } -void __init arm_memblock_init(struct meminfo *mi, -	const struct machine_desc *mdesc) +void __init arm_memblock_init(const struct machine_desc *mdesc)  { -	int i; - -	for (i = 0; i < mi->nr_banks; i++) -		memblock_add(mi->bank[i].start, mi->bank[i].size); -  	/* Register the kernel text, kernel data and initrd with memblock. */  #ifdef CONFIG_XIP_KERNEL  	memblock_reserve(__pa(_sdata), _end - _sdata); @@ -351,6 +283,12 @@ void __init arm_memblock_init(struct meminfo *mi,  	memblock_reserve(__pa(_stext), _end - _stext);  #endif  #ifdef CONFIG_BLK_DEV_INITRD +	/* FDT scan will populate initrd_start */ +	if (initrd_start && !phys_initrd_size) { +		phys_initrd_start = __virt_to_phys(initrd_start); +		phys_initrd_size = initrd_end - initrd_start; +	} +	initrd_start = initrd_end = 0;  	if (phys_initrd_size &&  	    !memblock_is_region_memory(phys_initrd_start, phys_initrd_size)) {  		pr_err("INITRD: 0x%08llx+0x%08lx is not a memory region - disabling initrd\n", @@ -373,13 +311,12 @@ void __init arm_memblock_init(struct meminfo *mi,  #endif  	arm_mm_memblock_reserve(); -	arm_dt_memblock_reserve();  	/* reserve any platform specific memblock areas */  	if (mdesc->reserve)  		mdesc->reserve(); -	early_init_dt_scan_reserved_mem(); +	early_init_fdt_scan_reserved_mem();  	/*  	 * reserve memory for DMA contigouos allocations, @@ -388,7 +325,6 @@ void __init arm_memblock_init(struct meminfo *mi,  	dma_contiguous_reserve(min(arm_dma_limit, arm_lowmem_limit));  	arm_memblock_steal_permitted = false; -	memblock_allow_resize();  	memblock_dump_all();  } @@ -396,12 +332,11 @@ void __init bootmem_init(void)  {  	unsigned long min, max_low, max_high; +	memblock_allow_resize();  	max_low = max_high = 0;  	find_limits(&min, &max_low, &max_high); -	arm_bootmem_init(min, max_low); -  	/*  	 * Sparsemem tries to allocate bootmem in memory_present(),  	 * so must be done after the fixed reservations @@ -418,18 +353,16 @@ void __init bootmem_init(void)  	 * the sparse mem_map arrays initialized by sparse_init()  	 * for memmap_init_zone(), otherwise all PFNs are invalid.  	 */ -	arm_bootmem_free(min, max_low, max_high); +	zone_sizes_init(min, max_low, max_high);  	/*  	 * This doesn't seem to be used by the Linux memory manager any  	 * more, but is used by ll_rw_block.  If we can get rid of it, we  	 * also get rid of some of the stuff above as well. -	 * -	 * Note: max_low_pfn and max_pfn reflect the number of _pages_ in -	 * the system, not the maximum PFN.  	 */ -	max_low_pfn = max_low - PHYS_PFN_OFFSET; -	max_pfn = max_high - PHYS_PFN_OFFSET; +	min_low_pfn = min; +	max_low_pfn = max_low; +	max_pfn = max_high;  }  /* @@ -467,60 +400,59 @@ free_memmap(unsigned long start_pfn, unsigned long end_pfn)  	 * free the section of the memmap array.  	 
*/  	if (pg < pgend) -		free_bootmem(pg, pgend - pg); +		memblock_free_early(pg, pgend - pg);  }  /*   * The mem_map array can get very big.  Free the unused area of the memory map.   */ -static void __init free_unused_memmap(struct meminfo *mi) +static void __init free_unused_memmap(void)  { -	unsigned long bank_start, prev_bank_end = 0; -	unsigned int i; +	unsigned long start, prev_end = 0; +	struct memblock_region *reg;  	/*  	 * This relies on each bank being in address order.  	 * The banks are sorted previously in bootmem_init().  	 */ -	for_each_bank(i, mi) { -		struct membank *bank = &mi->bank[i]; - -		bank_start = bank_pfn_start(bank); +	for_each_memblock(memory, reg) { +		start = memblock_region_memory_base_pfn(reg);  #ifdef CONFIG_SPARSEMEM  		/*  		 * Take care not to free memmap entries that don't exist  		 * due to SPARSEMEM sections which aren't present.  		 */ -		bank_start = min(bank_start, -				 ALIGN(prev_bank_end, PAGES_PER_SECTION)); +		start = min(start, +				 ALIGN(prev_end, PAGES_PER_SECTION));  #else  		/*  		 * Align down here since the VM subsystem insists that the  		 * memmap entries are valid from the bank start aligned to  		 * MAX_ORDER_NR_PAGES.  		 */ -		bank_start = round_down(bank_start, MAX_ORDER_NR_PAGES); +		start = round_down(start, MAX_ORDER_NR_PAGES);  #endif  		/*  		 * If we had a previous bank, and there is a space  		 * between the current bank and the previous, free it.  		 */ -		if (prev_bank_end && prev_bank_end < bank_start) -			free_memmap(prev_bank_end, bank_start); +		if (prev_end && prev_end < start) +			free_memmap(prev_end, start);  		/*  		 * Align up here since the VM subsystem insists that the  		 * memmap entries are valid from the bank end aligned to  		 * MAX_ORDER_NR_PAGES.  		 */ -		prev_bank_end = ALIGN(bank_pfn_end(bank), MAX_ORDER_NR_PAGES); +		prev_end = ALIGN(memblock_region_memory_end_pfn(reg), +				 MAX_ORDER_NR_PAGES);  	}  #ifdef CONFIG_SPARSEMEM -	if (!IS_ALIGNED(prev_bank_end, PAGES_PER_SECTION)) -		free_memmap(prev_bank_end, -			    ALIGN(prev_bank_end, PAGES_PER_SECTION)); +	if (!IS_ALIGNED(prev_end, PAGES_PER_SECTION)) +		free_memmap(prev_end, +			    ALIGN(prev_end, PAGES_PER_SECTION));  #endif  } @@ -535,7 +467,7 @@ static inline void free_area_high(unsigned long pfn, unsigned long end)  static void __init free_highpages(void)  {  #ifdef CONFIG_HIGHMEM -	unsigned long max_low = max_low_pfn + PHYS_PFN_OFFSET; +	unsigned long max_low = max_low_pfn;  	struct memblock_region *mem, *res;  	/* set highmem page free */ @@ -593,10 +525,10 @@ void __init mem_init(void)  	extern u32 itcm_end;  #endif -	max_mapnr   = pfn_to_page(max_pfn + PHYS_PFN_OFFSET) - mem_map; +	set_max_mapnr(pfn_to_page(max_pfn) - mem_map);  	/* this will put all unused low memory onto the freelists */ -	free_unused_memmap(&meminfo); +	free_unused_memmap();  	free_all_bootmem();  #ifdef CONFIG_SA1111 diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c index f123d6eb074..d1e5ad7ab3b 100644 --- a/arch/arm/mm/ioremap.c +++ b/arch/arm/mm/ioremap.c @@ -392,9 +392,9 @@ __arm_ioremap_exec(phys_addr_t phys_addr, size_t size, bool cached)  	unsigned int mtype;  	if (cached) -		mtype = MT_MEMORY; +		mtype = MT_MEMORY_RWX;  	else -		mtype = MT_MEMORY_NONCACHED; +		mtype = MT_MEMORY_RWX_NONCACHED;  	return __arm_ioremap_caller(phys_addr, size, mtype,  			__builtin_return_address(0)); @@ -438,6 +438,13 @@ void __arm_iounmap(volatile void __iomem *io_addr)  EXPORT_SYMBOL(__arm_iounmap);  #ifdef CONFIG_PCI +static int pci_ioremap_mem_type = MT_DEVICE; + 
+void pci_ioremap_set_mem_type(int mem_type) +{ +	pci_ioremap_mem_type = mem_type; +} +  int pci_ioremap_io(unsigned int offset, phys_addr_t phys_addr)  {  	BUG_ON(offset + SZ_64K > IO_SPACE_LIMIT); @@ -445,7 +452,7 @@ int pci_ioremap_io(unsigned int offset, phys_addr_t phys_addr)  	return ioremap_page_range(PCI_IO_VIRT_BASE + offset,  				  PCI_IO_VIRT_BASE + offset + SZ_64K,  				  phys_addr, -				  __pgprot(get_mem_type(MT_DEVICE)->prot_pte)); +				  __pgprot(get_mem_type(pci_ioremap_mem_type)->prot_pte));  }  EXPORT_SYMBOL_GPL(pci_ioremap_io);  #endif diff --git a/arch/arm/mm/l2c-common.c b/arch/arm/mm/l2c-common.c new file mode 100644 index 00000000000..10a3cf28c36 --- /dev/null +++ b/arch/arm/mm/l2c-common.c @@ -0,0 +1,20 @@ +/* + * Copyright (C) 2010 ARM Ltd. + * Written by Catalin Marinas <catalin.marinas@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/bug.h> +#include <linux/smp.h> +#include <asm/outercache.h> + +void outer_disable(void) +{ +	WARN_ON(!irqs_disabled()); +	WARN_ON(num_online_cpus() > 1); + +	if (outer_cache.disable) +		outer_cache.disable(); +} diff --git a/arch/arm/mm/l2c-l2x0-resume.S b/arch/arm/mm/l2c-l2x0-resume.S new file mode 100644 index 00000000000..99b05f21a59 --- /dev/null +++ b/arch/arm/mm/l2c-l2x0-resume.S @@ -0,0 +1,58 @@ +/* + * L2C-310 early resume code.  This can be used by platforms to restore + * the settings of their L2 cache controller before restoring the + * processor state. + * + * This code can only be used to if you are running in the secure world. + */ +#include <linux/linkage.h> +#include <asm/hardware/cache-l2x0.h> + +	.text + +ENTRY(l2c310_early_resume) +	adr	r0, 1f +	ldr	r2, [r0] +	add	r0, r2, r0 + +	ldmia	r0, {r1, r2, r3, r4, r5, r6, r7, r8} +	@ r1 = phys address of L2C-310 controller +	@ r2 = aux_ctrl +	@ r3 = tag_latency +	@ r4 = data_latency +	@ r5 = filter_start +	@ r6 = filter_end +	@ r7 = prefetch_ctrl +	@ r8 = pwr_ctrl + +	@ Check that the address has been initialised +	teq	r1, #0 +	moveq	pc, lr + +	@ The prefetch and power control registers are revision dependent +	@ and can be written whether or not the L2 cache is enabled +	ldr	r0, [r1, #L2X0_CACHE_ID] +	and	r0, r0, #L2X0_CACHE_ID_RTL_MASK +	cmp	r0, #L310_CACHE_ID_RTL_R2P0 +	strcs	r7, [r1, #L310_PREFETCH_CTRL] +	cmp	r0, #L310_CACHE_ID_RTL_R3P0 +	strcs	r8, [r1, #L310_POWER_CTRL] + +	@ Don't setup the L2 cache if it is already enabled +	ldr	r0, [r1, #L2X0_CTRL] +	tst	r0, #L2X0_CTRL_EN +	movne	pc, lr + +	str	r3, [r1, #L310_TAG_LATENCY_CTRL] +	str	r4, [r1, #L310_DATA_LATENCY_CTRL] +	str	r6, [r1, #L310_ADDR_FILTER_END] +	str	r5, [r1, #L310_ADDR_FILTER_START] + +	str	r2, [r1, #L2X0_AUX_CTRL] +	mov	r9, #L2X0_CTRL_EN +	str	r9, [r1, #L2X0_CTRL] +	mov	pc, lr +ENDPROC(l2c310_early_resume) + +	.align +1:	.long	l2x0_saved_regs - . 
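As a rough usage sketch (not part of this series) of the pci_ioremap_set_mem_type() hook added above: a platform could select a different memory type before its PCI I/O window is mapped. The function name example_mach_pci_setup() and the choice of MT_UNCACHED below are illustrative assumptions only.

#include <asm/mach/map.h>
#include <asm/mach/pci.h>

/* Hypothetical platform setup: switch the PCI I/O window mapping from
 * the default MT_DEVICE to MT_UNCACHED, then map the first 64K window. */
static int __init example_mach_pci_setup(phys_addr_t cpu_io_base)
{
	pci_ioremap_set_mem_type(MT_UNCACHED);
	return pci_ioremap_io(0, cpu_io_base);
}

The hook has to run before pci_ioremap_io(), since the selected memory type is only consulted when the window is actually mapped.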
diff --git a/arch/arm/mm/mm.h b/arch/arm/mm/mm.h index d5a4e9ad8f0..ce727d47275 100644 --- a/arch/arm/mm/mm.h +++ b/arch/arm/mm/mm.h @@ -2,6 +2,8 @@  #include <linux/list.h>  #include <linux/vmalloc.h> +#include <asm/pgtable.h> +  /* the upper-most page table pointer */  extern pmd_t *top_pmd; @@ -38,6 +40,7 @@ static inline pmd_t *pmd_off_k(unsigned long virt)  struct mem_type {  	pteval_t prot_pte; +	pteval_t prot_pte_s2;  	pmdval_t prot_l1;  	pmdval_t prot_sect;  	unsigned int domain; @@ -81,8 +84,10 @@ extern __init void add_static_vm_early(struct static_vm *svm);  #ifdef CONFIG_ZONE_DMA  extern phys_addr_t arm_dma_limit; +extern unsigned long arm_dma_pfn_limit;  #else  #define arm_dma_limit ((phys_addr_t)~0) +#define arm_dma_pfn_limit (~0ul >> PAGE_SHIFT)  #endif  extern phys_addr_t arm_lowmem_limit; @@ -90,3 +95,5 @@ extern phys_addr_t arm_lowmem_limit;  void __init bootmem_init(void);  void arm_mm_memblock_reserve(void);  void dma_contiguous_remap(void); + +unsigned long __clear_cr(unsigned long mask); diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c index 0c6356255fe..5e85ed37136 100644 --- a/arch/arm/mm/mmap.c +++ b/arch/arm/mm/mmap.c @@ -146,7 +146,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,  	info.flags = VM_UNMAPPED_AREA_TOPDOWN;  	info.length = len; -	info.low_limit = PAGE_SIZE; +	info.low_limit = FIRST_USER_ADDRESS;  	info.high_limit = mm->mmap_base;  	info.align_mask = do_align ? (PAGE_MASK & (SHMLBA - 1)) : 0;  	info.align_offset = pgoff << PAGE_SHIFT; @@ -202,13 +202,11 @@ int valid_phys_addr_range(phys_addr_t addr, size_t size)  }  /* - * We don't use supersection mappings for mmap() on /dev/mem, which - * means that we can't map the memory area above the 4G barrier into - * userspace. + * Do not allow /dev/mem mappings beyond the supported physical range.   */  int valid_mmap_phys_addr_range(unsigned long pfn, size_t size)  { -	return !(pfn + (size >> PAGE_SHIFT) > 0x00100000); +	return (pfn + (size >> PAGE_SHIFT)) <= (1 + (PHYS_MASK >> PAGE_SHIFT));  }  #ifdef CONFIG_STRICT_DEVMEM diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index b1d17eeb59b..6e3ba8d112a 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -22,16 +22,20 @@  #include <asm/cputype.h>  #include <asm/sections.h>  #include <asm/cachetype.h> +#include <asm/sections.h>  #include <asm/setup.h>  #include <asm/smp_plat.h>  #include <asm/tlb.h>  #include <asm/highmem.h>  #include <asm/system_info.h>  #include <asm/traps.h> +#include <asm/procinfo.h> +#include <asm/memory.h>  #include <asm/mach/arch.h>  #include <asm/mach/map.h>  #include <asm/mach/pci.h> +#include <asm/fixmap.h>  #include "mm.h"  #include "tcm.h" @@ -114,28 +118,54 @@ static struct cachepolicy cache_policies[] __initdata = {  };  #ifdef CONFIG_CPU_CP15 +static unsigned long initial_pmd_value __initdata = 0; +  /* - * These are useful for identifying cache coherency - * problems by allowing the cache or the cache and - * writebuffer to be turned off.  (Note: the write - * buffer should not be on and the cache off). + * Initialise the cache_policy variable with the initial state specified + * via the "pmd" value.  This is used to ensure that on ARMv6 and later, + * the C code sets the page tables up with the same policy as the head + * assembly code, which avoids an illegal state where the TLBs can get + * confused.  See comments in early_cachepolicy() for more information.   
*/ -static int __init early_cachepolicy(char *p) +void __init init_default_cache_policy(unsigned long pmd)  {  	int i; +	initial_pmd_value = pmd; + +	pmd &= PMD_SECT_TEX(1) | PMD_SECT_BUFFERABLE | PMD_SECT_CACHEABLE; + +	for (i = 0; i < ARRAY_SIZE(cache_policies); i++) +		if (cache_policies[i].pmd == pmd) { +			cachepolicy = i; +			break; +		} + +	if (i == ARRAY_SIZE(cache_policies)) +		pr_err("ERROR: could not find cache policy\n"); +} + +/* + * These are useful for identifying cache coherency problems by allowing + * the cache or the cache and writebuffer to be turned off.  (Note: the + * write buffer should not be on and the cache off). + */ +static int __init early_cachepolicy(char *p) +{ +	int i, selected = -1; +  	for (i = 0; i < ARRAY_SIZE(cache_policies); i++) {  		int len = strlen(cache_policies[i].policy);  		if (memcmp(p, cache_policies[i].policy, len) == 0) { -			cachepolicy = i; -			cr_alignment &= ~cache_policies[i].cr_mask; -			cr_no_alignment &= ~cache_policies[i].cr_mask; +			selected = i;  			break;  		}  	} -	if (i == ARRAY_SIZE(cache_policies)) -		printk(KERN_ERR "ERROR: unknown or unsupported cache policy\n"); + +	if (selected == -1) +		pr_err("ERROR: unknown or unsupported cache policy\n"); +  	/*  	 * This restriction is partly to do with the way we boot; it is  	 * unpredictable to have memory mapped using two different sets of @@ -143,12 +173,18 @@ static int __init early_cachepolicy(char *p)  	 * change these attributes once the initial assembly has setup the  	 * page tables.  	 */ -	if (cpu_architecture() >= CPU_ARCH_ARMv6) { -		printk(KERN_WARNING "Only cachepolicy=writeback supported on ARMv6 and later\n"); -		cachepolicy = CPOLICY_WRITEBACK; +	if (cpu_architecture() >= CPU_ARCH_ARMv6 && selected != cachepolicy) { +		pr_warn("Only cachepolicy=%s supported on ARMv6 and later\n", +			cache_policies[cachepolicy].policy); +		return 0; +	} + +	if (selected != cachepolicy) { +		unsigned long cr = __clear_cr(cache_policies[selected].cr_mask); +		cachepolicy = selected; +		flush_cache_all(); +		set_cr(cr);  	} -	flush_cache_all(); -	set_cr(cr_alignment);  	return 0;  }  early_param("cachepolicy", early_cachepolicy); @@ -183,35 +219,6 @@ static int __init early_ecc(char *p)  early_param("ecc", early_ecc);  #endif -static int __init noalign_setup(char *__unused) -{ -	cr_alignment &= ~CR_A; -	cr_no_alignment &= ~CR_A; -	set_cr(cr_alignment); -	return 1; -} -__setup("noalign", noalign_setup); - -#ifndef CONFIG_SMP -void adjust_cr(unsigned long mask, unsigned long set) -{ -	unsigned long flags; - -	mask &= ~CR_A; - -	set &= mask; - -	local_irq_save(flags); - -	cr_no_alignment = (cr_no_alignment & ~mask) | set; -	cr_alignment = (cr_alignment & ~mask) | set; - -	set_cr((get_cr() & ~mask) | set); - -	local_irq_restore(flags); -} -#endif -  #else /* ifdef CONFIG_CPU_CP15 */  static int __init early_cachepolicy(char *p) @@ -229,12 +236,16 @@ __setup("noalign", noalign_setup);  #endif /* ifdef CONFIG_CPU_CP15 / else */  #define PROT_PTE_DEVICE		L_PTE_PRESENT|L_PTE_YOUNG|L_PTE_DIRTY|L_PTE_XN +#define PROT_PTE_S2_DEVICE	PROT_PTE_DEVICE  #define PROT_SECT_DEVICE	PMD_TYPE_SECT|PMD_SECT_AP_WRITE  static struct mem_type mem_types[] = {  	[MT_DEVICE] = {		  /* Strongly ordered / ARMv6 shared device */  		.prot_pte	= PROT_PTE_DEVICE | L_PTE_MT_DEV_SHARED |  				  L_PTE_SHARED, +		.prot_pte_s2	= s2_policy(PROT_PTE_S2_DEVICE) | +				  s2_policy(L_PTE_S2_MT_DEV_SHARED) | +				  L_PTE_SHARED,  		.prot_l1	= PMD_TYPE_TABLE,  		.prot_sect	= PROT_SECT_DEVICE | PMD_SECT_S,  		.domain		= 
DOMAIN_IO, @@ -285,36 +296,43 @@ static struct mem_type mem_types[] = {  		.prot_l1   = PMD_TYPE_TABLE,  		.domain    = DOMAIN_USER,  	}, -	[MT_MEMORY] = { +	[MT_MEMORY_RWX] = {  		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY,  		.prot_l1   = PMD_TYPE_TABLE,  		.prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,  		.domain    = DOMAIN_KERNEL,  	}, +	[MT_MEMORY_RW] = { +		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | +			     L_PTE_XN, +		.prot_l1   = PMD_TYPE_TABLE, +		.prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE, +		.domain    = DOMAIN_KERNEL, +	},  	[MT_ROM] = {  		.prot_sect = PMD_TYPE_SECT,  		.domain    = DOMAIN_KERNEL,  	}, -	[MT_MEMORY_NONCACHED] = { +	[MT_MEMORY_RWX_NONCACHED] = {  		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |  				L_PTE_MT_BUFFERABLE,  		.prot_l1   = PMD_TYPE_TABLE,  		.prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,  		.domain    = DOMAIN_KERNEL,  	}, -	[MT_MEMORY_DTCM] = { +	[MT_MEMORY_RW_DTCM] = {  		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |  				L_PTE_XN,  		.prot_l1   = PMD_TYPE_TABLE,  		.prot_sect = PMD_TYPE_SECT | PMD_SECT_XN,  		.domain    = DOMAIN_KERNEL,  	}, -	[MT_MEMORY_ITCM] = { +	[MT_MEMORY_RWX_ITCM] = {  		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY,  		.prot_l1   = PMD_TYPE_TABLE,  		.domain    = DOMAIN_KERNEL,  	}, -	[MT_MEMORY_SO] = { +	[MT_MEMORY_RW_SO] = {  		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |  				L_PTE_MT_UNCACHED | L_PTE_XN,  		.prot_l1   = PMD_TYPE_TABLE, @@ -323,7 +341,8 @@ static struct mem_type mem_types[] = {  		.domain    = DOMAIN_KERNEL,  	},  	[MT_MEMORY_DMA_READY] = { -		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY, +		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | +				L_PTE_XN,  		.prot_l1   = PMD_TYPE_TABLE,  		.domain    = DOMAIN_KERNEL,  	}, @@ -335,6 +354,44 @@ const struct mem_type *get_mem_type(unsigned int type)  }  EXPORT_SYMBOL(get_mem_type); +#define PTE_SET_FN(_name, pteop) \ +static int pte_set_##_name(pte_t *ptep, pgtable_t token, unsigned long addr, \ +			void *data) \ +{ \ +	pte_t pte = pteop(*ptep); \ +\ +	set_pte_ext(ptep, pte, 0); \ +	return 0; \ +} \ + +#define SET_MEMORY_FN(_name, callback) \ +int set_memory_##_name(unsigned long addr, int numpages) \ +{ \ +	unsigned long start = addr; \ +	unsigned long size = PAGE_SIZE*numpages; \ +	unsigned end = start + size; \ +\ +	if (start < MODULES_VADDR || start >= MODULES_END) \ +		return -EINVAL;\ +\ +	if (end < MODULES_VADDR || end >= MODULES_END) \ +		return -EINVAL; \ +\ +	apply_to_page_range(&init_mm, start, size, callback, NULL); \ +	flush_tlb_kernel_range(start, end); \ +	return 0;\ +} + +PTE_SET_FN(ro, pte_wrprotect) +PTE_SET_FN(rw, pte_mkwrite) +PTE_SET_FN(x, pte_mkexec) +PTE_SET_FN(nx, pte_mknexec) + +SET_MEMORY_FN(ro, pte_set_ro) +SET_MEMORY_FN(rw, pte_set_rw) +SET_MEMORY_FN(x, pte_set_x) +SET_MEMORY_FN(nx, pte_set_nx) +  /*   * Adjust the PMD section entries according to the CPU in use.   */ @@ -361,8 +418,17 @@ static void __init build_mem_type_table(void)  			cachepolicy = CPOLICY_WRITEBACK;  		ecc_mask = 0;  	} -	if (is_smp()) -		cachepolicy = CPOLICY_WRITEALLOC; + +	if (is_smp()) { +		if (cachepolicy != CPOLICY_WRITEALLOC) { +			pr_warn("Forcing write-allocate cache policy for SMP\n"); +			cachepolicy = CPOLICY_WRITEALLOC; +		} +		if (!(initial_pmd_value & PMD_SECT_S)) { +			pr_warn("Forcing shared mappings for SMP\n"); +			initial_pmd_value |= PMD_SECT_S; +		} +	}  	/*  	 * Strip out features not present on earlier architectures. 
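For reference, a minimal sketch of how the set_memory_*() helpers defined above could be exercised on a page-aligned range inside the module area; the example_protect_range() wrapper is hypothetical and only illustrates the calling convention (the helpers themselves reject addresses outside MODULES_VADDR..MODULES_END).

/* Hypothetical caller: make numpages pages at addr (which must lie in
 * the module area) read-only and non-executable. */
static int example_protect_range(unsigned long addr, int numpages)
{
	int ret;

	ret = set_memory_ro(addr, numpages);	/* applies pte_wrprotect() */
	if (ret)
		return ret;

	return set_memory_nx(addr, numpages);	/* applies pte_mknexec() */
}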
@@ -408,6 +474,9 @@ static void __init build_mem_type_table(void)  			mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_XN;  			mem_types[MT_DEVICE_CACHED].prot_sect |= PMD_SECT_XN;  			mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_XN; + +			/* Also setup NX memory mapping */ +			mem_types[MT_MEMORY_RW].prot_sect |= PMD_SECT_XN;  		}  		if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) {  			/* @@ -456,7 +525,18 @@ static void __init build_mem_type_table(void)  	cp = &cache_policies[cachepolicy];  	vecs_pgprot = kern_pgprot = user_pgprot = cp->pte;  	s2_pgprot = cp->pte_s2; -	hyp_device_pgprot = s2_device_pgprot = mem_types[MT_DEVICE].prot_pte; +	hyp_device_pgprot = mem_types[MT_DEVICE].prot_pte; +	s2_device_pgprot = mem_types[MT_DEVICE].prot_pte_s2; + +	/* +	 * We don't use domains on ARMv6 (since this causes problems with +	 * v6/v7 kernels), so we must use a separate memory type for user +	 * r/o, kernel r/w to map the vectors page. +	 */ +#ifndef CONFIG_ARM_LPAE +	if (cpu_arch == CPU_ARCH_ARMv6) +		vecs_pgprot |= L_PTE_MT_VECTORS; +#endif  	/*  	 * ARMv6 and above have extended page tables. @@ -472,11 +552,12 @@ static void __init build_mem_type_table(void)  		mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;  #endif -		if (is_smp()) { -			/* -			 * Mark memory with the "shared" attribute -			 * for SMP systems -			 */ +		/* +		 * If the initial page tables were created with the S bit +		 * set, then we need to do the same here for the same +		 * reasons given in early_cachepolicy(). +		 */ +		if (initial_pmd_value & PMD_SECT_S) {  			user_pgprot |= L_PTE_SHARED;  			kern_pgprot |= L_PTE_SHARED;  			vecs_pgprot |= L_PTE_SHARED; @@ -485,11 +566,13 @@ static void __init build_mem_type_table(void)  			mem_types[MT_DEVICE_WC].prot_pte |= L_PTE_SHARED;  			mem_types[MT_DEVICE_CACHED].prot_sect |= PMD_SECT_S;  			mem_types[MT_DEVICE_CACHED].prot_pte |= L_PTE_SHARED; -			mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S; -			mem_types[MT_MEMORY].prot_pte |= L_PTE_SHARED; +			mem_types[MT_MEMORY_RWX].prot_sect |= PMD_SECT_S; +			mem_types[MT_MEMORY_RWX].prot_pte |= L_PTE_SHARED; +			mem_types[MT_MEMORY_RW].prot_sect |= PMD_SECT_S; +			mem_types[MT_MEMORY_RW].prot_pte |= L_PTE_SHARED;  			mem_types[MT_MEMORY_DMA_READY].prot_pte |= L_PTE_SHARED; -			mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S; -			mem_types[MT_MEMORY_NONCACHED].prot_pte |= L_PTE_SHARED; +			mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= PMD_SECT_S; +			mem_types[MT_MEMORY_RWX_NONCACHED].prot_pte |= L_PTE_SHARED;  		}  	} @@ -500,15 +583,15 @@ static void __init build_mem_type_table(void)  	if (cpu_arch >= CPU_ARCH_ARMv6) {  		if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) {  			/* Non-cacheable Normal is XCB = 001 */ -			mem_types[MT_MEMORY_NONCACHED].prot_sect |= +			mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |=  				PMD_SECT_BUFFERED;  		} else {  			/* For both ARMv6 and non-TEX-remapping ARMv7 */ -			mem_types[MT_MEMORY_NONCACHED].prot_sect |= +			mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |=  				PMD_SECT_TEX(1);  		}  	} else { -		mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_BUFFERABLE; +		mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= PMD_SECT_BUFFERABLE;  	}  #ifdef CONFIG_ARM_LPAE @@ -541,10 +624,12 @@ static void __init build_mem_type_table(void)  	mem_types[MT_LOW_VECTORS].prot_l1 |= ecc_mask;  	mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask; -	mem_types[MT_MEMORY].prot_sect |= ecc_mask | cp->pmd; -	mem_types[MT_MEMORY].prot_pte |= kern_pgprot; +	
mem_types[MT_MEMORY_RWX].prot_sect |= ecc_mask | cp->pmd; +	mem_types[MT_MEMORY_RWX].prot_pte |= kern_pgprot; +	mem_types[MT_MEMORY_RW].prot_sect |= ecc_mask | cp->pmd; +	mem_types[MT_MEMORY_RW].prot_pte |= kern_pgprot;  	mem_types[MT_MEMORY_DMA_READY].prot_pte |= kern_pgprot; -	mem_types[MT_MEMORY_NONCACHED].prot_sect |= ecc_mask; +	mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= ecc_mask;  	mem_types[MT_ROM].prot_sect |= cp->pmd;  	switch (cp->pmd) { @@ -556,8 +641,8 @@ static void __init build_mem_type_table(void)  		mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WB;  		break;  	} -	printk("Memory policy: ECC %sabled, Data cache %s\n", -		ecc_mask ? "en" : "dis", cp->policy); +	pr_info("Memory policy: %sData cache %s\n", +		ecc_mask ? "ECC enabled, " : "", cp->policy);  	for (i = 0; i < ARRAY_SIZE(mem_types); i++) {  		struct mem_type *t = &mem_types[i]; @@ -990,74 +1075,47 @@ phys_addr_t arm_lowmem_limit __initdata = 0;  void __init sanity_check_meminfo(void)  {  	phys_addr_t memblock_limit = 0; -	int i, j, highmem = 0; +	int highmem = 0;  	phys_addr_t vmalloc_limit = __pa(vmalloc_min - 1) + 1; +	struct memblock_region *reg; -	for (i = 0, j = 0; i < meminfo.nr_banks; i++) { -		struct membank *bank = &meminfo.bank[j]; -		phys_addr_t size_limit; - -		*bank = meminfo.bank[i]; -		size_limit = bank->size; +	for_each_memblock(memory, reg) { +		phys_addr_t block_start = reg->base; +		phys_addr_t block_end = reg->base + reg->size; +		phys_addr_t size_limit = reg->size; -		if (bank->start >= vmalloc_limit) +		if (reg->base >= vmalloc_limit)  			highmem = 1;  		else -			size_limit = vmalloc_limit - bank->start; +			size_limit = vmalloc_limit - reg->base; -		bank->highmem = highmem; -#ifdef CONFIG_HIGHMEM -		/* -		 * Split those memory banks which are partially overlapping -		 * the vmalloc area greatly simplifying things later. -		 */ -		if (!highmem && bank->size > size_limit) { -			if (meminfo.nr_banks >= NR_BANKS) { -				printk(KERN_CRIT "NR_BANKS too low, " -						 "ignoring high memory\n"); -			} else { -				memmove(bank + 1, bank, -					(meminfo.nr_banks - i) * sizeof(*bank)); -				meminfo.nr_banks++; -				i++; -				bank[1].size -= size_limit; -				bank[1].start = vmalloc_limit; -				bank[1].highmem = highmem = 1; -				j++; +		if (!IS_ENABLED(CONFIG_HIGHMEM) || cache_is_vipt_aliasing()) { + +			if (highmem) { +				pr_notice("Ignoring RAM at %pa-%pa (!CONFIG_HIGHMEM)\n", +					&block_start, &block_end); +				memblock_remove(reg->base, reg->size); +				continue;  			} -			bank->size = size_limit; -		} -#else -		/* -		 * Highmem banks not allowed with !CONFIG_HIGHMEM. -		 */ -		if (highmem) { -			printk(KERN_NOTICE "Ignoring RAM at %.8llx-%.8llx " -			       "(!CONFIG_HIGHMEM).\n", -			       (unsigned long long)bank->start, -			       (unsigned long long)bank->start + bank->size - 1); -			continue; -		} -		/* -		 * Check whether this memory bank would partially overlap -		 * the vmalloc area. 
-		 */ -		if (bank->size > size_limit) { -			printk(KERN_NOTICE "Truncating RAM at %.8llx-%.8llx " -			       "to -%.8llx (vmalloc region overlap).\n", -			       (unsigned long long)bank->start, -			       (unsigned long long)bank->start + bank->size - 1, -			       (unsigned long long)bank->start + size_limit - 1); -			bank->size = size_limit; +			if (reg->size > size_limit) { +				phys_addr_t overlap_size = reg->size - size_limit; + +				pr_notice("Truncating RAM at %pa-%pa to -%pa", +				      &block_start, &block_end, &vmalloc_limit); +				memblock_remove(vmalloc_limit, overlap_size); +				block_end = vmalloc_limit; +			}  		} -#endif -		if (!bank->highmem) { -			phys_addr_t bank_end = bank->start + bank->size; -			if (bank_end > arm_lowmem_limit) -				arm_lowmem_limit = bank_end; +		if (!highmem) { +			if (block_end > arm_lowmem_limit) { +				if (reg->size > size_limit) +					arm_lowmem_limit = vmalloc_limit; +				else +					arm_lowmem_limit = block_end; +			}  			/*  			 * Find the first non-section-aligned page, and point @@ -1073,35 +1131,15 @@ void __init sanity_check_meminfo(void)  			 * occurs before any free memory is mapped.  			 */  			if (!memblock_limit) { -				if (!IS_ALIGNED(bank->start, SECTION_SIZE)) -					memblock_limit = bank->start; -				else if (!IS_ALIGNED(bank_end, SECTION_SIZE)) -					memblock_limit = bank_end; +				if (!IS_ALIGNED(block_start, SECTION_SIZE)) +					memblock_limit = block_start; +				else if (!IS_ALIGNED(block_end, SECTION_SIZE)) +					memblock_limit = arm_lowmem_limit;  			} -		} -		j++; -	} -#ifdef CONFIG_HIGHMEM -	if (highmem) { -		const char *reason = NULL; -		if (cache_is_vipt_aliasing()) { -			/* -			 * Interactions between kmap and other mappings -			 * make highmem support with aliasing VIPT caches -			 * rather difficult. -			 */ -			reason = "with VIPT aliasing cache"; -		} -		if (reason) { -			printk(KERN_CRIT "HIGHMEM is not supported %s, ignoring high memory\n", -				reason); -			while (j > 0 && meminfo.bank[j - 1].highmem) -				j--;  		}  	} -#endif -	meminfo.nr_banks = j; +  	high_memory = __va(arm_lowmem_limit - 1) + 1;  	/* @@ -1288,12 +1326,17 @@ static void __init kmap_init(void)  #ifdef CONFIG_HIGHMEM  	pkmap_page_table = early_pte_alloc(pmd_off_k(PKMAP_BASE),  		PKMAP_BASE, _PAGE_KERNEL_TABLE); + +	fixmap_page_table = early_pte_alloc(pmd_off_k(FIXADDR_START), +		FIXADDR_START, _PAGE_KERNEL_TABLE);  #endif  }  static void __init map_lowmem(void)  {  	struct memblock_region *reg; +	unsigned long kernel_x_start = round_down(__pa(_stext), SECTION_SIZE); +	unsigned long kernel_x_end = round_up(__pa(__init_end), SECTION_SIZE);  	/* Map all the lowmem memory banks. 
*/  	for_each_memblock(memory, reg) { @@ -1306,15 +1349,123 @@ static void __init map_lowmem(void)  		if (start >= end)  			break; -		map.pfn = __phys_to_pfn(start); -		map.virtual = __phys_to_virt(start); -		map.length = end - start; -		map.type = MT_MEMORY; +		if (end < kernel_x_start || start >= kernel_x_end) { +			map.pfn = __phys_to_pfn(start); +			map.virtual = __phys_to_virt(start); +			map.length = end - start; +			map.type = MT_MEMORY_RWX; -		create_mapping(&map); +			create_mapping(&map); +		} else { +			/* This better cover the entire kernel */ +			if (start < kernel_x_start) { +				map.pfn = __phys_to_pfn(start); +				map.virtual = __phys_to_virt(start); +				map.length = kernel_x_start - start; +				map.type = MT_MEMORY_RW; + +				create_mapping(&map); +			} + +			map.pfn = __phys_to_pfn(kernel_x_start); +			map.virtual = __phys_to_virt(kernel_x_start); +			map.length = kernel_x_end - kernel_x_start; +			map.type = MT_MEMORY_RWX; + +			create_mapping(&map); + +			if (kernel_x_end < end) { +				map.pfn = __phys_to_pfn(kernel_x_end); +				map.virtual = __phys_to_virt(kernel_x_end); +				map.length = end - kernel_x_end; +				map.type = MT_MEMORY_RW; + +				create_mapping(&map); +			} +		}  	}  } +#ifdef CONFIG_ARM_LPAE +/* + * early_paging_init() recreates boot time page table setup, allowing machines + * to switch over to a high (>4G) address space on LPAE systems + */ +void __init early_paging_init(const struct machine_desc *mdesc, +			      struct proc_info_list *procinfo) +{ +	pmdval_t pmdprot = procinfo->__cpu_mm_mmu_flags; +	unsigned long map_start, map_end; +	pgd_t *pgd0, *pgdk; +	pud_t *pud0, *pudk, *pud_start; +	pmd_t *pmd0, *pmdk; +	phys_addr_t phys; +	int i; + +	if (!(mdesc->init_meminfo)) +		return; + +	/* remap kernel code and data */ +	map_start = init_mm.start_code & PMD_MASK; +	map_end   = ALIGN(init_mm.brk, PMD_SIZE); + +	/* get a handle on things... */ +	pgd0 = pgd_offset_k(0); +	pud_start = pud0 = pud_offset(pgd0, 0); +	pmd0 = pmd_offset(pud0, 0); + +	pgdk = pgd_offset_k(map_start); +	pudk = pud_offset(pgdk, map_start); +	pmdk = pmd_offset(pudk, map_start); + +	mdesc->init_meminfo(); + +	/* Run the patch stub to update the constants */ +	fixup_pv_table(&__pv_table_begin, +		(&__pv_table_end - &__pv_table_begin) << 2); + +	/* +	 * Cache cleaning operations for self-modifying code +	 * We should clean the entries by MVA but running a +	 * for loop over every pv_table entry pointer would +	 * just complicate the code. +	 */ +	flush_cache_louis(); +	dsb(ishst); +	isb(); + +	/* remap level 1 table */ +	for (i = 0; i < PTRS_PER_PGD; pud0++, i++) { +		set_pud(pud0, +			__pud(__pa(pmd0) | PMD_TYPE_TABLE | L_PGD_SWAPPER)); +		pmd0 += PTRS_PER_PMD; +	} + +	/* remap pmds for kernel mapping */ +	phys = __pa(map_start); +	do { +		*pmdk++ = __pmd(phys | pmdprot); +		phys += PMD_SIZE; +	} while (phys < map_end); + +	flush_cache_all(); +	cpu_switch_mm(pgd0, &init_mm); +	cpu_set_ttbr(1, __pa(pgd0) + TTBR1_OFFSET); +	local_flush_bp_all(); +	local_flush_tlb_all(); +} + +#else + +void __init early_paging_init(const struct machine_desc *mdesc, +			      struct proc_info_list *procinfo) +{ +	if (mdesc->init_meminfo) +		mdesc->init_meminfo(); +} + +#endif +  /*   * paging_init() sets up the page tables, initialises the zone memory   * maps, and sets up the zero page, bad page and bad page tables. 
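To make the RWX/RW carving in map_lowmem() above easier to follow, here is a small stand-alone model of the same splitting logic in plain C (purely illustrative; split_block() is not a kernel function and the addresses in main() are made up).

#include <stdio.h>

/* Model of map_lowmem(): a block overlapping the section-rounded kernel
 * image [kx_start, kx_end) is emitted as up to three mappings - RW below
 * it, RWX over it, RW above it; blocks with no overlap stay RWX. */
static void split_block(unsigned long start, unsigned long end,
			unsigned long kx_start, unsigned long kx_end)
{
	if (end < kx_start || start >= kx_end) {
		printf("RWX %#lx-%#lx\n", start, end);
		return;
	}
	if (start < kx_start)
		printf("RW  %#lx-%#lx\n", start, kx_start);
	printf("RWX %#lx-%#lx\n", kx_start, kx_end);
	if (kx_end < end)
		printf("RW  %#lx-%#lx\n", kx_end, end);
}

int main(void)
{
	/* e.g. 256MB of RAM at 0x80000000 with a 16MB executable image */
	split_block(0x80000000UL, 0x90000000UL, 0x80000000UL, 0x81000000UL);
	return 0;
}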
diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c index 34d4ab217ba..a014dfacd5c 100644 --- a/arch/arm/mm/nommu.c +++ b/arch/arm/mm/nommu.c @@ -18,6 +18,7 @@  #include <asm/mach/arch.h>  #include <asm/cputype.h>  #include <asm/mpu.h> +#include <asm/procinfo.h>  #include "mm.h" @@ -87,30 +88,35 @@ static unsigned long irbar_read(void)  void __init sanity_check_meminfo_mpu(void)  {  	int i; -	struct membank *bank = meminfo.bank;  	phys_addr_t phys_offset = PHYS_OFFSET;  	phys_addr_t aligned_region_size, specified_mem_size, rounded_mem_size; - -	/* Initially only use memory continuous from PHYS_OFFSET */ -	if (bank_phys_start(&bank[0]) != phys_offset) -		panic("First memory bank must be contiguous from PHYS_OFFSET"); - -	/* Banks have already been sorted by start address */ -	for (i = 1; i < meminfo.nr_banks; i++) { -		if (bank[i].start <= bank_phys_end(&bank[0]) && -		    bank_phys_end(&bank[i]) > bank_phys_end(&bank[0])) { -			bank[0].size = bank_phys_end(&bank[i]) - bank[0].start; +	struct memblock_region *reg; +	bool first = true; +	phys_addr_t mem_start; +	phys_addr_t mem_end; + +	for_each_memblock(memory, reg) { +		if (first) { +			/* +			 * Initially only use memory continuous from +			 * PHYS_OFFSET */ +			if (reg->base != phys_offset) +				panic("First memory bank must be contiguous from PHYS_OFFSET"); + +			mem_start = reg->base; +			mem_end = reg->base + reg->size; +			specified_mem_size = reg->size; +			first = false;  		} else { -			pr_notice("Ignoring RAM after 0x%.8lx. " -			"First non-contiguous (ignored) bank start: 0x%.8lx\n", -				(unsigned long)bank_phys_end(&bank[0]), -				(unsigned long)bank_phys_start(&bank[i])); -			break; +			/* +			 * memblock auto merges contiguous blocks, remove +			 * all blocks afterwards +			 */ +			pr_notice("Ignoring RAM after %pa, memory at %pa ignored\n", +				  &mem_start, ®->base); +			memblock_remove(reg->base, reg->size);  		}  	} -	/* All contiguous banks are now merged in to the first bank */ -	meminfo.nr_banks = 1; -	specified_mem_size = bank[0].size;  	/*  	 * MPU has curious alignment requirements: Size must be power of 2, and @@ -127,23 +133,24 @@ void __init sanity_check_meminfo_mpu(void)  	 */  	aligned_region_size = (phys_offset - 1) ^ (phys_offset);  	/* Find the max power-of-two sized region that fits inside our bank */ -	rounded_mem_size = (1 <<  __fls(bank[0].size)) - 1; +	rounded_mem_size = (1 <<  __fls(specified_mem_size)) - 1;  	/* The actual region size is the smaller of the two */  	aligned_region_size = aligned_region_size < rounded_mem_size  				? 
aligned_region_size + 1  				: rounded_mem_size + 1; -	if (aligned_region_size != specified_mem_size) -		pr_warn("Truncating memory from 0x%.8lx to 0x%.8lx (MPU region constraints)", -				(unsigned long)specified_mem_size, -				(unsigned long)aligned_region_size); +	if (aligned_region_size != specified_mem_size) { +		pr_warn("Truncating memory from %pa to %pa (MPU region constraints)", +				&specified_mem_size, &aligned_region_size); +		memblock_remove(mem_start + aligned_region_size, +				specified_mem_size - aligned_region_size); -	meminfo.bank[0].size = aligned_region_size; -	pr_debug("MPU Region from 0x%.8lx size 0x%.8lx (end 0x%.8lx))\n", -		(unsigned long)phys_offset, -		(unsigned long)aligned_region_size, -		(unsigned long)bank_phys_end(&bank[0])); +		mem_end = mem_start + aligned_region_size; +	} + +	pr_debug("MPU Region from %pa size %pa (end %pa))\n", +		&phys_offset, &aligned_region_size, &mem_end);  } @@ -291,8 +298,18 @@ void __init sanity_check_meminfo(void)  {  	phys_addr_t end;  	sanity_check_meminfo_mpu(); -	end = bank_phys_end(&meminfo.bank[meminfo.nr_banks - 1]); +	end = memblock_end_of_DRAM();  	high_memory = __va(end - 1) + 1; +	memblock_set_current_limit(end); +} + +/* + * early_paging_init() recreates boot time page table setup, allowing machines + * to switch over to a high (>4G) address space on LPAE systems + */ +void __init early_paging_init(const struct machine_desc *mdesc, +			      struct proc_info_list *procinfo) +{  }  /* diff --git a/arch/arm/mm/pgd.c b/arch/arm/mm/pgd.c index 0acb089d0f7..249379535be 100644 --- a/arch/arm/mm/pgd.c +++ b/arch/arm/mm/pgd.c @@ -23,7 +23,7 @@  #define __pgd_alloc()	kmalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL)  #define __pgd_free(pgd)	kfree(pgd)  #else -#define __pgd_alloc()	(pgd_t *)__get_free_pages(GFP_KERNEL, 2) +#define __pgd_alloc()	(pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_REPEAT, 2)  #define __pgd_free(pgd)	free_pages((unsigned long)pgd, 2)  #endif @@ -87,7 +87,8 @@ pgd_t *pgd_alloc(struct mm_struct *mm)  		init_pud = pud_offset(init_pgd, 0);  		init_pmd = pmd_offset(init_pud, 0);  		init_pte = pte_offset_map(init_pmd, 0); -		set_pte_ext(new_pte, *init_pte, 0); +		set_pte_ext(new_pte + 0, init_pte[0], 0); +		set_pte_ext(new_pte + 1, init_pte[1], 0);  		pte_unmap(init_pte);  		pte_unmap(new_pte);  	} diff --git a/arch/arm/mm/proc-arm925.S b/arch/arm/mm/proc-arm925.S index 97448c3acf3..ba0d58e1a2a 100644 --- a/arch/arm/mm/proc-arm925.S +++ b/arch/arm/mm/proc-arm925.S @@ -502,6 +502,7 @@ __\name\()_proc_info:  	.long	\cpu_val  	.long	\cpu_mask  	.long   PMD_TYPE_SECT | \ +		PMD_SECT_CACHEABLE | \ 		PMD_BIT4 | \ 		PMD_SECT_AP_WRITE | \ 		PMD_SECT_AP_READ diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S index e3c48a3fe06..ee1d8059395 100644 --- a/arch/arm/mm/proc-macros.S +++ b/arch/arm/mm/proc-macros.S @@ -112,13 +112,9 @@   *  100x   1   0   1	r/o	no acc   *  10x0   1   0   1	r/o	no acc   *  1011   0   0   1	r/w	no acc - *  110x   0   1   0	r/w	r/o - *  11x0   0   1   0	r/w	r/o - *  1111   0   1   1	r/w	r/w - * - * If !CONFIG_CPU_USE_DOMAINS, the following permissions are changed:   *  110x   1   1   1	r/o	r/o   *  11x0   1   1   1	r/o	r/o + *  1111   0   1   1	r/w	r/w   */  	.macro	armv6_mt_table pfx  \pfx\()_mt_table: @@ -137,7 +133,7 @@  	.long	PTE_EXT_TEX(2)					@ L_PTE_MT_DEV_NONSHARED  	.long	0x00						@ unused  	.long	0x00						@ unused -	.long	0x00						@ unused +	.long	PTE_CACHEABLE | PTE_BUFFERABLE | PTE_EXT_APX	@ L_PTE_MT_VECTORS  	.endm  	.macro	armv6_set_pte_ext pfx @@ -158,24 
+154,21 @@  	tst	r1, #L_PTE_USER  	orrne	r3, r3, #PTE_EXT_AP1 -#ifdef CONFIG_CPU_USE_DOMAINS -	@ allow kernel read/write access to read-only user pages  	tstne	r3, #PTE_EXT_APX -	bicne	r3, r3, #PTE_EXT_APX | PTE_EXT_AP0 -#endif + +	@ user read-only -> kernel read-only +	bicne	r3, r3, #PTE_EXT_AP0  	tst	r1, #L_PTE_XN  	orrne	r3, r3, #PTE_EXT_XN -	orr	r3, r3, r2 +	eor	r3, r3, r2  	tst	r1, #L_PTE_YOUNG  	tstne	r1, #L_PTE_PRESENT  	moveq	r3, #0 -#ifndef CONFIG_CPU_USE_DOMAINS  	tstne	r1, #L_PTE_NONE  	movne	r3, #0 -#endif  	str	r3, [r0]  	mcr	p15, 0, r0, c7, c10, 1		@ flush_pte diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S index 1128064fddc..32b3558321c 100644 --- a/arch/arm/mm/proc-v6.S +++ b/arch/arm/mm/proc-v6.S @@ -208,7 +208,6 @@ __v6_setup:  	mcr	p15, 0, r0, c7, c14, 0		@ clean+invalidate D cache  	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache  	mcr	p15, 0, r0, c7, c15, 0		@ clean+invalidate cache -	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer  #ifdef CONFIG_MMU  	mcr	p15, 0, r0, c8, c7, 0		@ invalidate I + D TLBs  	mcr	p15, 0, r0, c2, c0, 2		@ TTB control register @@ -218,11 +217,11 @@ __v6_setup:  	ALT_UP(orr	r8, r8, #TTB_FLAGS_UP)  	mcr	p15, 0, r8, c2, c0, 1		@ load TTB1  #endif /* CONFIG_MMU */ +	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer and +						@ complete invalidations  	adr	r5, v6_crval  	ldmia	r5, {r5, r6} -#ifdef CONFIG_CPU_ENDIAN_BE8 -	orr	r6, r6, #1 << 25		@ big-endian page tables -#endif + ARM_BE8(orr	r6, r6, #1 << 25)		@ big-endian page tables  	mrc	p15, 0, r0, c1, c0, 0		@ read control register  	bic	r0, r0, r5			@ clear bits them  	orr	r0, r0, r6			@ set them diff --git a/arch/arm/mm/proc-v7-2level.S b/arch/arm/mm/proc-v7-2level.S index bdd3be4be77..1f52915f2b2 100644 --- a/arch/arm/mm/proc-v7-2level.S +++ b/arch/arm/mm/proc-v7-2level.S @@ -90,21 +90,14 @@ ENTRY(cpu_v7_set_pte_ext)  	tst	r1, #L_PTE_USER  	orrne	r3, r3, #PTE_EXT_AP1 -#ifdef CONFIG_CPU_USE_DOMAINS -	@ allow kernel read/write access to read-only user pages -	tstne	r3, #PTE_EXT_APX -	bicne	r3, r3, #PTE_EXT_APX | PTE_EXT_AP0 -#endif  	tst	r1, #L_PTE_XN  	orrne	r3, r3, #PTE_EXT_XN  	tst	r1, #L_PTE_YOUNG  	tstne	r1, #L_PTE_VALID -#ifndef CONFIG_CPU_USE_DOMAINS  	eorne	r1, r1, #L_PTE_NONE  	tstne	r1, #L_PTE_NONE -#endif  	moveq	r3, #0   ARM(	str	r3, [r0, #2048]! 
) diff --git a/arch/arm/mm/proc-v7-3level.S b/arch/arm/mm/proc-v7-3level.S index 01a719e18bb..22e3ad63500 100644 --- a/arch/arm/mm/proc-v7-3level.S +++ b/arch/arm/mm/proc-v7-3level.S @@ -64,6 +64,14 @@ ENTRY(cpu_v7_switch_mm)  	mov	pc, lr  ENDPROC(cpu_v7_switch_mm) +#ifdef __ARMEB__ +#define rl r3 +#define rh r2 +#else +#define rl r2 +#define rh r3 +#endif +  /*   * cpu_v7_set_pte_ext(ptep, pte)   * @@ -73,13 +81,13 @@ ENDPROC(cpu_v7_switch_mm)   */  ENTRY(cpu_v7_set_pte_ext)  #ifdef CONFIG_MMU -	tst	r2, #L_PTE_VALID +	tst	rl, #L_PTE_VALID  	beq	1f -	tst	r3, #1 << (57 - 32)		@ L_PTE_NONE -	bicne	r2, #L_PTE_VALID +	tst	rh, #1 << (57 - 32)		@ L_PTE_NONE +	bicne	rl, #L_PTE_VALID  	bne	1f -	tst	r3, #1 << (55 - 32)		@ L_PTE_DIRTY -	orreq	r2, #L_PTE_RDONLY +	tst	rh, #1 << (55 - 32)		@ L_PTE_DIRTY +	orreq	rl, #L_PTE_RDONLY  1:	strd	r2, r3, [r0]  	ALT_SMP(W(nop))  	ALT_UP (mcr	p15, 0, r0, c7, c10, 1)		@ flush_pte diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index c63d9bdee51..3db2c2f04a3 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -92,7 +92,7 @@ ENDPROC(cpu_v7_dcache_clean_area)  /* Suspend/resume support: derived from arch/arm/mach-s5pv210/sleep.S */  .globl	cpu_v7_suspend_size -.equ	cpu_v7_suspend_size, 4 * 8 +.equ	cpu_v7_suspend_size, 4 * 9  #ifdef CONFIG_ARM_CPU_SUSPEND  ENTRY(cpu_v7_do_suspend)  	stmfd	sp!, {r4 - r10, lr} @@ -101,13 +101,17 @@ ENTRY(cpu_v7_do_suspend)  	stmia	r0!, {r4 - r5}  #ifdef CONFIG_MMU  	mrc	p15, 0, r6, c3, c0, 0	@ Domain ID +#ifdef CONFIG_ARM_LPAE +	mrrc	p15, 1, r5, r7, c2	@ TTB 1 +#else  	mrc	p15, 0, r7, c2, c0, 1	@ TTB 1 +#endif  	mrc	p15, 0, r11, c2, c0, 2	@ TTB control register  #endif  	mrc	p15, 0, r8, c1, c0, 0	@ Control register  	mrc	p15, 0, r9, c1, c0, 1	@ Auxiliary control register  	mrc	p15, 0, r10, c1, c0, 2	@ Co-processor access control -	stmia	r0, {r6 - r11} +	stmia	r0, {r5 - r11}  	ldmfd	sp!, {r4 - r10, pc}  ENDPROC(cpu_v7_do_suspend) @@ -118,16 +122,19 @@ ENTRY(cpu_v7_do_resume)  	ldmia	r0!, {r4 - r5}  	mcr	p15, 0, r4, c13, c0, 0	@ FCSE/PID  	mcr	p15, 0, r5, c13, c0, 3	@ User r/o thread ID -	ldmia	r0, {r6 - r11} +	ldmia	r0, {r5 - r11}  #ifdef CONFIG_MMU  	mcr	p15, 0, ip, c8, c7, 0	@ invalidate TLBs  	mcr	p15, 0, r6, c3, c0, 0	@ Domain ID -#ifndef CONFIG_ARM_LPAE +#ifdef CONFIG_ARM_LPAE +	mcrr	p15, 0, r1, ip, c2	@ TTB 0 +	mcrr	p15, 1, r5, r7, c2	@ TTB 1 +#else  	ALT_SMP(orr	r1, r1, #TTB_FLAGS_SMP)  	ALT_UP(orr	r1, r1, #TTB_FLAGS_UP) -#endif  	mcr	p15, 0, r1, c2, c0, 0	@ TTB 0  	mcr	p15, 0, r7, c2, c0, 1	@ TTB 1 +#endif  	mcr	p15, 0, r11, c2, c0, 2	@ TTB control register  	ldr	r4, =PRRR		@ PRRR  	ldr	r5, =NMRR		@ NMRR @@ -162,9 +169,31 @@ ENDPROC(cpu_pj4b_do_idle)  	globl_equ	cpu_pj4b_do_idle,  	cpu_v7_do_idle  #endif  	globl_equ	cpu_pj4b_dcache_clean_area,	cpu_v7_dcache_clean_area -	globl_equ	cpu_pj4b_do_suspend,	cpu_v7_do_suspend -	globl_equ	cpu_pj4b_do_resume,	cpu_v7_do_resume -	globl_equ	cpu_pj4b_suspend_size,	cpu_v7_suspend_size +#ifdef CONFIG_ARM_CPU_SUSPEND +ENTRY(cpu_pj4b_do_suspend) +	stmfd	sp!, {r6 - r10} +	mrc	p15, 1, r6, c15, c1, 0  @ save CP15 - extra features +	mrc	p15, 1, r7, c15, c2, 0	@ save CP15 - Aux Func Modes Ctrl 0 +	mrc	p15, 1, r8, c15, c1, 2	@ save CP15 - Aux Debug Modes Ctrl 2 +	mrc	p15, 1, r9, c15, c1, 1  @ save CP15 - Aux Debug Modes Ctrl 1 +	mrc	p15, 0, r10, c9, c14, 0  @ save CP15 - PMC +	stmia	r0!, {r6 - r10} +	ldmfd	sp!, {r6 - r10} +	b cpu_v7_do_suspend +ENDPROC(cpu_pj4b_do_suspend) + +ENTRY(cpu_pj4b_do_resume) +	ldmia	r0!, {r6 - r10} +	mcr	p15, 1, r6, c15, c1, 0  @ save CP15 - extra 
features +	mcr	p15, 1, r7, c15, c2, 0	@ save CP15 - Aux Func Modes Ctrl 0 +	mcr	p15, 1, r8, c15, c1, 2	@ save CP15 - Aux Debug Modes Ctrl 2 +	mcr	p15, 1, r9, c15, c1, 1  @ save CP15 - Aux Debug Modes Ctrl 1 +	mcr	p15, 0, r10, c9, c14, 0  @ save CP15 - PMC +	b cpu_v7_do_resume +ENDPROC(cpu_pj4b_do_resume) +#endif +.globl	cpu_pj4b_suspend_size +.equ	cpu_pj4b_suspend_size, 4 * 14  #endif @@ -185,7 +214,9 @@ __v7_cr7mp_setup:  	mov	r10, #(1 << 0)			@ Cache/TLB ops broadcasting  	b	1f  __v7_ca7mp_setup: +__v7_ca12mp_setup:  __v7_ca15mp_setup: +__v7_ca17mp_setup:  	mov	r10, #0  1:  #ifdef CONFIG_SMP @@ -344,7 +375,6 @@ __v7_setup:  4:	mov	r10, #0  	mcr	p15, 0, r10, c7, c5, 0		@ I+BTB cache invalidate -	dsb  #ifdef CONFIG_MMU  	mcr	p15, 0, r10, c8, c7, 0		@ invalidate I + D TLBs  	v7_ttb_setup r10, r4, r8, r5		@ TTBCR, TTBRx setup @@ -353,6 +383,7 @@ __v7_setup:  	mcr	p15, 0, r5, c10, c2, 0		@ write PRRR  	mcr	p15, 0, r6, c10, c2, 1		@ write NMRR  #endif +	dsb					@ Complete invalidations  #ifndef CONFIG_ARM_THUMBEE  	mrc	p15, 0, r0, c0, c1, 0		@ read ID_PFR0 for ThumbEE  	and	r0, r0, #(0xf << 12)		@ ThumbEE enabled field @@ -367,9 +398,7 @@ __v7_setup:  #endif  	adr	r5, v7_crval  	ldmia	r5, {r5, r6} -#ifdef CONFIG_CPU_ENDIAN_BE8 -	orr	r6, r6, #1 << 25		@ big-endian page tables -#endif + ARM_BE8(orr	r6, r6, #1 << 25)		@ big-endian page tables  #ifdef CONFIG_SWP_EMULATE  	orr     r5, r5, #(1 << 10)              @ set SW bit in "clear"  	bic     r6, r6, #(1 << 10)              @ clear it in "mmuset" @@ -479,6 +508,16 @@ __v7_ca7mp_proc_info:  	.size	__v7_ca7mp_proc_info, . - __v7_ca7mp_proc_info  	/* +	 * ARM Ltd. Cortex A12 processor. +	 */ +	.type	__v7_ca12mp_proc_info, #object +__v7_ca12mp_proc_info: +	.long	0x410fc0d0 +	.long	0xff0ffff0 +	__v7_proc __v7_ca12mp_setup +	.size	__v7_ca12mp_proc_info, . - __v7_ca12mp_proc_info + +	/*  	 * ARM Ltd. Cortex A15 processor.  	 */  	.type	__v7_ca15mp_proc_info, #object @@ -489,6 +528,16 @@ __v7_ca15mp_proc_info:  	.size	__v7_ca15mp_proc_info, . - __v7_ca15mp_proc_info  	/* +	 * ARM Ltd. Cortex A17 processor. +	 */ +	.type	__v7_ca17mp_proc_info, #object +__v7_ca17mp_proc_info: +	.long	0x410fc0e0 +	.long	0xff0ffff0 +	__v7_proc __v7_ca17mp_setup +	.size	__v7_ca17mp_proc_info, . - __v7_ca17mp_proc_info + +	/*  	 * Qualcomm Inc. Krait processors.  	 */  	.type	__krait_proc_info, #object diff --git a/arch/arm/mm/proc-v7m.S b/arch/arm/mm/proc-v7m.S index 0c93588fcb9..1ca37c72f12 100644 --- a/arch/arm/mm/proc-v7m.S +++ b/arch/arm/mm/proc-v7m.S @@ -123,6 +123,11 @@ __v7m_setup:  	mov	pc, lr  ENDPROC(__v7m_setup) +	.align 2 +__v7m_setup_stack: +	.space	4 * 8				@ 8 registers +__v7m_setup_stack_top: +  	define_processor_functions v7m, dabort=nommu_early_abort, pabort=legacy_pabort, nommu=1  	.section ".rodata" @@ -152,6 +157,3 @@ __v7m_proc_info:  	.long	nop_cache_fns		@ proc_info_list.cache  	.size	__v7m_proc_info, . - __v7m_proc_info -__v7m_setup_stack: -	.space	4 * 8				@ 8 registers -__v7m_setup_stack_top:  | 
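A side note on the new Cortex-A12 and Cortex-A17 proc_info entries above: the boot code simply masks the CPU's MIDR with cpu_mask and compares the result against cpu_val. A cut-down, stand-alone C model of that lookup follows (illustrative only; the real table lives in the .proc.info.init section and is walked in assembly by __lookup_processor_type).

#include <stdint.h>
#include <stdio.h>

struct example_proc_info {		/* reduced model of proc_info_list */
	uint32_t cpu_val;
	uint32_t cpu_mask;
	const char *name;
};

static const struct example_proc_info table[] = {
	{ 0x410fc0d0, 0xff0ffff0, "Cortex-A12" },
	{ 0x410fc0e0, 0xff0ffff0, "Cortex-A17" },
};

static const char *example_lookup(uint32_t midr)
{
	for (unsigned int i = 0; i < sizeof(table) / sizeof(table[0]); i++)
		if ((midr & table[i].cpu_mask) == table[i].cpu_val)
			return table[i].name;
	return "unknown";
}

int main(void)
{
	/* 0x410fc0d1: an example Cortex-A12 MIDR with a non-zero revision */
	printf("%s\n", example_lookup(0x410fc0d1));
	return 0;
}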
