aboutsummaryrefslogtreecommitdiff
path: root/arch/powerpc/platforms/512x/mpc5121_generic.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/platforms/512x/mpc5121_generic.c')
-rw-r--r--arch/powerpc/platforms/512x/mpc5121_generic.c52
1 files changed, 0 insertions, 52 deletions
diff --git a/arch/powerpc/platforms/512x/mpc5121_generic.c b/arch/powerpc/platforms/512x/mpc5121_generic.c
deleted file mode 100644
index 926731f1ff0..00000000000
--- a/arch/powerpc/platforms/512x/mpc5121_generic.c
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (C) 2007,2008 Freescale Semiconductor, Inc. All rights reserved.
- *
- * Author: John Rigby, <jrigby@freescale.com>
- *
- * Description:
- * MPC5121 SoC setup
- *
- * This is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- */
-
-#include <linux/kernel.h>
-#include <linux/of_platform.h>
-
-#include <asm/machdep.h>
-#include <asm/ipic.h>
-#include <asm/prom.h>
-#include <asm/time.h>
-
-#include "mpc512x.h"
-
-/*
- * list of supported boards
- */
-static const char *board[] __initdata = {
- "prt,prtlvt",
- NULL
-};
-
-/*
- * Called very early, MMU is off, device-tree isn't unflattened
- */
-static int __init mpc5121_generic_probe(void)
-{
- return of_flat_dt_match(of_get_flat_dt_root(), board);
-}
-
-define_machine(mpc5121_generic) {
- .name = "MPC5121 generic",
- .probe = mpc5121_generic_probe,
- .init = mpc512x_init,
- .init_early = mpc512x_init_diu,
- .setup_arch = mpc512x_setup_diu,
- .init_IRQ = mpc512x_init_IRQ,
- .get_irq = ipic_get_irq,
- .calibrate_decr = generic_calibrate_decr,
- .restart = mpc512x_restart,
-};
'>11
-rw-r--r--arch/arm/mm/alignment.c370
-rw-r--r--arch/arm/mm/cache-aurora-l2.h55
-rw-r--r--arch/arm/mm/cache-fa.S249
-rw-r--r--arch/arm/mm/cache-feroceon-l2.c396
-rw-r--r--arch/arm/mm/cache-l2x0.c1517
-rw-r--r--arch/arm/mm/cache-nop.S50
-rw-r--r--arch/arm/mm/cache-tauros2.c302
-rw-r--r--arch/arm/mm/cache-tauros3.h41
-rw-r--r--arch/arm/mm/cache-v3.S137
-rw-r--r--arch/arm/mm/cache-v4.S82
-rw-r--r--arch/arm/mm/cache-v4wb.S73
-rw-r--r--arch/arm/mm/cache-v4wt.S77
-rw-r--r--arch/arm/mm/cache-v6.S146
-rw-r--r--arch/arm/mm/cache-v7.S283
-rw-r--r--arch/arm/mm/cache-xsc3l2.c220
-rw-r--r--arch/arm/mm/consistent.c507
-rw-r--r--arch/arm/mm/context.c264
-rw-r--r--arch/arm/mm/copypage-fa.c86
-rw-r--r--arch/arm/mm/copypage-feroceon.c112
-rw-r--r--arch/arm/mm/copypage-v3.S67
-rw-r--r--arch/arm/mm/copypage-v4mc.c70
-rw-r--r--arch/arm/mm/copypage-v4wb.S79
-rw-r--r--arch/arm/mm/copypage-v4wb.c95
-rw-r--r--arch/arm/mm/copypage-v4wt.S73
-rw-r--r--arch/arm/mm/copypage-v4wt.c88
-rw-r--r--arch/arm/mm/copypage-v6.c101
-rw-r--r--arch/arm/mm/copypage-xsc3.S97
-rw-r--r--arch/arm/mm/copypage-xsc3.c114
-rw-r--r--arch/arm/mm/copypage-xscale.c64
-rw-r--r--arch/arm/mm/discontig.c47
-rw-r--r--arch/arm/mm/dma-mapping.c2092
-rw-r--r--arch/arm/mm/dump.c365
-rw-r--r--arch/arm/mm/extable.c9
-rw-r--r--arch/arm/mm/fault-armv.c166
-rw-r--r--arch/arm/mm/fault.c407
-rw-r--r--arch/arm/mm/fault.h27
-rw-r--r--arch/arm/mm/flush.c245
-rw-r--r--arch/arm/mm/fsr-2level.c78
-rw-r--r--arch/arm/mm/fsr-3level.c68
-rw-r--r--arch/arm/mm/highmem.c152
-rw-r--r--arch/arm/mm/hugetlbpage.c58
-rw-r--r--arch/arm/mm/idmap.c136
-rw-r--r--arch/arm/mm/init.c758
-rw-r--r--arch/arm/mm/iomap.c27
-rw-r--r--arch/arm/mm/ioremap.c371
-rw-r--r--arch/arm/mm/l2c-common.c20
-rw-r--r--arch/arm/mm/l2c-l2x0-resume.S58
-rw-r--r--arch/arm/mm/mm.h94
-rw-r--r--arch/arm/mm/mmap.c213
-rw-r--r--arch/arm/mm/mmu.c1264
-rw-r--r--arch/arm/mm/nommu.c347
-rw-r--r--arch/arm/mm/pabort-legacy.S21
-rw-r--r--arch/arm/mm/pabort-v6.S21
-rw-r--r--arch/arm/mm/pabort-v7.S21
-rw-r--r--arch/arm/mm/pgd.c127
-rw-r--r--arch/arm/mm/proc-arm1020.S134
-rw-r--r--arch/arm/mm/proc-arm1020e.S141
-rw-r--r--arch/arm/mm/proc-arm1022.S141
-rw-r--r--arch/arm/mm/proc-arm1026.S143
-rw-r--r--arch/arm/mm/proc-arm6_7.S436
-rw-r--r--arch/arm/mm/proc-arm720.S121
-rw-r--r--arch/arm/mm/proc-arm740.S81
-rw-r--r--arch/arm/mm/proc-arm7tdmi.S221
-rw-r--r--arch/arm/mm/proc-arm920.S167
-rw-r--r--arch/arm/mm/proc-arm922.S141
-rw-r--r--arch/arm/mm/proc-arm925.S177
-rw-r--r--arch/arm/mm/proc-arm926.S165
-rw-r--r--arch/arm/mm/proc-arm940.S115
-rw-r--r--arch/arm/mm/proc-arm946.S117
-rw-r--r--arch/arm/mm/proc-arm9tdmi.S83
-rw-r--r--arch/arm/mm/proc-fa526.S218
-rw-r--r--arch/arm/mm/proc-feroceon.S626
-rw-r--r--arch/arm/mm/proc-macros.S272
-rw-r--r--arch/arm/mm/proc-mohawk.S455
-rw-r--r--arch/arm/mm/proc-sa110.S71
-rw-r--r--arch/arm/mm/proc-sa1100.S144
-rw-r--r--arch/arm/mm/proc-syms.c7
-rw-r--r--arch/arm/mm/proc-v6.S221
-rw-r--r--arch/arm/mm/proc-v7-2level.S165
-rw-r--r--arch/arm/mm/proc-v7-3level.S161
-rw-r--r--arch/arm/mm/proc-v7.S606
-rw-r--r--arch/arm/mm/proc-v7m.S159
-rw-r--r--arch/arm/mm/proc-xsc3.S248
-rw-r--r--arch/arm/mm/proc-xscale.S688
-rw-r--r--arch/arm/mm/tcm.h17
-rw-r--r--arch/arm/mm/tlb-fa.S69
-rw-r--r--arch/arm/mm/tlb-v3.S52
-rw-r--r--arch/arm/mm/tlb-v4.S8
-rw-r--r--arch/arm/mm/tlb-v4wb.S8
-rw-r--r--arch/arm/mm/tlb-v4wbi.S8
-rw-r--r--arch/arm/mm/tlb-v6.S15
-rw-r--r--arch/arm/mm/tlb-v7.S49
103 files changed, 15111 insertions, 5562 deletions
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index 7868f4dc1d0..c348eaee7ee 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -1,30 +1,9 @@
comment "Processor Type"
-config CPU_32
- bool
- default y
-
# Select CPU types depending on the architecture selected. This selects
# which CPUs we support in the kernel image, and the compiler instruction
# optimiser behaviour.
-# ARM610
-config CPU_ARM610
- bool "Support ARM610 processor"
- depends on ARCH_RPC
- select CPU_32v3
- select CPU_CACHE_V3
- select CPU_CACHE_VIVT
- select CPU_CP15_MMU
- select CPU_COPY_V3 if MMU
- select CPU_TLB_V3 if MMU
- help
- The ARM610 is the successor to the ARM3 processor
- and was produced by VLSI Technology Inc.
-
- Say Y if you want support for the ARM610 processor.
- Otherwise, say N.
-
# ARM7TDMI
config CPU_ARM7TDMI
bool "Support ARM7TDMI processor"
@@ -32,6 +11,7 @@ config CPU_ARM7TDMI
select CPU_32v4T
select CPU_ABRT_LV4T
select CPU_CACHE_V4
+ select CPU_PABRT_LEGACY
help
A 32-bit RISC microprocessor based on the ARM7 processor core
which has no memory control unit and cache.
@@ -39,35 +19,16 @@ config CPU_ARM7TDMI
Say Y if you want support for the ARM7TDMI processor.
Otherwise, say N.
-# ARM710
-config CPU_ARM710
- bool "Support ARM710 processor" if !ARCH_CLPS7500 && ARCH_RPC
- default y if ARCH_CLPS7500
- select CPU_32v3
- select CPU_CACHE_V3
- select CPU_CACHE_VIVT
- select CPU_CP15_MMU
- select CPU_COPY_V3 if MMU
- select CPU_TLB_V3 if MMU
- help
- A 32-bit RISC microprocessor based on the ARM7 processor core
- designed by Advanced RISC Machines Ltd. The ARM710 is the
- successor to the ARM610 processor. It was released in
- July 1994 by VLSI Technology Inc.
-
- Say Y if you want support for the ARM710 processor.
- Otherwise, say N.
-
# ARM720T
config CPU_ARM720T
- bool "Support ARM720T processor" if !ARCH_CLPS711X && !ARCH_L7200 && !ARCH_CDB89712 && ARCH_INTEGRATOR
- default y if ARCH_CLPS711X || ARCH_L7200 || ARCH_CDB89712 || ARCH_H720X
+ bool "Support ARM720T processor" if ARCH_INTEGRATOR
select CPU_32v4T
select CPU_ABRT_LV4T
select CPU_CACHE_V4
select CPU_CACHE_VIVT
- select CPU_CP15_MMU
select CPU_COPY_V4WT if MMU
+ select CPU_CP15_MMU
+ select CPU_PABRT_LEGACY
select CPU_TLB_V4WT if MMU
help
A 32-bit RISC processor with 8kByte Cache, Write Buffer and
@@ -82,8 +43,9 @@ config CPU_ARM740T
depends on !MMU
select CPU_32v4T
select CPU_ABRT_LV4T
- select CPU_CACHE_V3 # although the core is v4t
+ select CPU_CACHE_V4
select CPU_CP15_MPU
+ select CPU_PABRT_LEGACY
help
A 32-bit RISC processor with 8KB cache or 4KB variants,
write buffer and MPU(Protection Unit) built around
@@ -99,6 +61,7 @@ config CPU_ARM9TDMI
select CPU_32v4T
select CPU_ABRT_NOMMU
select CPU_CACHE_V4
+ select CPU_PABRT_LEGACY
help
A 32-bit RISC microprocessor based on the ARM9 processor core
which has no memory control unit and cache.
@@ -108,22 +71,18 @@ config CPU_ARM9TDMI
# ARM920T
config CPU_ARM920T
- bool "Support ARM920T processor"
- depends on ARCH_EP93XX || ARCH_INTEGRATOR || CPU_S3C2410 || CPU_S3C2440 || CPU_S3C2442 || ARCH_IMX || ARCH_AAEC2000 || ARCH_AT91RM9200
- default y if CPU_S3C2410 || CPU_S3C2440 || CPU_S3C2442 || ARCH_AT91RM9200
+ bool "Support ARM920T processor" if ARCH_INTEGRATOR
select CPU_32v4T
select CPU_ABRT_EV4T
select CPU_CACHE_V4WT
select CPU_CACHE_VIVT
- select CPU_CP15_MMU
select CPU_COPY_V4WB if MMU
+ select CPU_CP15_MMU
+ select CPU_PABRT_LEGACY
select CPU_TLB_V4WBI if MMU
help
The ARM920T is licensed to be produced by numerous vendors,
- and is used in the Maverick EP9312 and the Samsung S3C2410.
-
- More information on the Maverick EP9312 at
- <http://linuxdevices.com/products/PD2382866068.html>.
+ and is used in the Cirrus EP93xx and the Samsung S3C2410.
Say Y if you want support for the ARM920T processor.
Otherwise, say N.
@@ -131,14 +90,13 @@ config CPU_ARM920T
# ARM922T
config CPU_ARM922T
bool "Support ARM922T processor" if ARCH_INTEGRATOR
- depends on ARCH_LH7A40X || ARCH_INTEGRATOR || ARCH_KS8695
- default y if ARCH_LH7A40X || ARCH_KS8695
select CPU_32v4T
select CPU_ABRT_EV4T
select CPU_CACHE_V4WT
select CPU_CACHE_VIVT
- select CPU_CP15_MMU
select CPU_COPY_V4WB if MMU
+ select CPU_CP15_MMU
+ select CPU_PABRT_LEGACY
select CPU_TLB_V4WBI if MMU
help
The ARM922T is a version of the ARM920T, but with smaller
@@ -151,14 +109,13 @@ config CPU_ARM922T
# ARM925T
config CPU_ARM925T
bool "Support ARM925T processor" if ARCH_OMAP1
- depends on ARCH_OMAP15XX
- default y if ARCH_OMAP15XX
select CPU_32v4T
select CPU_ABRT_EV4T
select CPU_CACHE_V4WT
select CPU_CACHE_VIVT
- select CPU_CP15_MMU
select CPU_COPY_V4WB if MMU
+ select CPU_CP15_MMU
+ select CPU_PABRT_LEGACY
select CPU_TLB_V4WBI if MMU
help
The ARM925T is a mix between the ARM920T and ARM926T, but with
@@ -170,14 +127,13 @@ config CPU_ARM925T
# ARM926T
config CPU_ARM926T
- bool "Support ARM926T processor"
- depends on ARCH_INTEGRATOR || ARCH_VERSATILE_PB || MACH_VERSATILE_AB || ARCH_OMAP730 || ARCH_OMAP16XX || MACH_REALVIEW_EB || ARCH_PNX4008 || ARCH_NETX || CPU_S3C2412 || ARCH_AT91SAM9260 || ARCH_AT91SAM9261 || ARCH_AT91SAM9263 || ARCH_AT91SAM9RL || ARCH_NS9XXX || ARCH_DAVINCI
- default y if ARCH_VERSATILE_PB || MACH_VERSATILE_AB || ARCH_OMAP730 || ARCH_OMAP16XX || ARCH_PNX4008 || ARCH_NETX || CPU_S3C2412 || ARCH_AT91SAM9260 || ARCH_AT91SAM9261 || ARCH_AT91SAM9263 || ARCH_AT91SAM9RL || ARCH_NS9XXX || ARCH_DAVINCI
+ bool "Support ARM926T processor" if ARCH_INTEGRATOR || MACH_REALVIEW_EB
select CPU_32v5
select CPU_ABRT_EV5TJ
select CPU_CACHE_VIVT
- select CPU_CP15_MMU
select CPU_COPY_V4WB if MMU
+ select CPU_CP15_MMU
+ select CPU_PABRT_LEGACY
select CPU_TLB_V4WBI if MMU
help
This is a variant of the ARM920. It has slightly different
@@ -187,6 +143,24 @@ config CPU_ARM926T
Say Y if you want support for the ARM926T processor.
Otherwise, say N.
+# FA526
+config CPU_FA526
+ bool
+ select CPU_32v4
+ select CPU_ABRT_EV4
+ select CPU_CACHE_FA
+ select CPU_CACHE_VIVT
+ select CPU_COPY_FA if MMU
+ select CPU_CP15_MMU
+ select CPU_PABRT_LEGACY
+ select CPU_TLB_FA if MMU
+ help
+ The FA526 is a version of the ARMv4 compatible processor with
+ Branch Target Buffer, Unified TLB and cache line size 16.
+
+ Say Y if you want support for the FA526 processor.
+ Otherwise, say N.
+
# ARM940T
config CPU_ARM940T
bool "Support ARM940T processor" if ARCH_INTEGRATOR
@@ -195,6 +169,7 @@ config CPU_ARM940T
select CPU_ABRT_NOMMU
select CPU_CACHE_VIVT
select CPU_CP15_MPU
+ select CPU_PABRT_LEGACY
help
ARM940T is a member of the ARM9TDMI family of general-
purpose microprocessors with MPU and separate 4KB
@@ -212,6 +187,7 @@ config CPU_ARM946E
select CPU_ABRT_NOMMU
select CPU_CACHE_VIVT
select CPU_CP15_MPU
+ select CPU_PABRT_LEGACY
help
ARM946E-S is a member of the ARM9E-S family of high-
performance, 32-bit system-on-chip processor solutions.
@@ -222,14 +198,14 @@ config CPU_ARM946E
# ARM1020 - needs validating
config CPU_ARM1020
- bool "Support ARM1020T (rev 0) processor"
- depends on ARCH_INTEGRATOR
+ bool "Support ARM1020T (rev 0) processor" if ARCH_INTEGRATOR
select CPU_32v5
select CPU_ABRT_EV4T
select CPU_CACHE_V4WT
select CPU_CACHE_VIVT
- select CPU_CP15_MMU
select CPU_COPY_V4WB if MMU
+ select CPU_CP15_MMU
+ select CPU_PABRT_LEGACY
select CPU_TLB_V4WBI if MMU
help
The ARM1020 is the 32K cached version of the ARM10 processor,
@@ -240,26 +216,26 @@ config CPU_ARM1020
# ARM1020E - needs validating
config CPU_ARM1020E
- bool "Support ARM1020E processor"
- depends on ARCH_INTEGRATOR
+ bool "Support ARM1020E processor" if ARCH_INTEGRATOR
+ depends on n
select CPU_32v5
select CPU_ABRT_EV4T
select CPU_CACHE_V4WT
select CPU_CACHE_VIVT
- select CPU_CP15_MMU
select CPU_COPY_V4WB if MMU
+ select CPU_CP15_MMU
+ select CPU_PABRT_LEGACY
select CPU_TLB_V4WBI if MMU
- depends on n
# ARM1022E
config CPU_ARM1022
- bool "Support ARM1022E processor"
- depends on ARCH_INTEGRATOR
+ bool "Support ARM1022E processor" if ARCH_INTEGRATOR
select CPU_32v5
select CPU_ABRT_EV4T
select CPU_CACHE_VIVT
- select CPU_CP15_MMU
select CPU_COPY_V4WB if MMU # can probably do better
+ select CPU_CP15_MMU
+ select CPU_PABRT_LEGACY
select CPU_TLB_V4WBI if MMU
help
The ARM1022E is an implementation of the ARMv5TE architecture
@@ -271,13 +247,13 @@ config CPU_ARM1022
# ARM1026EJ-S
config CPU_ARM1026
- bool "Support ARM1026EJ-S processor"
- depends on ARCH_INTEGRATOR
+ bool "Support ARM1026EJ-S processor" if ARCH_INTEGRATOR
select CPU_32v5
select CPU_ABRT_EV5T # But need Jazelle, but EV5TJ ignores bit 10
select CPU_CACHE_VIVT
- select CPU_CP15_MMU
select CPU_COPY_V4WB if MMU # can probably do better
+ select CPU_CP15_MMU
+ select CPU_PABRT_LEGACY
select CPU_TLB_V4WBI if MMU
help
The ARM1026EJ-S is an implementation of the ARMv5TEJ architecture
@@ -288,15 +264,15 @@ config CPU_ARM1026
# SA110
config CPU_SA110
- bool "Support StrongARM(R) SA-110 processor" if !ARCH_EBSA110 && !FOOTBRIDGE && !ARCH_TBOX && !ARCH_SHARK && !ARCH_NEXUSPCI && ARCH_RPC
- default y if ARCH_EBSA110 || FOOTBRIDGE || ARCH_TBOX || ARCH_SHARK || ARCH_NEXUSPCI
+ bool
select CPU_32v3 if ARCH_RPC
select CPU_32v4 if !ARCH_RPC
select CPU_ABRT_EV4
select CPU_CACHE_V4WB
select CPU_CACHE_VIVT
- select CPU_CP15_MMU
select CPU_COPY_V4WB if MMU
+ select CPU_CP15_MMU
+ select CPU_PABRT_LEGACY
select CPU_TLB_V4WB if MMU
help
The Intel StrongARM(R) SA-110 is a 32-bit microprocessor and
@@ -310,107 +286,177 @@ config CPU_SA110
# SA1100
config CPU_SA1100
bool
- depends on ARCH_SA1100
- default y
select CPU_32v4
select CPU_ABRT_EV4
select CPU_CACHE_V4WB
select CPU_CACHE_VIVT
select CPU_CP15_MMU
+ select CPU_PABRT_LEGACY
select CPU_TLB_V4WB if MMU
# XScale
config CPU_XSCALE
bool
- depends on ARCH_IOP32X || ARCH_IOP33X || PXA25x || PXA27x || ARCH_IXP4XX || ARCH_IXP2000
- default y
select CPU_32v5
select CPU_ABRT_EV5T
select CPU_CACHE_VIVT
select CPU_CP15_MMU
+ select CPU_PABRT_LEGACY
select CPU_TLB_V4WBI if MMU
# XScale Core Version 3
config CPU_XSC3
bool
- depends on ARCH_IXP23XX || ARCH_IOP13XX || PXA3xx
- default y
select CPU_32v5
select CPU_ABRT_EV5T
select CPU_CACHE_VIVT
select CPU_CP15_MMU
+ select CPU_PABRT_LEGACY
select CPU_TLB_V4WBI if MMU
select IO_36
+# Marvell PJ1 (Mohawk)
+config CPU_MOHAWK
+ bool
+ select CPU_32v5
+ select CPU_ABRT_EV5T
+ select CPU_CACHE_VIVT
+ select CPU_COPY_V4WB if MMU
+ select CPU_CP15_MMU
+ select CPU_PABRT_LEGACY
+ select CPU_TLB_V4WBI if MMU
+
+# Feroceon
+config CPU_FEROCEON
+ bool
+ select CPU_32v5
+ select CPU_ABRT_EV5T
+ select CPU_CACHE_VIVT
+ select CPU_COPY_FEROCEON if MMU
+ select CPU_CP15_MMU
+ select CPU_PABRT_LEGACY
+ select CPU_TLB_FEROCEON if MMU
+
+config CPU_FEROCEON_OLD_ID
+ bool "Accept early Feroceon cores with an ARM926 ID"
+ depends on CPU_FEROCEON && !CPU_ARM926T
+ default y
+ help
+ This enables the usage of some old Feroceon cores
+ for which the CPU ID is equal to the ARM926 ID.
+ Relevant for Feroceon-1850 and early Feroceon-2850.
+
+# Marvell PJ4
+config CPU_PJ4
+ bool
+ select ARM_THUMBEE
+ select CPU_V7
+
+config CPU_PJ4B
+ bool
+ select CPU_V7
+
# ARMv6
config CPU_V6
- bool "Support ARM V6 processor"
- depends on ARCH_INTEGRATOR || MACH_REALVIEW_EB || ARCH_OMAP2 || ARCH_MX3
- default y if ARCH_MX3
+ bool "Support ARM V6 processor" if ARCH_INTEGRATOR || MACH_REALVIEW_EB || MACH_REALVIEW_PBX
select CPU_32v6
select CPU_ABRT_EV6
select CPU_CACHE_V6
select CPU_CACHE_VIPT
+ select CPU_COPY_V6 if MMU
select CPU_CP15_MMU
select CPU_HAS_ASID if MMU
- select CPU_COPY_V6 if MMU
+ select CPU_PABRT_V6
select CPU_TLB_V6 if MMU
# ARMv6k
-config CPU_32v6K
- bool "Support ARM V6K processor extensions" if !SMP
- depends on CPU_V6
- default y if SMP && !ARCH_MX3
- help
- Say Y here if your ARMv6 processor supports the 'K' extension.
- This enables the kernel to use some instructions not present
- on previous processors, and as such a kernel build with this
- enabled will not boot on processors with do not support these
- instructions.
+config CPU_V6K
+ bool "Support ARM V6K processor" if ARCH_INTEGRATOR || MACH_REALVIEW_EB || MACH_REALVIEW_PBX
+ select CPU_32v6
+ select CPU_32v6K
+ select CPU_ABRT_EV6
+ select CPU_CACHE_V6
+ select CPU_CACHE_VIPT
+ select CPU_COPY_V6 if MMU
+ select CPU_CP15_MMU
+ select CPU_HAS_ASID if MMU
+ select CPU_PABRT_V6
+ select CPU_TLB_V6 if MMU
# ARMv7
config CPU_V7
- bool "Support ARM V7 processor"
- depends on ARCH_INTEGRATOR
+ bool "Support ARM V7 processor" if ARCH_INTEGRATOR || MACH_REALVIEW_EB || MACH_REALVIEW_PBX
select CPU_32v6K
select CPU_32v7
select CPU_ABRT_EV7
select CPU_CACHE_V7
select CPU_CACHE_VIPT
- select CPU_CP15_MMU
- select CPU_HAS_ASID if MMU
select CPU_COPY_V6 if MMU
+ select CPU_CP15_MMU if MMU
+ select CPU_CP15_MPU if !MMU
+ select CPU_HAS_ASID if MMU
+ select CPU_PABRT_V7
select CPU_TLB_V7 if MMU
+# ARMv7M
+config CPU_V7M
+ bool
+ select CPU_32v7M
+ select CPU_ABRT_NOMMU
+ select CPU_CACHE_NOP
+ select CPU_PABRT_LEGACY
+ select CPU_THUMBONLY
+
+config CPU_THUMBONLY
+ bool
+ # There are no CPUs available with MMU that don't implement an ARM ISA:
+ depends on !MMU
+ help
+ Select this if your CPU doesn't support the 32 bit ARM instructions.
+
# Figure out what processor architecture version we should be using.
# This defines the compiler instruction set which depends on the machine type.
config CPU_32v3
bool
- select TLS_REG_EMUL if SMP || !MMU
+ select CPU_USE_DOMAINS if MMU
select NEEDS_SYSCALL_FOR_CMPXCHG if SMP
+ select NEED_KUSER_HELPERS
+ select TLS_REG_EMUL if SMP || !MMU
config CPU_32v4
bool
- select TLS_REG_EMUL if SMP || !MMU
+ select CPU_USE_DOMAINS if MMU
select NEEDS_SYSCALL_FOR_CMPXCHG if SMP
+ select NEED_KUSER_HELPERS
+ select TLS_REG_EMUL if SMP || !MMU
config CPU_32v4T
bool
- select TLS_REG_EMUL if SMP || !MMU
+ select CPU_USE_DOMAINS if MMU
select NEEDS_SYSCALL_FOR_CMPXCHG if SMP
+ select NEED_KUSER_HELPERS
+ select TLS_REG_EMUL if SMP || !MMU
config CPU_32v5
bool
- select TLS_REG_EMUL if SMP || !MMU
+ select CPU_USE_DOMAINS if MMU
select NEEDS_SYSCALL_FOR_CMPXCHG if SMP
+ select NEED_KUSER_HELPERS
+ select TLS_REG_EMUL if SMP || !MMU
config CPU_32v6
bool
select TLS_REG_EMUL if !CPU_32v6K && !MMU
+config CPU_32v6K
+ bool
+
config CPU_32v7
bool
+config CPU_32v7M
+ bool
+
# The abort model
config CPU_ABRT_NOMMU
bool
@@ -436,10 +482,16 @@ config CPU_ABRT_EV6
config CPU_ABRT_EV7
bool
-# The cache model
-config CPU_CACHE_V3
+config CPU_PABRT_LEGACY
+ bool
+
+config CPU_PABRT_V6
bool
+config CPU_PABRT_V7
+ bool
+
+# The cache model
config CPU_CACHE_V4
bool
@@ -455,32 +507,36 @@ config CPU_CACHE_V6
config CPU_CACHE_V7
bool
+config CPU_CACHE_NOP
+ bool
+
config CPU_CACHE_VIVT
bool
config CPU_CACHE_VIPT
bool
-if MMU
-# The copy-page model
-config CPU_COPY_V3
+config CPU_CACHE_FA
bool
+if MMU
+# The copy-page model
config CPU_COPY_V4WT
bool
config CPU_COPY_V4WB
bool
-config CPU_COPY_V6
+config CPU_COPY_FEROCEON
bool
-# This selects the TLB model
-config CPU_TLB_V3
+config CPU_COPY_FA
+ bool
+
+config CPU_COPY_V6
bool
- help
- ARM Architecture Version 3 TLB.
+# This selects the TLB model
config CPU_TLB_V4WT
bool
help
@@ -497,12 +553,26 @@ config CPU_TLB_V4WBI
ARM Architecture Version 4 TLB with writeback cache and invalidate
instruction cache entry.
+config CPU_TLB_FEROCEON
+ bool
+ help
+ Feroceon TLB (v4wbi with non-outer-cachable page table walks).
+
+config CPU_TLB_FA
+ bool
+ help
+ Faraday ARM FA526 architecture, unified TLB with writeback cache
+ and invalidate instruction cache entry. Branch target buffer is
+ also supported.
+
config CPU_TLB_V6
bool
config CPU_TLB_V7
bool
+config VERIFY_PERMISSION_FAULT
+ bool
endif
config CPU_HAS_ASID
@@ -528,6 +598,12 @@ config CPU_CP15_MPU
help
Processor has the CP15 register, which has MPU related registers.
+config CPU_USE_DOMAINS
+ bool
+ help
+ This option enables or disables the use of domain switching
+ via the set_fs() function.
+
#
# CPU supports 36-bit I/O
#
@@ -536,9 +612,31 @@ config IO_36
comment "Processor Features"
+config ARM_LPAE
+ bool "Support for the Large Physical Address Extension"
+ depends on MMU && CPU_32v7 && !CPU_32v6 && !CPU_32v5 && \
+ !CPU_32v4 && !CPU_32v3
+ help
+ Say Y if you have an ARMv7 processor supporting the LPAE page
+ table format and you would like to access memory beyond the
+ 4GB limit. The resulting kernel image will not run on
+ processors without the LPA extension.
+
+ If unsure, say N.
+
+config ARCH_PHYS_ADDR_T_64BIT
+ def_bool ARM_LPAE
+
+config ARCH_DMA_ADDR_T_64BIT
+ bool
+
config ARM_THUMB
- bool "Support Thumb user binaries"
- depends on CPU_ARM720T || CPU_ARM740T || CPU_ARM920T || CPU_ARM922T || CPU_ARM925T || CPU_ARM926T || CPU_ARM940T || CPU_ARM946E || CPU_ARM1020 || CPU_ARM1020E || CPU_ARM1022 || CPU_ARM1026 || CPU_XSCALE || CPU_XSC3 || CPU_V6 || CPU_V7
+ bool "Support Thumb user binaries" if !CPU_THUMBONLY
+ depends on CPU_ARM720T || CPU_ARM740T || CPU_ARM920T || CPU_ARM922T || \
+ CPU_ARM925T || CPU_ARM926T || CPU_ARM940T || CPU_ARM946E || \
+ CPU_ARM1020 || CPU_ARM1020E || CPU_ARM1022 || CPU_ARM1026 || \
+ CPU_XSCALE || CPU_XSC3 || CPU_MOHAWK || CPU_V6 || CPU_V6K || \
+ CPU_V7 || CPU_FEROCEON || CPU_V7M
default y
help
Say Y if you want to include kernel support for running user space
@@ -550,6 +648,53 @@ config ARM_THUMB
If you don't know what this all is, saying Y is a safe choice.
+config ARM_THUMBEE
+ bool "Enable ThumbEE CPU extension"
+ depends on CPU_V7
+ help
+ Say Y here if you have a CPU with the ThumbEE extension and code to
+ make use of it. Say N for code that can run on CPUs without ThumbEE.
+
+config ARM_VIRT_EXT
+ bool
+ depends on MMU
+ default y if CPU_V7
+ help
+ Enable the kernel to make use of the ARM Virtualization
+ Extensions to install hypervisors without run-time firmware
+ assistance.
+
+ A compliant bootloader is required in order to make maximum
+ use of this feature. Refer to Documentation/arm/Booting for
+ details.
+
+config SWP_EMULATE
+ bool "Emulate SWP/SWPB instructions"
+ depends on CPU_V7
+ default y if SMP
+ select HAVE_PROC_CPU if PROC_FS
+ help
+ ARMv6 architecture deprecates use of the SWP/SWPB instructions.
+ ARMv7 multiprocessing extensions introduce the ability to disable
+ these instructions, triggering an undefined instruction exception
+ when executed. Say Y here to enable software emulation of these
+ instructions for userspace (not kernel) using LDREX/STREX.
+ Also creates /proc/cpu/swp_emulation for statistics.
+
+ In some older versions of glibc [<=2.8] SWP is used during futex
+ trylock() operations with the assumption that the code will not
+ be preempted. This invalid assumption may be more likely to fail
+ with SWP emulation enabled, leading to deadlock of the user
+ application.
+
+ NOTE: when accessing uncached shared regions, LDREX/STREX rely
+ on an external transaction monitoring block called a global
+ monitor to maintain update atomicity. If your system does not
+ implement a global monitor, this option can cause programs that
+ perform SWP operations to uncached memory to deadlock.
+
+ If unsure, say Y.
+
config CPU_BIG_ENDIAN
bool "Build big-endian kernel"
depends on ARCH_SUPPORTS_BIG_ENDIAN
@@ -559,13 +704,26 @@ config CPU_BIG_ENDIAN
port must properly enable any big-endian related features
of your chipset/board/processor.
+config CPU_ENDIAN_BE8
+ bool
+ depends on CPU_BIG_ENDIAN
+ default CPU_V6 || CPU_V6K || CPU_V7
+ help
+ Support for the BE-8 (big-endian) mode on ARMv6 and ARMv7 processors.
+
+config CPU_ENDIAN_BE32
+ bool
+ depends on CPU_BIG_ENDIAN
+ default !CPU_ENDIAN_BE8
+ help
+ Support for the BE-32 (big-endian) mode on pre-ARMv6 processors.
+
config CPU_HIGH_VECTOR
depends on !MMU && CPU_CP15 && !CPU_ARM740T
bool "Select the High exception vector"
- default n
help
Say Y here to select high exception vector(0xFFFF0000~).
- The exception vector can be vary depending on the platform
+ The exception vector can vary depending on the platform
design in nommu mode. If your platform needs to select
high exception vector, say Y.
Otherwise or if you are unsure, say N, and the low exception
@@ -573,7 +731,7 @@ config CPU_HIGH_VECTOR
config CPU_ICACHE_DISABLE
bool "Disable I-Cache (I-bit)"
- depends on CPU_CP15 && !(CPU_ARM610 || CPU_ARM710 || CPU_ARM720T || CPU_ARM740T || CPU_XSCALE || CPU_XSC3)
+ depends on CPU_CP15 && !(CPU_ARM720T || CPU_ARM740T || CPU_XSCALE || CPU_XSC3)
help
Say Y here to disable the processor instruction cache. Unless
you have a reason not to or are unsure, say N.
@@ -600,7 +758,7 @@ config CPU_DCACHE_SIZE
config CPU_DCACHE_WRITETHROUGH
bool "Force write through D-cache"
- depends on (CPU_ARM740T || CPU_ARM920T || CPU_ARM922T || CPU_ARM925T || CPU_ARM926T || CPU_ARM940T || CPU_ARM946E || CPU_ARM1020) && !CPU_DCACHE_DISABLE
+ depends on (CPU_ARM740T || CPU_ARM920T || CPU_ARM922T || CPU_ARM925T || CPU_ARM926T || CPU_ARM940T || CPU_ARM946E || CPU_ARM1020 || CPU_FA526) && !CPU_DCACHE_DISABLE
default y if CPU_ARM925T
help
Say Y here to use the data cache in writethrough mode. Unless you
@@ -615,39 +773,237 @@ config CPU_CACHE_ROUND_ROBIN
config CPU_BPREDICT_DISABLE
bool "Disable branch prediction"
- depends on CPU_ARM1020 || CPU_V6 || CPU_XSC3 || CPU_V7
+ depends on CPU_ARM1020 || CPU_V6 || CPU_V6K || CPU_MOHAWK || CPU_XSC3 || CPU_V7 || CPU_FA526
help
Say Y here to disable branch prediction. If unsure, say N.
config TLS_REG_EMUL
bool
+ select NEED_KUSER_HELPERS
help
An SMP system using a pre-ARMv6 processor (there are apparently
a few prototypes like that in existence) and therefore access to
that required register must be emulated.
-config HAS_TLS_REG
- bool
- depends on !TLS_REG_EMUL
- default y if SMP || CPU_32v7
- help
- This selects support for the CP15 thread register.
- It is defined to be available on some ARMv6 processors (including
- all SMP capable ARMv6's) or later processors. User space may
- assume directly accessing that register and always obtain the
- expected value only on ARMv7 and above.
-
config NEEDS_SYSCALL_FOR_CMPXCHG
bool
+ select NEED_KUSER_HELPERS
help
SMP on a pre-ARMv6 processor? Well OK then.
Forget about fast user space cmpxchg support.
It is just not possible.
+config NEED_KUSER_HELPERS
+ bool
+
+config KUSER_HELPERS
+ bool "Enable kuser helpers in vector page" if !NEED_KUSER_HELPERS
+ default y
+ help
+ Warning: disabling this option may break user programs.
+
+ Provide kuser helpers in the vector page. The kernel provides
+ helper code to userspace in read only form at a fixed location
+ in the high vector page to allow userspace to be independent of
+ the CPU type fitted to the system. This permits binaries to be
+ run on ARMv4 through to ARMv7 without modification.
+
+ See Documentation/arm/kernel_user_helpers.txt for details.
+
+ However, the fixed address nature of these helpers can be used
+ by ROP (return orientated programming) authors when creating
+ exploits.
+
+ If all of the binaries and libraries which run on your platform
+ are built specifically for your platform, and make no use of
+ these helpers, then you can turn this option off to hinder
+ such exploits. However, in that case, if a binary or library
+ relying on those helpers is run, it will receive a SIGILL signal,
+ which will terminate the program.
+
+ Say N here only if you are absolutely certain that you do not
+ need these helpers; otherwise, the safe option is to say Y.
+
+config DMA_CACHE_RWFO
+ bool "Enable read/write for ownership DMA cache maintenance"
+ depends on CPU_V6K && SMP
+ default y
+ help
+ The Snoop Control Unit on ARM11MPCore does not detect the
+ cache maintenance operations and the dma_{map,unmap}_area()
+ functions may leave stale cache entries on other CPUs. By
+ enabling this option, Read or Write For Ownership in the ARMv6
+ DMA cache maintenance functions is performed. These LDR/STR
+ instructions change the cache line state to shared or modified
+ so that the cache operation has the desired effect.
+
+ Note that the workaround is only valid on processors that do
+ not perform speculative loads into the D-cache. For such
+ processors, if cache maintenance operations are not broadcast
+ in hardware, other workarounds are needed (e.g. cache
+ maintenance broadcasting in software via FIQ).
+
config OUTER_CACHE
bool
- default n
+
+config OUTER_CACHE_SYNC
+ bool
+ help
+ The outer cache has a outer_cache_fns.sync function pointer
+ that can be used to drain the write buffer of the outer cache.
+
+config CACHE_FEROCEON_L2
+ bool "Enable the Feroceon L2 cache controller"
+ depends on ARCH_KIRKWOOD || ARCH_MV78XX0 || ARCH_MVEBU
+ default y
+ select OUTER_CACHE
+ help
+ This option enables the Feroceon L2 cache controller.
+
+config CACHE_FEROCEON_L2_WRITETHROUGH
+ bool "Force Feroceon L2 cache write through"
+ depends on CACHE_FEROCEON_L2
+ help
+ Say Y here to use the Feroceon L2 cache in writethrough mode.
+ Unless you specifically require this, say N for writeback mode.
+
+config MIGHT_HAVE_CACHE_L2X0
+ bool
+ help
+ This option should be selected by machines which have a L2x0
+ or PL310 cache controller, but where its use is optional.
+
+ The only effect of this option is to make CACHE_L2X0 and
+ related options available to the user for configuration.
+
+ Boards or SoCs which always require the cache controller
+ support to be present should select CACHE_L2X0 directly
+ instead of this option, thus preventing the user from
+ inadvertently configuring a broken kernel.
config CACHE_L2X0
+ bool "Enable the L2x0 outer cache controller" if MIGHT_HAVE_CACHE_L2X0
+ default MIGHT_HAVE_CACHE_L2X0
+ select OUTER_CACHE
+ select OUTER_CACHE_SYNC
+ help
+ This option enables the L2x0 PrimeCell.
+
+if CACHE_L2X0
+
+config CACHE_PL310
bool
+ default y if CPU_V7 && !(CPU_V6 || CPU_V6K)
+ help
+ This option enables optimisations for the PL310 cache
+ controller.
+
+config PL310_ERRATA_588369
+ bool "PL310 errata: Clean & Invalidate maintenance operations do not invalidate clean lines"
+ help
+ The PL310 L2 cache controller implements three types of Clean &
+ Invalidate maintenance operations: by Physical Address
+ (offset 0x7F0), by Index/Way (0x7F8) and by Way (0x7FC).
+ They are architecturally defined to behave as the execution of a
+ clean operation followed immediately by an invalidate operation,
+ both performing to the same memory location. This functionality
+ is not correctly implemented in PL310 as clean lines are not
+ invalidated as a result of these operations.
+
+config PL310_ERRATA_727915
+ bool "PL310 errata: Background Clean & Invalidate by Way operation can cause data corruption"
+ help
+ PL310 implements the Clean & Invalidate by Way L2 cache maintenance
+ operation (offset 0x7FC). This operation runs in background so that
+ PL310 can handle normal accesses while it is in progress. Under very
+ rare circumstances, due to this erratum, write data can be lost when
+ PL310 treats a cacheable write transaction during a Clean &
+ Invalidate by Way operation.
+
+config PL310_ERRATA_753970
+ bool "PL310 errata: cache sync operation may be faulty"
+ help
+ This option enables the workaround for the 753970 PL310 (r3p0) erratum.
+
+ Under some condition the effect of cache sync operation on
+ the store buffer still remains when the operation completes.
+ This means that the store buffer is always asked to drain and
+ this prevents it from merging any further writes. The workaround
+ is to replace the normal offset of cache sync operation (0x730)
+ by another offset targeting an unmapped PL310 register 0x740.
+ This has the same effect as the cache sync operation: store buffer
+ drain and waiting for all buffers empty.
+
+config PL310_ERRATA_769419
+ bool "PL310 errata: no automatic Store Buffer drain"
+ help
+ On revisions of the PL310 prior to r3p2, the Store Buffer does
+ not automatically drain. This can cause normal, non-cacheable
+ writes to be retained when the memory system is idle, leading
+ to suboptimal I/O performance for drivers using coherent DMA.
+ This option adds a write barrier to the cpu_idle loop so that,
+ on systems with an outer cache, the store buffer is drained
+ explicitly.
+
+endif
+
+config CACHE_TAUROS2
+ bool "Enable the Tauros2 L2 cache controller"
+ depends on (ARCH_DOVE || ARCH_MMP || CPU_PJ4)
+ default y
select OUTER_CACHE
+ help
+ This option enables the Tauros2 L2 cache controller (as
+ found on PJ1/PJ4).
+
+config CACHE_XSC3L2
+ bool "Enable the L2 cache on XScale3"
+ depends on CPU_XSC3
+ default y
+ select OUTER_CACHE
+ help
+ This option enables the L2 cache on XScale3.
+
+config ARM_L1_CACHE_SHIFT_6
+ bool
+ default y if CPU_V7
+ help
+ Setting ARM L1 cache line size to 64 Bytes.
+
+config ARM_L1_CACHE_SHIFT
+ int
+ default 6 if ARM_L1_CACHE_SHIFT_6
+ default 5
+
+config ARM_DMA_MEM_BUFFERABLE
+ bool "Use non-cacheable memory for DMA" if (CPU_V6 || CPU_V6K) && !CPU_V7
+ depends on !(MACH_REALVIEW_PB1176 || REALVIEW_EB_ARM11MP || \
+ MACH_REALVIEW_PB11MP)
+ default y if CPU_V6 || CPU_V6K || CPU_V7
+ help
+ Historically, the kernel has used strongly ordered mappings to
+ provide DMA coherent memory. With the advent of ARMv7, mapping
+ memory with differing types results in unpredictable behaviour,
+ so on these CPUs, this option is forced on.
+
+ Multiple mappings with differing attributes is also unpredictable
+ on ARMv6 CPUs, but since they do not have aggressive speculative
+ prefetch, no harm appears to occur.
+
+ However, drivers may be missing the necessary barriers for ARMv6,
+ and therefore turning this on may result in unpredictable driver
+ behaviour. Therefore, we offer this as an option.
+
+ You are recommended say 'Y' here and debug any affected drivers.
+
+config ARCH_HAS_BARRIERS
+ bool
+ help
+ This option allows the use of custom mandatory barriers
+ included via the mach/barriers.h file.
+
+config ARCH_SUPPORTS_BIG_ENDIAN
+ bool
+ help
+ This option specifies the architecture can support big endian
+ operation.
diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile
index 762702765fc..91da64de440 100644
--- a/arch/arm/mm/Makefile
+++ b/arch/arm/mm/Makefile
@@ -2,20 +2,22 @@
# Makefile for the linux arm-specific parts of the memory manager.
#
-obj-y := consistent.o extable.o fault.o init.o \
+obj-y := dma-mapping.o extable.o fault.o init.o \
iomap.o
-obj-$(CONFIG_MMU) += fault-armv.o flush.o ioremap.o mmap.o \
- pgd.o mmu.o
+obj-$(CONFIG_MMU) += fault-armv.o flush.o idmap.o ioremap.o \
+ mmap.o pgd.o mmu.o
ifneq ($(CONFIG_MMU),y)
obj-y += nommu.o
endif
+obj-$(CONFIG_ARM_PTDUMP) += dump.o
obj-$(CONFIG_MODULES) += proc-syms.o
obj-$(CONFIG_ALIGNMENT_TRAP) += alignment.o
-obj-$(CONFIG_DISCONTIGMEM) += discontig.o
+obj-$(CONFIG_HIGHMEM) += highmem.o
+obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
obj-$(CONFIG_CPU_ABRT_NOMMU) += abort-nommu.o
obj-$(CONFIG_CPU_ABRT_EV4) += abort-ev4.o
@@ -26,30 +28,44 @@ obj-$(CONFIG_CPU_ABRT_EV5TJ) += abort-ev5tj.o
obj-$(CONFIG_CPU_ABRT_EV6) += abort-ev6.o
obj-$(CONFIG_CPU_ABRT_EV7) += abort-ev7.o
-obj-$(CONFIG_CPU_CACHE_V3) += cache-v3.o
+AFLAGS_abort-ev6.o :=-Wa,-march=armv6k
+AFLAGS_abort-ev7.o :=-Wa,-march=armv7-a
+
+obj-$(CONFIG_CPU_PABRT_LEGACY) += pabort-legacy.o
+obj-$(CONFIG_CPU_PABRT_V6) += pabort-v6.o
+obj-$(CONFIG_CPU_PABRT_V7) += pabort-v7.o
+
obj-$(CONFIG_CPU_CACHE_V4) += cache-v4.o
obj-$(CONFIG_CPU_CACHE_V4WT) += cache-v4wt.o
obj-$(CONFIG_CPU_CACHE_V4WB) += cache-v4wb.o
obj-$(CONFIG_CPU_CACHE_V6) += cache-v6.o
obj-$(CONFIG_CPU_CACHE_V7) += cache-v7.o
+obj-$(CONFIG_CPU_CACHE_FA) += cache-fa.o
+obj-$(CONFIG_CPU_CACHE_NOP) += cache-nop.o
+
+AFLAGS_cache-v6.o :=-Wa,-march=armv6
+AFLAGS_cache-v7.o :=-Wa,-march=armv7-a
-obj-$(CONFIG_CPU_COPY_V3) += copypage-v3.o
obj-$(CONFIG_CPU_COPY_V4WT) += copypage-v4wt.o
obj-$(CONFIG_CPU_COPY_V4WB) += copypage-v4wb.o
+obj-$(CONFIG_CPU_COPY_FEROCEON) += copypage-feroceon.o
obj-$(CONFIG_CPU_COPY_V6) += copypage-v6.o context.o
obj-$(CONFIG_CPU_SA1100) += copypage-v4mc.o
obj-$(CONFIG_CPU_XSCALE) += copypage-xscale.o
obj-$(CONFIG_CPU_XSC3) += copypage-xsc3.o
+obj-$(CONFIG_CPU_COPY_FA) += copypage-fa.o
-obj-$(CONFIG_CPU_TLB_V3) += tlb-v3.o
obj-$(CONFIG_CPU_TLB_V4WT) += tlb-v4.o
obj-$(CONFIG_CPU_TLB_V4WB) += tlb-v4wb.o
obj-$(CONFIG_CPU_TLB_V4WBI) += tlb-v4wbi.o
+obj-$(CONFIG_CPU_TLB_FEROCEON) += tlb-v4wbi.o # reuse v4wbi TLB functions
obj-$(CONFIG_CPU_TLB_V6) += tlb-v6.o
obj-$(CONFIG_CPU_TLB_V7) += tlb-v7.o
+obj-$(CONFIG_CPU_TLB_FA) += tlb-fa.o
+
+AFLAGS_tlb-v6.o :=-Wa,-march=armv6
+AFLAGS_tlb-v7.o :=-Wa,-march=armv7-a
-obj-$(CONFIG_CPU_ARM610) += proc-arm6_7.o
-obj-$(CONFIG_CPU_ARM710) += proc-arm6_7.o
obj-$(CONFIG_CPU_ARM7TDMI) += proc-arm7tdmi.o
obj-$(CONFIG_CPU_ARM720T) += proc-arm720.o
obj-$(CONFIG_CPU_ARM740T) += proc-arm740.o
@@ -60,6 +76,7 @@ obj-$(CONFIG_CPU_ARM925T) += proc-arm925.o
obj-$(CONFIG_CPU_ARM926T) += proc-arm926.o
obj-$(CONFIG_CPU_ARM940T) += proc-arm940.o
obj-$(CONFIG_CPU_ARM946E) += proc-arm946.o
+obj-$(CONFIG_CPU_FA526) += proc-fa526.o
obj-$(CONFIG_CPU_ARM1020) += proc-arm1020.o
obj-$(CONFIG_CPU_ARM1020E) += proc-arm1020e.o
obj-$(CONFIG_CPU_ARM1022) += proc-arm1022.o
@@ -68,7 +85,18 @@ obj-$(CONFIG_CPU_SA110) += proc-sa110.o
obj-$(CONFIG_CPU_SA1100) += proc-sa1100.o
obj-$(CONFIG_CPU_XSCALE) += proc-xscale.o
obj-$(CONFIG_CPU_XSC3) += proc-xsc3.o
+obj-$(CONFIG_CPU_MOHAWK) += proc-mohawk.o
+obj-$(CONFIG_CPU_FEROCEON) += proc-feroceon.o
obj-$(CONFIG_CPU_V6) += proc-v6.o
+obj-$(CONFIG_CPU_V6K) += proc-v6.o
obj-$(CONFIG_CPU_V7) += proc-v7.o
+obj-$(CONFIG_CPU_V7M) += proc-v7m.o
+
+AFLAGS_proc-v6.o :=-Wa,-march=armv6
+AFLAGS_proc-v7.o :=-Wa,-march=armv7-a
-obj-$(CONFIG_CACHE_L2X0) += cache-l2x0.o
+obj-$(CONFIG_OUTER_CACHE) += l2c-common.o
+obj-$(CONFIG_CACHE_FEROCEON_L2) += cache-feroceon-l2.o
+obj-$(CONFIG_CACHE_L2X0) += cache-l2x0.o l2c-l2x0-resume.o
+obj-$(CONFIG_CACHE_XSC3L2) += cache-xsc3l2.o
+obj-$(CONFIG_CACHE_TAUROS2) += cache-tauros2.o
diff --git a/arch/arm/mm/abort-ev4.S b/arch/arm/mm/abort-ev4.S
index 4f18f9e87ba..54473cd4aba 100644
--- a/arch/arm/mm/abort-ev4.S
+++ b/arch/arm/mm/abort-ev4.S
@@ -3,14 +3,11 @@
/*
* Function: v4_early_abort
*
- * Params : r2 = address of aborted instruction
- * : r3 = saved SPSR
+ * Params : r2 = pt_regs
+ * : r4 = aborted context pc
+ * : r5 = aborted context psr
*
- * Returns : r0 = address of abort
- * : r1 = FSR, bit 11 = write
- * : r2-r8 = corrupted
- * : r9 = preserved
- * : sp = pointer to registers
+ * Returns : r4 - r11, r13 preserved
*
* Purpose : obtain information about current aborted instruction.
* Note: we read user space. This means we might cause a data
@@ -21,10 +18,8 @@
ENTRY(v4_early_abort)
mrc p15, 0, r1, c5, c0, 0 @ get FSR
mrc p15, 0, r0, c6, c0, 0 @ get FAR
- ldr r3, [r2] @ read aborted ARM instruction
+ ldr r3, [r4] @ read aborted ARM instruction
bic r1, r1, #1 << 11 | 1 << 10 @ clear bits 11 and 10 of FSR
tst r3, #1 << 20 @ L = 1 -> write?
orreq r1, r1, #1 << 11 @ yes.
- mov pc, lr
-
-
+ b do_DataAbort
diff --git a/arch/arm/mm/abort-ev4t.S b/arch/arm/mm/abort-ev4t.S
index b6282548f92..9da704e7b86 100644
--- a/arch/arm/mm/abort-ev4t.S
+++ b/arch/arm/mm/abort-ev4t.S
@@ -4,14 +4,11 @@
/*
* Function: v4t_early_abort
*
- * Params : r2 = address of aborted instruction
- * : r3 = saved SPSR
+ * Params : r2 = pt_regs
+ * : r4 = aborted context pc
+ * : r5 = aborted context psr
*
- * Returns : r0 = address of abort
- * : r1 = FSR, bit 11 = write
- * : r2-r8 = corrupted
- * : r9 = preserved
- * : sp = pointer to registers
+ * Returns : r4 - r11, r13 preserved
*
* Purpose : obtain information about current aborted instruction.
* Note: we read user space. This means we might cause a data
@@ -22,9 +19,9 @@
ENTRY(v4t_early_abort)
mrc p15, 0, r1, c5, c0, 0 @ get FSR
mrc p15, 0, r0, c6, c0, 0 @ get FAR
- do_thumb_abort
- ldreq r3, [r2] @ read aborted ARM instruction
+ do_thumb_abort fsr=r1, pc=r4, psr=r5, tmp=r3
+ ldreq r3, [r4] @ read aborted ARM instruction
bic r1, r1, #1 << 11 | 1 << 10 @ clear bits 11 and 10 of FSR
tst r3, #1 << 20 @ check write
orreq r1, r1, #1 << 11
- mov pc, lr
+ b do_DataAbort
diff --git a/arch/arm/mm/abort-ev5t.S b/arch/arm/mm/abort-ev5t.S
index 02251b526c0..a0908d4653a 100644
--- a/arch/arm/mm/abort-ev5t.S
+++ b/arch/arm/mm/abort-ev5t.S
@@ -4,14 +4,11 @@
/*
* Function: v5t_early_abort
*
- * Params : r2 = address of aborted instruction
- * : r3 = saved SPSR
+ * Params : r2 = pt_regs
+ * : r4 = aborted context pc
+ * : r5 = aborted context psr
*
- * Returns : r0 = address of abort
- * : r1 = FSR, bit 11 = write
- * : r2-r8 = corrupted
- * : r9 = preserved
- * : sp = pointer to registers
+ * Returns : r4 - r11, r13 preserved
*
* Purpose : obtain information about current aborted instruction.
* Note: we read user space. This means we might cause a data
@@ -22,10 +19,10 @@
ENTRY(v5t_early_abort)
mrc p15, 0, r1, c5, c0, 0 @ get FSR
mrc p15, 0, r0, c6, c0, 0 @ get FAR
- do_thumb_abort
- ldreq r3, [r2] @ read aborted ARM instruction
+ do_thumb_abort fsr=r1, pc=r4, psr=r5, tmp=r3
+ ldreq r3, [r4] @ read aborted ARM instruction
bic r1, r1, #1 << 11 @ clear bits 11 of FSR
- do_ldrd_abort
+ do_ldrd_abort tmp=ip, insn=r3
tst r3, #1 << 20 @ check write
orreq r1, r1, #1 << 11
- mov pc, lr
+ b do_DataAbort
diff --git a/arch/arm/mm/abort-ev5tj.S b/arch/arm/mm/abort-ev5tj.S
index bce68d601c8..4006b7a6126 100644
--- a/arch/arm/mm/abort-ev5tj.S
+++ b/arch/arm/mm/abort-ev5tj.S
@@ -4,14 +4,11 @@
/*
* Function: v5tj_early_abort
*
- * Params : r2 = address of aborted instruction
- * : r3 = saved SPSR
+ * Params : r2 = pt_regs
+ * : r4 = aborted context pc
+ * : r5 = aborted context psr
*
- * Returns : r0 = address of abort
- * : r1 = FSR, bit 11 = write
- * : r2-r8 = corrupted
- * : r9 = preserved
- * : sp = pointer to registers
+ * Returns : r4 - r11, r13 preserved
*
* Purpose : obtain information about current aborted instruction.
* Note: we read user space. This means we might cause a data
@@ -23,13 +20,11 @@ ENTRY(v5tj_early_abort)
mrc p15, 0, r1, c5, c0, 0 @ get FSR
mrc p15, 0, r0, c6, c0, 0 @ get FAR
bic r1, r1, #1 << 11 | 1 << 10 @ clear bits 11 and 10 of FSR
- tst r3, #PSR_J_BIT @ Java?
- movne pc, lr
- do_thumb_abort
- ldreq r3, [r2] @ read aborted ARM instruction
- do_ldrd_abort
+ tst r5, #PSR_J_BIT @ Java?
+ bne do_DataAbort
+ do_thumb_abort fsr=r1, pc=r4, psr=r5, tmp=r3
+ ldreq r3, [r4] @ read aborted ARM instruction
+ do_ldrd_abort tmp=ip, insn=r3
tst r3, #1 << 20 @ L = 0 -> write
orreq r1, r1, #1 << 11 @ yes.
- mov pc, lr
-
-
+ b do_DataAbort
diff --git a/arch/arm/mm/abort-ev6.S b/arch/arm/mm/abort-ev6.S
index 8a7f65ba14b..3815a8262af 100644
--- a/arch/arm/mm/abort-ev6.S
+++ b/arch/arm/mm/abort-ev6.S
@@ -4,14 +4,11 @@
/*
* Function: v6_early_abort
*
- * Params : r2 = address of aborted instruction
- * : r3 = saved SPSR
+ * Params : r2 = pt_regs
+ * : r4 = aborted context pc
+ * : r5 = aborted context psr
*
- * Returns : r0 = address of abort
- * : r1 = FSR, bit 11 = write
- * : r2-r8 = corrupted
- * : r9 = preserved
- * : sp = pointer to registers
+ * Returns : r4 - r11, r13 preserved
*
* Purpose : obtain information about current aborted instruction.
* Note: we read user space. This means we might cause a data
@@ -20,25 +17,31 @@
*/
.align 5
ENTRY(v6_early_abort)
-#ifdef CONFIG_CPU_32v6K
+#ifdef CONFIG_CPU_V6
+ sub r1, sp, #4 @ Get unused stack location
+ strex r0, r1, [r1] @ Clear the exclusive monitor
+#elif defined(CONFIG_CPU_32v6K)
clrex
-#else
- strex r0, r1, [sp] @ Clear the exclusive monitor
#endif
mrc p15, 0, r1, c5, c0, 0 @ get FSR
mrc p15, 0, r0, c6, c0, 0 @ get FAR
/*
* Faulty SWP instruction on 1136 doesn't set bit 11 in DFSR.
- * The test below covers all the write situations, including Java bytecodes
*/
- bic r1, r1, #1 << 11 | 1 << 10 @ clear bits 11 and 10 of FSR
- tst r3, #PSR_J_BIT @ Java?
- movne pc, lr
- do_thumb_abort
- ldreq r3, [r2] @ read aborted ARM instruction
- do_ldrd_abort
+#ifdef CONFIG_ARM_ERRATA_326103
+ ldr ip, =0x4107b36
+ mrc p15, 0, r3, c0, c0, 0 @ get processor id
+ teq ip, r3, lsr #4 @ r0 ARM1136?
+ bne do_DataAbort
+ tst r5, #PSR_J_BIT @ Java?
+ tsteq r5, #PSR_T_BIT @ Thumb?
+ bne do_DataAbort
+ bic r1, r1, #1 << 11 @ clear bit 11 of FSR
+ ldr r3, [r4] @ read aborted ARM instruction
+ ARM_BE8(rev r3, r3)
+
+ do_ldrd_abort tmp=ip, insn=r3
tst r3, #1 << 20 @ L = 0 -> write
orreq r1, r1, #1 << 11 @ yes.
- mov pc, lr
-
-
+#endif
+ b do_DataAbort
diff --git a/arch/arm/mm/abort-ev7.S b/arch/arm/mm/abort-ev7.S
index eb90bce38e1..703375277ba 100644
--- a/arch/arm/mm/abort-ev7.S
+++ b/arch/arm/mm/abort-ev7.S
@@ -3,14 +3,11 @@
/*
* Function: v7_early_abort
*
- * Params : r2 = address of aborted instruction
- * : r3 = saved SPSR
+ * Params : r2 = pt_regs
+ * : r4 = aborted context pc
+ * : r5 = aborted context psr
*
- * Returns : r0 = address of abort
- * : r1 = FSR, bit 11 = write
- * : r2-r8 = corrupted
- * : r9 = preserved
- * : sp = pointer to registers
+ * Returns : r4 - r11, r13 preserved
*
* Purpose : obtain information about current aborted instruction.
*/
@@ -29,4 +26,26 @@ ENTRY(v7_early_abort)
* V6 code adjusts the returned DFSR.
* New designs should not need to patch up faults.
*/
- mov pc, lr
+
+#if defined(CONFIG_VERIFY_PERMISSION_FAULT)
+ /*
+ * Detect erroneous permission failures and fix
+ */
+ ldr r3, =0x40d @ On permission fault
+ and r3, r1, r3
+ cmp r3, #0x0d
+ bne do_DataAbort
+
+ mcr p15, 0, r0, c7, c8, 0 @ Retranslate FAR
+ isb
+ mrc p15, 0, ip, c7, c4, 0 @ Read the PAR
+ and r3, ip, #0x7b @ On translation fault
+ cmp r3, #0x0b
+ bne do_DataAbort
+ bic r1, r1, #0xf @ Fix up FSR FS[5:0]
+ and ip, ip, #0x7e
+ orr r1, r1, ip, LSR #1
+#endif
+
+ b do_DataAbort
+ENDPROC(v7_early_abort)
diff --git a/arch/arm/mm/abort-lv4t.S b/arch/arm/mm/abort-lv4t.S
index 9fb7b0e25ea..f3982580c27 100644
--- a/arch/arm/mm/abort-lv4t.S
+++ b/arch/arm/mm/abort-lv4t.S
@@ -3,14 +3,11 @@
/*
* Function: v4t_late_abort
*
- * Params : r2 = address of aborted instruction
- * : r3 = saved SPSR
+ * Params : r2 = pt_regs
+ * : r4 = aborted context pc
+ * : r5 = aborted context psr
*
- * Returns : r0 = address of abort
- * : r1 = FSR, bit 11 = write
- * : r2-r8 = corrupted
- * : r9 = preserved
- * : sp = pointer to registers
+ * Returns : r4-r5, r10-r11, r13 preserved
*
* Purpose : obtain information about current aborted instruction.
* Note: we read user space. This means we might cause a data
@@ -18,7 +15,7 @@
* picture. Unfortunately, this does happen. We live with it.
*/
ENTRY(v4t_late_abort)
- tst r3, #PSR_T_BIT @ check for thumb mode
+ tst r5, #PSR_T_BIT @ check for thumb mode
#ifdef CONFIG_CPU_CP15_MMU
mrc p15, 0, r1, c5, c0, 0 @ get FSR
mrc p15, 0, r0, c6, c0, 0 @ get FAR
@@ -28,7 +25,7 @@ ENTRY(v4t_late_abort)
mov r1, #0
#endif
bne .data_thumb_abort
- ldr r8, [r2] @ read arm instruction
+ ldr r8, [r4] @ read arm instruction
tst r8, #1 << 20 @ L = 1 -> write?
orreq r1, r1, #1 << 11 @ yes.
and r7, r8, #15 << 24
@@ -47,86 +44,84 @@ ENTRY(v4t_late_abort)
/* 9 */ b .data_arm_ldmstm @ ldm*b rn, <rlist>
/* a */ b .data_unknown
/* b */ b .data_unknown
-/* c */ mov pc, lr @ ldc rd, [rn], #m @ Same as ldr rd, [rn], #m
-/* d */ mov pc, lr @ ldc rd, [rn, #m]
+/* c */ b do_DataAbort @ ldc rd, [rn], #m @ Same as ldr rd, [rn], #m
+/* d */ b do_DataAbort @ ldc rd, [rn, #m]
/* e */ b .data_unknown
/* f */
.data_unknown: @ Part of jumptable
- mov r0, r2
+ mov r0, r4
mov r1, r8
- mov r2, sp
- bl baddataabort
- b ret_from_exception
+ b baddataabort
.data_arm_ldmstm:
tst r8, #1 << 21 @ check writeback bit
- moveq pc, lr @ no writeback -> no fixup
+ beq do_DataAbort @ no writeback -> no fixup
mov r7, #0x11
orr r7, r7, #0x1100
and r6, r8, r7
- and r2, r8, r7, lsl #1
- add r6, r6, r2, lsr #1
- and r2, r8, r7, lsl #2
- add r6, r6, r2, lsr #2
- and r2, r8, r7, lsl #3
- add r6, r6, r2, lsr #3
+ and r9, r8, r7, lsl #1
+ add r6, r6, r9, lsr #1
+ and r9, r8, r7, lsl #2
+ add r6, r6, r9, lsr #2
+ and r9, r8, r7, lsl #3
+ add r6, r6, r9, lsr #3
add r6, r6, r6, lsr #8
add r6, r6, r6, lsr #4
and r6, r6, #15 @ r6 = no. of registers to transfer.
- and r5, r8, #15 << 16 @ Extract 'n' from instruction
- ldr r7, [sp, r5, lsr #14] @ Get register 'Rn'
+ and r9, r8, #15 << 16 @ Extract 'n' from instruction
+ ldr r7, [r2, r9, lsr #14] @ Get register 'Rn'
tst r8, #1 << 23 @ Check U bit
subne r7, r7, r6, lsl #2 @ Undo increment
addeq r7, r7, r6, lsl #2 @ Undo decrement
- str r7, [sp, r5, lsr #14] @ Put register 'Rn'
- mov pc, lr
+ str r7, [r2, r9, lsr #14] @ Put register 'Rn'
+ b do_DataAbort
.data_arm_lateldrhpre:
tst r8, #1 << 21 @ Check writeback bit
- moveq pc, lr @ No writeback -> no fixup
+ beq do_DataAbort @ No writeback -> no fixup
.data_arm_lateldrhpost:
- and r5, r8, #0x00f @ get Rm / low nibble of immediate value
+ and r9, r8, #0x00f @ get Rm / low nibble of immediate value
tst r8, #1 << 22 @ if (immediate offset)
andne r6, r8, #0xf00 @ { immediate high nibble
- orrne r6, r5, r6, lsr #4 @ combine nibbles } else
- ldreq r6, [sp, r5, lsl #2] @ { load Rm value }
+ orrne r6, r9, r6, lsr #4 @ combine nibbles } else
+ ldreq r6, [r2, r9, lsl #2] @ { load Rm value }
.data_arm_apply_r6_and_rn:
- and r5, r8, #15 << 16 @ Extract 'n' from instruction
- ldr r7, [sp, r5, lsr #14] @ Get register 'Rn'
+ and r9, r8, #15 << 16 @ Extract 'n' from instruction
+ ldr r7, [r2, r9, lsr #14] @ Get register 'Rn'
tst r8, #1 << 23 @ Check U bit
subne r7, r7, r6 @ Undo incrmenet
addeq r7, r7, r6 @ Undo decrement
- str r7, [sp, r5, lsr #14] @ Put register 'Rn'
- mov pc, lr
+ str r7, [r2, r9, lsr #14] @ Put register 'Rn'
+ b do_DataAbort
.data_arm_lateldrpreconst:
tst r8, #1 << 21 @ check writeback bit
- moveq pc, lr @ no writeback -> no fixup
+ beq do_DataAbort @ no writeback -> no fixup
.data_arm_lateldrpostconst:
- movs r2, r8, lsl #20 @ Get offset
- moveq pc, lr @ zero -> no fixup
- and r5, r8, #15 << 16 @ Extract 'n' from instruction
- ldr r7, [sp, r5, lsr #14] @ Get register 'Rn'
+ movs r6, r8, lsl #20 @ Get offset
+ beq do_DataAbort @ zero -> no fixup
+ and r9, r8, #15 << 16 @ Extract 'n' from instruction
+ ldr r7, [r2, r9, lsr #14] @ Get register 'Rn'
tst r8, #1 << 23 @ Check U bit
- subne r7, r7, r2, lsr #20 @ Undo increment
- addeq r7, r7, r2, lsr #20 @ Undo decrement
- str r7, [sp, r5, lsr #14] @ Put register 'Rn'
- mov pc, lr
+ subne r7, r7, r6, lsr #20 @ Undo increment
+ addeq r7, r7, r6, lsr #20 @ Undo decrement
+ str r7, [r2, r9, lsr #14] @ Put register 'Rn'
+ b do_DataAbort
.data_arm_lateldrprereg:
tst r8, #1 << 21 @ check writeback bit
- moveq pc, lr @ no writeback -> no fixup
+ beq do_DataAbort @ no writeback -> no fixup
.data_arm_lateldrpostreg:
and r7, r8, #15 @ Extract 'm' from instruction
- ldr r6, [sp, r7, lsl #2] @ Get register 'Rm'
- mov r5, r8, lsr #7 @ get shift count
- ands r5, r5, #31
+ ldr r6, [r2, r7, lsl #2] @ Get register 'Rm'
+ mov r9, r8, lsr #7 @ get shift count
+ ands r9, r9, #31
and r7, r8, #0x70 @ get shift type
orreq r7, r7, #8 @ shift count = 0
add pc, pc, r7
nop
- mov r6, r6, lsl r5 @ 0: LSL #!0
+ mov r6, r6, lsl r9 @ 0: LSL #!0
b .data_arm_apply_r6_and_rn
b .data_arm_apply_r6_and_rn @ 1: LSL #0
nop
@@ -134,7 +129,7 @@ ENTRY(v4t_late_abort)
nop
b .data_unknown @ 3: MUL?
nop
- mov r6, r6, lsr r5 @ 4: LSR #!0
+ mov r6, r6, lsr r9 @ 4: LSR #!0
b .data_arm_apply_r6_and_rn
mov r6, r6, lsr #32 @ 5: LSR #32
b .data_arm_apply_r6_and_rn
@@ -142,7 +137,7 @@ ENTRY(v4t_late_abort)
nop
b .data_unknown @ 7: MUL?
nop
- mov r6, r6, asr r5 @ 8: ASR #!0
+ mov r6, r6, asr r9 @ 8: ASR #!0
b .data_arm_apply_r6_and_rn
mov r6, r6, asr #32 @ 9: ASR #32
b .data_arm_apply_r6_and_rn
@@ -150,7 +145,7 @@ ENTRY(v4t_late_abort)
nop
b .data_unknown @ B: MUL?
nop
- mov r6, r6, ror r5 @ C: ROR #!0
+ mov r6, r6, ror r9 @ C: ROR #!0
b .data_arm_apply_r6_and_rn
mov r6, r6, rrx @ D: RRX
b .data_arm_apply_r6_and_rn
@@ -159,7 +154,7 @@ ENTRY(v4t_late_abort)
b .data_unknown @ F: MUL?
.data_thumb_abort:
- ldrh r8, [r2] @ read instruction
+ ldrh r8, [r4] @ read instruction
tst r8, #1 << 11 @ L = 1 -> write?
orreq r1, r1, #1 << 8 @ yes
and r7, r8, #15 << 12
@@ -172,10 +167,10 @@ ENTRY(v4t_late_abort)
/* 3 */ b .data_unknown
/* 4 */ b .data_unknown
/* 5 */ b .data_thumb_reg
-/* 6 */ mov pc, lr
-/* 7 */ mov pc, lr
-/* 8 */ mov pc, lr
-/* 9 */ mov pc, lr
+/* 6 */ b do_DataAbort
+/* 7 */ b do_DataAbort
+/* 8 */ b do_DataAbort
+/* 9 */ b do_DataAbort
/* A */ b .data_unknown
/* B */ b .data_thumb_pushpop
/* C */ b .data_thumb_ldmstm
@@ -185,41 +180,41 @@ ENTRY(v4t_late_abort)
.data_thumb_reg:
tst r8, #1 << 9
- moveq pc, lr
+ beq do_DataAbort
tst r8, #1 << 10 @ If 'S' (signed) bit is set
movne r1, #0 @ it must be a load instr
- mov pc, lr
+ b do_DataAbort
.data_thumb_pushpop:
tst r8, #1 << 10
beq .data_unknown
and r6, r8, #0x55 @ hweight8(r8) + R bit
- and r2, r8, #0xaa
- add r6, r6, r2, lsr #1
- and r2, r6, #0xcc
+ and r9, r8, #0xaa
+ add r6, r6, r9, lsr #1
+ and r9, r6, #0xcc
and r6, r6, #0x33
- add r6, r6, r2, lsr #2
+ add r6, r6, r9, lsr #2
movs r7, r8, lsr #9 @ C = r8 bit 8 (R bit)
adc r6, r6, r6, lsr #4 @ high + low nibble + R bit
and r6, r6, #15 @ number of regs to transfer
- ldr r7, [sp, #13 << 2]
+ ldr r7, [r2, #13 << 2]
tst r8, #1 << 11
addeq r7, r7, r6, lsl #2 @ increment SP if PUSH
subne r7, r7, r6, lsl #2 @ decrement SP if POP
- str r7, [sp, #13 << 2]
- mov pc, lr
+ str r7, [r2, #13 << 2]
+ b do_DataAbort
.data_thumb_ldmstm:
and r6, r8, #0x55 @ hweight8(r8)
- and r2, r8, #0xaa
- add r6, r6, r2, lsr #1
- and r2, r6, #0xcc
+ and r9, r8, #0xaa
+ add r6, r6, r9, lsr #1
+ and r9, r6, #0xcc
and r6, r6, #0x33
- add r6, r6, r2, lsr #2
+ add r6, r6, r9, lsr #2
add r6, r6, r6, lsr #4
- and r5, r8, #7 << 8
- ldr r7, [sp, r5, lsr #6]
+ and r9, r8, #7 << 8
+ ldr r7, [r2, r9, lsr #6]
and r6, r6, #15 @ number of regs to transfer
sub r7, r7, r6, lsl #2 @ always decrement
- str r7, [sp, r5, lsr #6]
- mov pc, lr
+ str r7, [r2, r9, lsr #6]
+ b do_DataAbort
diff --git a/arch/arm/mm/abort-macro.S b/arch/arm/mm/abort-macro.S
index d7cb1bfa51a..2cbf68ef0e8 100644
--- a/arch/arm/mm/abort-macro.S
+++ b/arch/arm/mm/abort-macro.S
@@ -9,34 +9,32 @@
*
*/
- .macro do_thumb_abort
- tst r3, #PSR_T_BIT
+ .macro do_thumb_abort, fsr, pc, psr, tmp
+ tst \psr, #PSR_T_BIT
beq not_thumb
- ldrh r3, [r2] @ Read aborted Thumb instruction
- and r3, r3, # 0xfe00 @ Mask opcode field
- cmp r3, # 0x5600 @ Is it ldrsb?
- orreq r3, r3, #1 << 11 @ Set L-bit if yes
- tst r3, #1 << 11 @ L = 0 -> write
- orreq r1, r1, #1 << 11 @ yes.
- mov pc, lr
+ ldrh \tmp, [\pc] @ Read aborted Thumb instruction
+ and \tmp, \tmp, # 0xfe00 @ Mask opcode field
+ cmp \tmp, # 0x5600 @ Is it ldrsb?
+ orreq \tmp, \tmp, #1 << 11 @ Set L-bit if yes
+ tst \tmp, #1 << 11 @ L = 0 -> write
+ orreq \fsr, \fsr, #1 << 11 @ yes.
+ b do_DataAbort
not_thumb:
.endm
/*
- * We check for the following insturction encoding for LDRD.
+ * We check for the following instruction encoding for LDRD.
*
- * [27:25] == 0
+ * [27:25] == 000
* [7:4] == 1101
* [20] == 0
*/
- .macro do_ldrd_abort
- tst r3, #0x0e000000 @ [27:25] == 0
+ .macro do_ldrd_abort, tmp, insn
+ tst \insn, #0x0e100000 @ [27:25,20] == 0
bne not_ldrd
- and r2, r3, #0x000000f0 @ [7:4] == 1101
- cmp r2, #0x000000d0
- bne not_ldrd
- tst r3, #1 << 20 @ [20] == 0
- moveq pc, lr
+ and \tmp, \insn, #0x000000f0 @ [7:4] == 1101
+ cmp \tmp, #0x000000d0
+ beq do_DataAbort
not_ldrd:
.endm
diff --git a/arch/arm/mm/abort-nommu.S b/arch/arm/mm/abort-nommu.S
index a7cc7f9ee45..119cb479c2a 100644
--- a/arch/arm/mm/abort-nommu.S
+++ b/arch/arm/mm/abort-nommu.S
@@ -3,11 +3,11 @@
/*
* Function: nommu_early_abort
*
- * Params : r2 = address of aborted instruction
- * : r3 = saved SPSR
+ * Params : r2 = pt_regs
+ * : r4 = aborted context pc
+ * : r5 = aborted context psr
*
- * Returns : r0 = 0 (abort address)
- * : r1 = 0 (FSR)
+ * Returns : r4 - r11, r13 preserved
*
* Note: There is no FSR/FAR on !CPU_CP15_MMU cores.
* Just fill zero into the registers.
@@ -16,4 +16,5 @@
ENTRY(nommu_early_abort)
mov r0, #0 @ clear r0, r1 (no FSR/FAR)
mov r1, #0
- mov pc, lr
+ b do_DataAbort
+ENDPROC(nommu_early_abort)
diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c
index 074b7cb0774..b8cb1a2688a 100644
--- a/arch/arm/mm/alignment.c
+++ b/arch/arm/mm/alignment.c
@@ -11,17 +11,24 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
+#include <linux/moduleparam.h>
#include <linux/compiler.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/uaccess.h>
-#include <asm/uaccess.h>
+#include <asm/cp15.h>
+#include <asm/system_info.h>
#include <asm/unaligned.h>
+#include <asm/opcodes.h>
#include "fault.h"
+#include "mm.h"
/*
* 32-bit misaligned trap handler (c) 1998 San Mehat (CCC) -July 1998
@@ -61,6 +68,12 @@
#define SHIFT_ASR 0x40
#define SHIFT_RORRRX 0x60
+#define BAD_INSTR 0xdeadc0de
+
+/* Thumb-2 32 bit format per ARMv7 DDI0406A A6.3, either f800h,e800h,f800h */
+#define IS_T32(hi16) \
+ (((hi16) & 0xe000) == 0xe000 && ((hi16) & 0x1800))
+
static unsigned long ai_user;
static unsigned long ai_sys;
static unsigned long ai_skipped;
@@ -69,6 +82,40 @@ static unsigned long ai_word;
static unsigned long ai_dword;
static unsigned long ai_multi;
static int ai_usermode;
+static unsigned long cr_no_alignment;
+
+core_param(alignment, ai_usermode, int, 0600);
+
+#define UM_WARN (1 << 0)
+#define UM_FIXUP (1 << 1)
+#define UM_SIGNAL (1 << 2)
+
+/* Return true if and only if the ARMv6 unaligned access model is in use. */
+static bool cpu_is_v6_unaligned(void)
+{
+ return cpu_architecture() >= CPU_ARCH_ARMv6 && get_cr() & CR_U;
+}
+
+static int safe_usermode(int new_usermode, bool warn)
+{
+ /*
+ * ARMv6 and later CPUs can perform unaligned accesses for
+ * most single load and store instructions up to word size.
+ * LDM, STM, LDRD and STRD still need to be handled.
+ *
+ * Ignoring the alignment fault is not an option on these
+ * CPUs since we spin re-faulting the instruction without
+ * making any progress.
+ */
+ if (cpu_is_v6_unaligned() && !(new_usermode & (UM_FIXUP | UM_SIGNAL))) {
+ new_usermode |= UM_FIXUP;
+
+ if (warn)
+ printk(KERN_WARNING "alignment: ignoring faults is unsafe on this CPU. Defaulting to fixup mode.\n");
+ }
+
+ return new_usermode;
+}
#ifdef CONFIG_PROC_FS
static const char *usermode_action[] = {
@@ -80,36 +127,29 @@ static const char *usermode_action[] = {
"signal+warn"
};
-static int
-proc_alignment_read(char *page, char **start, off_t off, int count, int *eof,
- void *data)
+static int alignment_proc_show(struct seq_file *m, void *v)
{
- char *p = page;
- int len;
-
- p += sprintf(p, "User:\t\t%lu\n", ai_user);
- p += sprintf(p, "System:\t\t%lu\n", ai_sys);
- p += sprintf(p, "Skipped:\t%lu\n", ai_skipped);
- p += sprintf(p, "Half:\t\t%lu\n", ai_half);
- p += sprintf(p, "Word:\t\t%lu\n", ai_word);
+ seq_printf(m, "User:\t\t%lu\n", ai_user);
+ seq_printf(m, "System:\t\t%lu\n", ai_sys);
+ seq_printf(m, "Skipped:\t%lu\n", ai_skipped);
+ seq_printf(m, "Half:\t\t%lu\n", ai_half);
+ seq_printf(m, "Word:\t\t%lu\n", ai_word);
if (cpu_architecture() >= CPU_ARCH_ARMv5TE)
- p += sprintf(p, "DWord:\t\t%lu\n", ai_dword);
- p += sprintf(p, "Multi:\t\t%lu\n", ai_multi);
- p += sprintf(p, "User faults:\t%i (%s)\n", ai_usermode,
+ seq_printf(m, "DWord:\t\t%lu\n", ai_dword);
+ seq_printf(m, "Multi:\t\t%lu\n", ai_multi);
+ seq_printf(m, "User faults:\t%i (%s)\n", ai_usermode,
usermode_action[ai_usermode]);
- len = (p - page) - off;
- if (len < 0)
- len = 0;
-
- *eof = (len <= count) ? 1 : 0;
- *start = page + off;
+ return 0;
+}
- return len;
+static int alignment_proc_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, alignment_proc_show, NULL);
}
-static int proc_alignment_write(struct file *file, const char __user *buffer,
- unsigned long count, void *data)
+static ssize_t alignment_proc_write(struct file *file, const char __user *buffer,
+ size_t count, loff_t *pos)
{
char mode;
@@ -117,11 +157,18 @@ static int proc_alignment_write(struct file *file, const char __user *buffer,
if (get_user(mode, buffer))
return -EFAULT;
if (mode >= '0' && mode <= '5')
- ai_usermode = mode - '0';
+ ai_usermode = safe_usermode(mode - '0', true);
}
return count;
}
+static const struct file_operations alignment_proc_fops = {
+ .open = alignment_proc_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+ .write = alignment_proc_write,
+};
#endif /* CONFIG_PROC_FS */
union offset_union {
@@ -148,17 +195,19 @@ union offset_union {
#define __get8_unaligned_check(ins,val,addr,err) \
__asm__( \
- "1: "ins" %1, [%2], #1\n" \
+ ARM( "1: "ins" %1, [%2], #1\n" ) \
+ THUMB( "1: "ins" %1, [%2]\n" ) \
+ THUMB( " add %2, %2, #1\n" ) \
"2:\n" \
- " .section .fixup,\"ax\"\n" \
+ " .pushsection .fixup,\"ax\"\n" \
" .align 2\n" \
"3: mov %0, #1\n" \
" b 2b\n" \
- " .previous\n" \
- " .section __ex_table,\"a\"\n" \
+ " .popsection\n" \
+ " .pushsection __ex_table,\"a\"\n" \
" .align 3\n" \
" .long 1b, 3b\n" \
- " .previous\n" \
+ " .popsection\n" \
: "=r" (err), "=&r" (val), "=r" (addr) \
: "0" (err), "2" (addr))
@@ -204,20 +253,22 @@ union offset_union {
do { \
unsigned int err = 0, v = val, a = addr; \
__asm__( FIRST_BYTE_16 \
- "1: "ins" %1, [%2], #1\n" \
+ ARM( "1: "ins" %1, [%2], #1\n" ) \
+ THUMB( "1: "ins" %1, [%2]\n" ) \
+ THUMB( " add %2, %2, #1\n" ) \
" mov %1, %1, "NEXT_BYTE"\n" \
"2: "ins" %1, [%2]\n" \
"3:\n" \
- " .section .fixup,\"ax\"\n" \
+ " .pushsection .fixup,\"ax\"\n" \
" .align 2\n" \
"4: mov %0, #1\n" \
" b 3b\n" \
- " .previous\n" \
- " .section __ex_table,\"a\"\n" \
+ " .popsection\n" \
+ " .pushsection __ex_table,\"a\"\n" \
" .align 3\n" \
" .long 1b, 4b\n" \
" .long 2b, 4b\n" \
- " .previous\n" \
+ " .popsection\n" \
: "=r" (err), "=&r" (v), "=&r" (a) \
: "0" (err), "1" (v), "2" (a)); \
if (err) \
@@ -234,26 +285,32 @@ union offset_union {
do { \
unsigned int err = 0, v = val, a = addr; \
__asm__( FIRST_BYTE_32 \
- "1: "ins" %1, [%2], #1\n" \
+ ARM( "1: "ins" %1, [%2], #1\n" ) \
+ THUMB( "1: "ins" %1, [%2]\n" ) \
+ THUMB( " add %2, %2, #1\n" ) \
" mov %1, %1, "NEXT_BYTE"\n" \
- "2: "ins" %1, [%2], #1\n" \
+ ARM( "2: "ins" %1, [%2], #1\n" ) \
+ THUMB( "2: "ins" %1, [%2]\n" ) \
+ THUMB( " add %2, %2, #1\n" ) \
" mov %1, %1, "NEXT_BYTE"\n" \
- "3: "ins" %1, [%2], #1\n" \
+ ARM( "3: "ins" %1, [%2], #1\n" ) \
+ THUMB( "3: "ins" %1, [%2]\n" ) \
+ THUMB( " add %2, %2, #1\n" ) \
" mov %1, %1, "NEXT_BYTE"\n" \
"4: "ins" %1, [%2]\n" \
"5:\n" \
- " .section .fixup,\"ax\"\n" \
+ " .pushsection .fixup,\"ax\"\n" \
" .align 2\n" \
"6: mov %0, #1\n" \
" b 5b\n" \
- " .previous\n" \
- " .section __ex_table,\"a\"\n" \
+ " .popsection\n" \
+ " .pushsection __ex_table,\"a\"\n" \
" .align 3\n" \
" .long 1b, 6b\n" \
" .long 2b, 6b\n" \
" .long 3b, 6b\n" \
" .long 4b, 6b\n" \
- " .previous\n" \
+ " .popsection\n" \
: "=r" (err), "=&r" (v), "=&r" (a) \
: "0" (err), "1" (v), "2" (a)); \
if (err) \
@@ -327,38 +384,48 @@ do_alignment_ldrdstrd(unsigned long addr, unsigned long instr,
struct pt_regs *regs)
{
unsigned int rd = RD_BITS(instr);
-
- if (((rd & 1) == 1) || (rd == 14))
+ unsigned int rd2;
+ int load;
+
+ if ((instr & 0xfe000000) == 0xe8000000) {
+ /* ARMv7 Thumb-2 32-bit LDRD/STRD */
+ rd2 = (instr >> 8) & 0xf;
+ load = !!(LDST_L_BIT(instr));
+ } else if (((rd & 1) == 1) || (rd == 14))
goto bad;
+ else {
+ load = ((instr & 0xf0) == 0xd0);
+ rd2 = rd + 1;
+ }
ai_dword += 1;
if (user_mode(regs))
goto user;
- if ((instr & 0xf0) == 0xd0) {
+ if (load) {
unsigned long val;
get32_unaligned_check(val, addr);
regs->uregs[rd] = val;
get32_unaligned_check(val, addr + 4);
- regs->uregs[rd + 1] = val;
+ regs->uregs[rd2] = val;
} else {
put32_unaligned_check(regs->uregs[rd], addr);
- put32_unaligned_check(regs->uregs[rd + 1], addr + 4);
+ put32_unaligned_check(regs->uregs[rd2], addr + 4);
}
return TYPE_LDST;
user:
- if ((instr & 0xf0) == 0xd0) {
+ if (load) {
unsigned long val;
get32t_unaligned_check(val, addr);
regs->uregs[rd] = val;
get32t_unaligned_check(val, addr + 4);
- regs->uregs[rd + 1] = val;
+ regs->uregs[rd2] = val;
} else {
put32t_unaligned_check(regs->uregs[rd], addr);
- put32t_unaligned_check(regs->uregs[rd + 1], addr + 4);
+ put32t_unaligned_check(regs->uregs[rd2], addr + 4);
}
return TYPE_LDST;
@@ -611,32 +678,112 @@ thumb2arm(u16 tinstr)
/* Else fall through for illegal instruction case */
default:
- return 0xdeadc0de;
+ return BAD_INSTR;
}
}
+/*
+ * Convert Thumb-2 32 bit LDM, STM, LDRD, STRD to equivalent instruction
+ * handlable by ARM alignment handler, also find the corresponding handler,
+ * so that we can reuse ARM userland alignment fault fixups for Thumb.
+ *
+ * @pinstr: original Thumb-2 instruction; returns new handlable instruction
+ * @regs: register context.
+ * @poffset: return offset from faulted addr for later writeback
+ *
+ * NOTES:
+ * 1. Comments below refer to ARMv7 DDI0406A Thumb Instruction sections.
+ * 2. Register name Rt from ARMv7 is same as Rd from ARMv6 (Rd is Rt)
+ */
+static void *
+do_alignment_t32_to_handler(unsigned long *pinstr, struct pt_regs *regs,
+ union offset_union *poffset)
+{
+ unsigned long instr = *pinstr;
+ u16 tinst1 = (instr >> 16) & 0xffff;
+ u16 tinst2 = instr & 0xffff;
+
+ switch (tinst1 & 0xffe0) {
+ /* A6.3.5 Load/Store multiple */
+ case 0xe880: /* STM/STMIA/STMEA,LDM/LDMIA, PUSH/POP T2 */
+ case 0xe8a0: /* ...above writeback version */
+ case 0xe900: /* STMDB/STMFD, LDMDB/LDMEA */
+ case 0xe920: /* ...above writeback version */
+ /* no need offset decision since handler calculates it */
+ return do_alignment_ldmstm;
+
+ case 0xf840: /* POP/PUSH T3 (single register) */
+ if (RN_BITS(instr) == 13 && (tinst2 & 0x09ff) == 0x0904) {
+ u32 L = !!(LDST_L_BIT(instr));
+ const u32 subset[2] = {
+ 0xe92d0000, /* STMDB sp!,{registers} */
+ 0xe8bd0000, /* LDMIA sp!,{registers} */
+ };
+ *pinstr = subset[L] | (1<<RD_BITS(instr));
+ return do_alignment_ldmstm;
+ }
+ /* Else fall through for illegal instruction case */
+ break;
+
+ /* A6.3.6 Load/store double, STRD/LDRD(immed, lit, reg) */
+ case 0xe860:
+ case 0xe960:
+ case 0xe8e0:
+ case 0xe9e0:
+ poffset->un = (tinst2 & 0xff) << 2;
+ case 0xe940:
+ case 0xe9c0:
+ return do_alignment_ldrdstrd;
+
+ /*
+ * No need to handle load/store instructions up to word size
+ * since ARMv6 and later CPUs can perform unaligned accesses.
+ */
+ default:
+ break;
+ }
+ return NULL;
+}
+
static int
do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
- union offset_union offset;
+ union offset_union uninitialized_var(offset);
unsigned long instr = 0, instrptr;
int (*handler)(unsigned long addr, unsigned long instr, struct pt_regs *regs);
unsigned int type;
- mm_segment_t fs;
unsigned int fault;
u16 tinstr = 0;
+ int isize = 4;
+ int thumb2_32b = 0;
+
+ if (interrupts_enabled(regs))
+ local_irq_enable();
instrptr = instruction_pointer(regs);
- fs = get_fs();
- set_fs(KERNEL_DS);
if (thumb_mode(regs)) {
- fault = __get_user(tinstr, (u16 *)(instrptr & ~1));
- if (!(fault))
- instr = thumb2arm(tinstr);
- } else
- fault = __get_user(instr, (u32 *)instrptr);
- set_fs(fs);
+ u16 *ptr = (u16 *)(instrptr & ~1);
+ fault = probe_kernel_address(ptr, tinstr);
+ tinstr = __mem_to_opcode_thumb16(tinstr);
+ if (!fault) {
+ if (cpu_architecture() >= CPU_ARCH_ARMv7 &&
+ IS_T32(tinstr)) {
+ /* Thumb-2 32-bit */
+ u16 tinst2 = 0;
+ fault = probe_kernel_address(ptr + 1, tinst2);
+ tinst2 = __mem_to_opcode_thumb16(tinst2);
+ instr = __opcode_thumb32_compose(tinstr, tinst2);
+ thumb2_32b = 1;
+ } else {
+ isize = 2;
+ instr = thumb2arm(tinstr);
+ }
+ }
+ } else {
+ fault = probe_kernel_address(instrptr, instr);
+ instr = __mem_to_opcode_arm(instr);
+ }
if (fault) {
type = TYPE_FAULT;
@@ -650,7 +797,7 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
fixup:
- regs->ARM_pc += thumb_mode(regs) ? 2 : 4;
+ regs->ARM_pc += isize;
switch (CODING_BITS(instr)) {
case 0x00000000: /* 3.13.4 load/store instruction extensions */
@@ -709,18 +856,28 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
handler = do_alignment_ldrstr;
break;
- case 0x08000000: /* ldm or stm */
- handler = do_alignment_ldmstm;
+ case 0x08000000: /* ldm or stm, or thumb-2 32bit instruction */
+ if (thumb2_32b) {
+ offset.un = 0;
+ handler = do_alignment_t32_to_handler(&instr, regs, &offset);
+ } else {
+ offset.un = 0;
+ handler = do_alignment_ldmstm;
+ }
break;
default:
goto bad;
}
+ if (!handler)
+ goto bad;
type = handler(addr, instr, regs);
- if (type == TYPE_ERROR || type == TYPE_FAULT)
+ if (type == TYPE_ERROR || type == TYPE_FAULT) {
+ regs->ARM_pc -= isize;
goto bad_or_fault;
+ }
if (type == TYPE_LDST)
do_alignment_finish_ldst(addr, instr, regs, offset);
@@ -730,7 +887,6 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
bad_or_fault:
if (type == TYPE_ERROR)
goto bad;
- regs->ARM_pc -= thumb_mode(regs) ? 2 : 4;
/*
* We got a fault - fix it up, or die.
*/
@@ -746,33 +902,62 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
*/
printk(KERN_ERR "Alignment trap: not handling instruction "
"%0*lx at [<%08lx>]\n",
- thumb_mode(regs) ? 4 : 8,
- thumb_mode(regs) ? tinstr : instr, instrptr);
+ isize << 1,
+ isize == 2 ? tinstr : instr, instrptr);
ai_skipped += 1;
return 1;
user:
ai_user += 1;
- if (ai_usermode & 1)
+ if (ai_usermode & UM_WARN)
printk("Alignment trap: %s (%d) PC=0x%08lx Instr=0x%0*lx "
"Address=0x%08lx FSR 0x%03x\n", current->comm,
- current->pid, instrptr,
- thumb_mode(regs) ? 4 : 8,
- thumb_mode(regs) ? tinstr : instr,
+ task_pid_nr(current), instrptr,
+ isize << 1,
+ isize == 2 ? tinstr : instr,
addr, fsr);
- if (ai_usermode & 2)
+ if (ai_usermode & UM_FIXUP)
goto fixup;
- if (ai_usermode & 4)
- force_sig(SIGBUS, current);
- else
- set_cr(cr_no_alignment);
+ if (ai_usermode & UM_SIGNAL) {
+ siginfo_t si;
+
+ si.si_signo = SIGBUS;
+ si.si_errno = 0;
+ si.si_code = BUS_ADRALN;
+ si.si_addr = (void __user *)addr;
+
+ force_sig_info(si.si_signo, &si, current);
+ } else {
+ /*
+ * We're about to disable the alignment trap and return to
+ * user space. But if an interrupt occurs before actually
+ * reaching user space, then the IRQ vector entry code will
+ * notice that we were still in kernel space and therefore
+ * the alignment trap won't be re-enabled in that case as it
+ * is presumed to be always on from kernel space.
+ * Let's prevent that race by disabling interrupts here (they
+ * are disabled on the way back to user space anyway in
+ * entry-common.S) and disable the alignment trap only if
+ * there is no work pending for this thread.
+ */
+ raw_local_irq_disable();
+ if (!(current_thread_info()->flags & _TIF_WORK_MASK))
+ set_cr(cr_no_alignment);
+ }
return 0;
}
+static int __init noalign_setup(char *__unused)
+{
+ set_cr(__clear_cr(CR_A));
+ return 1;
+}
+__setup("noalign", noalign_setup);
+
/*
* This needs to be done after sysctl_init, otherwise sys/ will be
* overwritten. Actually, this shouldn't be in sys/ at all since
@@ -784,20 +969,33 @@ static int __init alignment_init(void)
#ifdef CONFIG_PROC_FS
struct proc_dir_entry *res;
- res = proc_mkdir("cpu", NULL);
+ res = proc_create("cpu/alignment", S_IWUSR | S_IRUGO, NULL,
+ &alignment_proc_fops);
if (!res)
return -ENOMEM;
+#endif
- res = create_proc_entry("alignment", S_IWUSR | S_IRUGO, res);
- if (!res)
- return -ENOMEM;
+ if (cpu_is_v6_unaligned()) {
+ set_cr(__clear_cr(CR_A));
+ ai_usermode = safe_usermode(ai_usermode, false);
+ }
- res->read_proc = proc_alignment_read;
- res->write_proc = proc_alignment_write;
-#endif
+ cr_no_alignment = get_cr() & ~CR_A;
- hook_fault_code(1, do_alignment, SIGILL, "alignment exception");
- hook_fault_code(3, do_alignment, SIGILL, "alignment exception");
+ hook_fault_code(FAULT_CODE_ALIGNMENT, do_alignment, SIGBUS, BUS_ADRALN,
+ "alignment exception");
+
+ /*
+ * ARMv6K and ARMv7 use fault status 3 (0b00011) as Access Flag section
+ * fault, not as alignment error.
+ *
+ * TODO: handle ARMv6K properly. Runtime check for 'K' extension is
+ * needed.
+ */
+ if (cpu_architecture() <= CPU_ARCH_ARMv6) {
+ hook_fault_code(3, do_alignment, SIGBUS, BUS_ADRALN,
+ "alignment exception");
+ }
return 0;
}
diff --git a/arch/arm/mm/cache-aurora-l2.h b/arch/arm/mm/cache-aurora-l2.h
new file mode 100644
index 00000000000..c8612476983
--- /dev/null
+++ b/arch/arm/mm/cache-aurora-l2.h
@@ -0,0 +1,55 @@
+/*
+ * AURORA shared L2 cache controller support
+ *
+ * Copyright (C) 2012 Marvell
+ *
+ * Yehuda Yitschak <yehuday@marvell.com>
+ * Gregory CLEMENT <gregory.clement@free-electrons.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#ifndef __ASM_ARM_HARDWARE_AURORA_L2_H
+#define __ASM_ARM_HARDWARE_AURORA_L2_H
+
+#define AURORA_SYNC_REG 0x700
+#define AURORA_RANGE_BASE_ADDR_REG 0x720
+#define AURORA_FLUSH_PHY_ADDR_REG 0x7f0
+#define AURORA_INVAL_RANGE_REG 0x774
+#define AURORA_CLEAN_RANGE_REG 0x7b4
+#define AURORA_FLUSH_RANGE_REG 0x7f4
+
+#define AURORA_ACR_REPLACEMENT_OFFSET 27
+#define AURORA_ACR_REPLACEMENT_MASK \
+ (0x3 << AURORA_ACR_REPLACEMENT_OFFSET)
+#define AURORA_ACR_REPLACEMENT_TYPE_WAYRR \
+ (0 << AURORA_ACR_REPLACEMENT_OFFSET)
+#define AURORA_ACR_REPLACEMENT_TYPE_LFSR \
+ (1 << AURORA_ACR_REPLACEMENT_OFFSET)
+#define AURORA_ACR_REPLACEMENT_TYPE_SEMIPLRU \
+ (3 << AURORA_ACR_REPLACEMENT_OFFSET)
+
+#define AURORA_ACR_FORCE_WRITE_POLICY_OFFSET 0
+#define AURORA_ACR_FORCE_WRITE_POLICY_MASK \
+ (0x3 << AURORA_ACR_FORCE_WRITE_POLICY_OFFSET)
+#define AURORA_ACR_FORCE_WRITE_POLICY_DIS \
+ (0 << AURORA_ACR_FORCE_WRITE_POLICY_OFFSET)
+#define AURORA_ACR_FORCE_WRITE_BACK_POLICY \
+ (1 << AURORA_ACR_FORCE_WRITE_POLICY_OFFSET)
+#define AURORA_ACR_FORCE_WRITE_THRO_POLICY \
+ (2 << AURORA_ACR_FORCE_WRITE_POLICY_OFFSET)
+
+#define MAX_RANGE_SIZE 1024
+
+#define AURORA_WAY_SIZE_SHIFT 2
+
+#define AURORA_CTRL_FW 0x100
+
+/* chose a number outside L2X0_CACHE_ID_PART_MASK to be sure to make
+ * the distinction between a number coming from hardware and a number
+ * coming from the device tree */
+#define AURORA_CACHE_ID 0x100
+
+#endif /* __ASM_ARM_HARDWARE_AURORA_L2_H */
diff --git a/arch/arm/mm/cache-fa.S b/arch/arm/mm/cache-fa.S
new file mode 100644
index 00000000000..e505befe51b
--- /dev/null
+++ b/arch/arm/mm/cache-fa.S
@@ -0,0 +1,249 @@
+/*
+ * linux/arch/arm/mm/cache-fa.S
+ *
+ * Copyright (C) 2005 Faraday Corp.
+ * Copyright (C) 2008-2009 Paulius Zaleckas <paulius.zaleckas@teltonika.lt>
+ *
+ * Based on cache-v4wb.S:
+ * Copyright (C) 1997-2002 Russell king
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Processors: FA520 FA526 FA626
+ */
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/memory.h>
+#include <asm/page.h>
+
+#include "proc-macros.S"
+
+/*
+ * The size of one data cache line.
+ */
+#define CACHE_DLINESIZE 16
+
+/*
+ * The total size of the data cache.
+ */
+#ifdef CONFIG_ARCH_GEMINI
+#define CACHE_DSIZE 8192
+#else
+#define CACHE_DSIZE 16384
+#endif
+
+/* FIXME: put optimal value here. Current one is just estimation */
+#define CACHE_DLIMIT (CACHE_DSIZE * 2)
+
+/*
+ * flush_icache_all()
+ *
+ * Unconditionally clean and invalidate the entire icache.
+ */
+ENTRY(fa_flush_icache_all)
+ mov r0, #0
+ mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
+ mov pc, lr
+ENDPROC(fa_flush_icache_all)
+
+/*
+ * flush_user_cache_all()
+ *
+ * Clean and invalidate all cache entries in a particular address
+ * space.
+ */
+ENTRY(fa_flush_user_cache_all)
+ /* FALLTHROUGH */
+/*
+ * flush_kern_cache_all()
+ *
+ * Clean and invalidate the entire cache.
+ */
+ENTRY(fa_flush_kern_cache_all)
+ mov ip, #0
+ mov r2, #VM_EXEC
+__flush_whole_cache:
+ mcr p15, 0, ip, c7, c14, 0 @ clean/invalidate D cache
+ tst r2, #VM_EXEC
+ mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
+ mcrne p15, 0, ip, c7, c5, 6 @ invalidate BTB
+ mcrne p15, 0, ip, c7, c10, 4 @ drain write buffer
+ mcrne p15, 0, ip, c7, c5, 4 @ prefetch flush
+ mov pc, lr
+
+/*
+ * flush_user_cache_range(start, end, flags)
+ *
+ * Invalidate a range of cache entries in the specified
+ * address space.
+ *
+ * - start - start address (inclusive, page aligned)
+ * - end - end address (exclusive, page aligned)
+ * - flags - vma_area_struct flags describing address space
+ */
+ENTRY(fa_flush_user_cache_range)
+ mov ip, #0
+ sub r3, r1, r0 @ calculate total size
+ cmp r3, #CACHE_DLIMIT @ total size >= limit?
+ bhs __flush_whole_cache @ flush whole D cache
+
+1: tst r2, #VM_EXEC
+ mcrne p15, 0, r0, c7, c5, 1 @ invalidate I line
+ mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry
+ add r0, r0, #CACHE_DLINESIZE
+ cmp r0, r1
+ blo 1b
+ tst r2, #VM_EXEC
+ mcrne p15, 0, ip, c7, c5, 6 @ invalidate BTB
+ mcrne p15, 0, ip, c7, c10, 4 @ data write barrier
+ mcrne p15, 0, ip, c7, c5, 4 @ prefetch flush
+ mov pc, lr
+
+/*
+ * coherent_kern_range(start, end)
+ *
+ * Ensure coherency between the Icache and the Dcache in the
+ * region described by start. If you have non-snooping
+ * Harvard caches, you need to implement this function.
+ *
+ * - start - virtual start address
+ * - end - virtual end address
+ */
+ENTRY(fa_coherent_kern_range)
+ /* fall through */
+
+/*
+ * coherent_user_range(start, end)
+ *
+ * Ensure coherency between the Icache and the Dcache in the
+ * region described by start. If you have non-snooping
+ * Harvard caches, you need to implement this function.
+ *
+ * - start - virtual start address
+ * - end - virtual end address
+ */
+ENTRY(fa_coherent_user_range)
+ bic r0, r0, #CACHE_DLINESIZE - 1
+1: mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry
+ mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry
+ add r0, r0, #CACHE_DLINESIZE
+ cmp r0, r1
+ blo 1b
+ mov r0, #0
+ mcr p15, 0, r0, c7, c5, 6 @ invalidate BTB
+ mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
+ mcr p15, 0, r0, c7, c5, 4 @ prefetch flush
+ mov pc, lr
+
+/*
+ * flush_kern_dcache_area(void *addr, size_t size)
+ *
+ * Ensure that the data held in the page kaddr is written back
+ * to the page in question.
+ *
+ * - addr - kernel address
+ * - size - size of region
+ */
+ENTRY(fa_flush_kern_dcache_area)
+ add r1, r0, r1
+1: mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D line
+ add r0, r0, #CACHE_DLINESIZE
+ cmp r0, r1
+ blo 1b
+ mov r0, #0
+ mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
+ mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
+ mov pc, lr
+
+/*
+ * dma_inv_range(start, end)
+ *
+ * Invalidate (discard) the specified virtual address range.
+ * May not write back any entries. If 'start' or 'end'
+ * are not cache line aligned, those lines must be written
+ * back.
+ *
+ * - start - virtual start address
+ * - end - virtual end address
+ */
+fa_dma_inv_range:
+ tst r0, #CACHE_DLINESIZE - 1
+ bic r0, r0, #CACHE_DLINESIZE - 1
+ mcrne p15, 0, r0, c7, c14, 1 @ clean & invalidate D entry
+ tst r1, #CACHE_DLINESIZE - 1
+ bic r1, r1, #CACHE_DLINESIZE - 1
+ mcrne p15, 0, r1, c7, c14, 1 @ clean & invalidate D entry
+1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry
+ add r0, r0, #CACHE_DLINESIZE
+ cmp r0, r1
+ blo 1b
+ mov r0, #0
+ mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
+ mov pc, lr
+
+/*
+ * dma_clean_range(start, end)
+ *
+ * Clean (write back) the specified virtual address range.
+ *
+ * - start - virtual start address
+ * - end - virtual end address
+ */
+fa_dma_clean_range:
+ bic r0, r0, #CACHE_DLINESIZE - 1
+1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
+ add r0, r0, #CACHE_DLINESIZE
+ cmp r0, r1
+ blo 1b
+ mov r0, #0
+ mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
+ mov pc, lr
+
+/*
+ * dma_flush_range(start,end)
+ * - start - virtual start address of region
+ * - end - virtual end address of region
+ */
+ENTRY(fa_dma_flush_range)
+ bic r0, r0, #CACHE_DLINESIZE - 1
+1: mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D entry
+ add r0, r0, #CACHE_DLINESIZE
+ cmp r0, r1
+ blo 1b
+ mov r0, #0
+ mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
+ mov pc, lr
+
+/*
+ * dma_map_area(start, size, dir)
+ * - start - kernel virtual start address
+ * - size - size of region
+ * - dir - DMA direction
+ */
+ENTRY(fa_dma_map_area)
+ add r1, r1, r0
+ cmp r2, #DMA_TO_DEVICE
+ beq fa_dma_clean_range
+ bcs fa_dma_inv_range
+ b fa_dma_flush_range
+ENDPROC(fa_dma_map_area)
+
+/*
+ * dma_unmap_area(start, size, dir)
+ * - start - kernel virtual start address
+ * - size - size of region
+ * - dir - DMA direction
+ */
+ENTRY(fa_dma_unmap_area)
+ mov pc, lr
+ENDPROC(fa_dma_unmap_area)
+
+ .globl fa_flush_kern_cache_louis
+ .equ fa_flush_kern_cache_louis, fa_flush_kern_cache_all
+
+ __INITDATA
+
+ @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
+ define_cache_functions fa
diff --git a/arch/arm/mm/cache-feroceon-l2.c b/arch/arm/mm/cache-feroceon-l2.c
new file mode 100644
index 00000000000..e028a7f2ebc
--- /dev/null
+++ b/arch/arm/mm/cache-feroceon-l2.c
@@ -0,0 +1,396 @@
+/*
+ * arch/arm/mm/cache-feroceon-l2.c - Feroceon L2 cache controller support
+ *
+ * Copyright (C) 2008 Marvell Semiconductor
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ *
+ * References:
+ * - Unified Layer 2 Cache for Feroceon CPU Cores,
+ * Document ID MV-S104858-00, Rev. A, October 23 2007.
+ */
+
+#include <linux/init.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/highmem.h>
+#include <linux/io.h>
+#include <asm/cacheflush.h>
+#include <asm/cp15.h>
+#include <asm/hardware/cache-feroceon-l2.h>
+
+#define L2_WRITETHROUGH_KIRKWOOD BIT(4)
+
+/*
+ * Low-level cache maintenance operations.
+ *
+ * As well as the regular 'clean/invalidate/flush L2 cache line by
+ * MVA' instructions, the Feroceon L2 cache controller also features
+ * 'clean/invalidate L2 range by MVA' operations.
+ *
+ * Cache range operations are initiated by writing the start and
+ * end addresses to successive cp15 registers, and process every
+ * cache line whose first byte address lies in the inclusive range
+ * [start:end].
+ *
+ * The cache range operations stall the CPU pipeline until completion.
+ *
+ * The range operations require two successive cp15 writes, in
+ * between which we don't want to be preempted.
+ */
+
+static inline unsigned long l2_get_va(unsigned long paddr)
+{
+#ifdef CONFIG_HIGHMEM
+ /*
+ * Because range ops can't be done on physical addresses,
+ * we simply install a virtual mapping for it only for the
+ * TLB lookup to occur, hence no need to flush the untouched
+ * memory mapping afterwards (note: a cache flush may happen
+ * in some circumstances depending on the path taken in kunmap_atomic).
+ */
+ void *vaddr = kmap_atomic_pfn(paddr >> PAGE_SHIFT);
+ return (unsigned long)vaddr + (paddr & ~PAGE_MASK);
+#else
+ return __phys_to_virt(paddr);
+#endif
+}
+
+static inline void l2_put_va(unsigned long vaddr)
+{
+#ifdef CONFIG_HIGHMEM
+ kunmap_atomic((void *)vaddr);
+#endif
+}
+
+static inline void l2_clean_pa(unsigned long addr)
+{
+ __asm__("mcr p15, 1, %0, c15, c9, 3" : : "r" (addr));
+}
+
+static inline void l2_clean_pa_range(unsigned long start, unsigned long end)
+{
+ unsigned long va_start, va_end, flags;
+
+ /*
+ * Make sure 'start' and 'end' reference the same page, as
+ * L2 is PIPT and range operations only do a TLB lookup on
+ * the start address.
+ */
+ BUG_ON((start ^ end) >> PAGE_SHIFT);
+
+ va_start = l2_get_va(start);
+ va_end = va_start + (end - start);
+ raw_local_irq_save(flags);
+ __asm__("mcr p15, 1, %0, c15, c9, 4\n\t"
+ "mcr p15, 1, %1, c15, c9, 5"
+ : : "r" (va_start), "r" (va_end));
+ raw_local_irq_restore(flags);
+ l2_put_va(va_start);
+}
+
+static inline void l2_clean_inv_pa(unsigned long addr)
+{
+ __asm__("mcr p15, 1, %0, c15, c10, 3" : : "r" (addr));
+}
+
+static inline void l2_inv_pa(unsigned long addr)
+{
+ __asm__("mcr p15, 1, %0, c15, c11, 3" : : "r" (addr));
+}
+
+static inline void l2_inv_pa_range(unsigned long start, unsigned long end)
+{
+ unsigned long va_start, va_end, flags;
+
+ /*
+ * Make sure 'start' and 'end' reference the same page, as
+ * L2 is PIPT and range operations only do a TLB lookup on
+ * the start address.
+ */
+ BUG_ON((start ^ end) >> PAGE_SHIFT);
+
+ va_start = l2_get_va(start);
+ va_end = va_start + (end - start);
+ raw_local_irq_save(flags);
+ __asm__("mcr p15, 1, %0, c15, c11, 4\n\t"
+ "mcr p15, 1, %1, c15, c11, 5"
+ : : "r" (va_start), "r" (va_end));
+ raw_local_irq_restore(flags);
+ l2_put_va(va_start);
+}
+
+static inline void l2_inv_all(void)
+{
+ __asm__("mcr p15, 1, %0, c15, c11, 0" : : "r" (0));
+}
+
+/*
+ * Linux primitives.
+ *
+ * Note that the end addresses passed to Linux primitives are
+ * noninclusive, while the hardware cache range operations use
+ * inclusive start and end addresses.
+ */
+#define CACHE_LINE_SIZE 32
+#define MAX_RANGE_SIZE 1024
+
+static int l2_wt_override;
+
+static unsigned long calc_range_end(unsigned long start, unsigned long end)
+{
+ unsigned long range_end;
+
+ BUG_ON(start & (CACHE_LINE_SIZE - 1));
+ BUG_ON(end & (CACHE_LINE_SIZE - 1));
+
+ /*
+ * Try to process all cache lines between 'start' and 'end'.
+ */
+ range_end = end;
+
+ /*
+ * Limit the number of cache lines processed at once,
+ * since cache range operations stall the CPU pipeline
+ * until completion.
+ */
+ if (range_end > start + MAX_RANGE_SIZE)
+ range_end = start + MAX_RANGE_SIZE;
+
+ /*
+ * Cache range operations can't straddle a page boundary.
+ */
+ if (range_end > (start | (PAGE_SIZE - 1)) + 1)
+ range_end = (start | (PAGE_SIZE - 1)) + 1;
+
+ return range_end;
+}
+
+static void feroceon_l2_inv_range(unsigned long start, unsigned long end)
+{
+ /*
+ * Clean and invalidate partial first cache line.
+ */
+ if (start & (CACHE_LINE_SIZE - 1)) {
+ l2_clean_inv_pa(start & ~(CACHE_LINE_SIZE - 1));
+ start = (start | (CACHE_LINE_SIZE - 1)) + 1;
+ }
+
+ /*
+ * Clean and invalidate partial last cache line.
+ */
+ if (start < end && end & (CACHE_LINE_SIZE - 1)) {
+ l2_clean_inv_pa(end & ~(CACHE_LINE_SIZE - 1));
+ end &= ~(CACHE_LINE_SIZE - 1);
+ }
+
+ /*
+ * Invalidate all full cache lines between 'start' and 'end'.
+ */
+ while (start < end) {
+ unsigned long range_end = calc_range_end(start, end);
+ l2_inv_pa_range(start, range_end - CACHE_LINE_SIZE);
+ start = range_end;
+ }
+
+ dsb();
+}
+
+static void feroceon_l2_clean_range(unsigned long start, unsigned long end)
+{
+ /*
+ * If L2 is forced to WT, the L2 will always be clean and we
+ * don't need to do anything here.
+ */
+ if (!l2_wt_override) {
+ start &= ~(CACHE_LINE_SIZE - 1);
+ end = (end + CACHE_LINE_SIZE - 1) & ~(CACHE_LINE_SIZE - 1);
+ while (start != end) {
+ unsigned long range_end = calc_range_end(start, end);
+ l2_clean_pa_range(start, range_end - CACHE_LINE_SIZE);
+ start = range_end;
+ }
+ }
+
+ dsb();
+}
+
+static void feroceon_l2_flush_range(unsigned long start, unsigned long end)
+{
+ start &= ~(CACHE_LINE_SIZE - 1);
+ end = (end + CACHE_LINE_SIZE - 1) & ~(CACHE_LINE_SIZE - 1);
+ while (start != end) {
+ unsigned long range_end = calc_range_end(start, end);
+ if (!l2_wt_override)
+ l2_clean_pa_range(start, range_end - CACHE_LINE_SIZE);
+ l2_inv_pa_range(start, range_end - CACHE_LINE_SIZE);
+ start = range_end;
+ }
+
+ dsb();
+}
+
+
+/*
+ * Routines to disable and re-enable the D-cache and I-cache at run
+ * time. These are necessary because the L2 cache can only be enabled
+ * or disabled while the L1 Dcache and Icache are both disabled.
+ */
+static int __init flush_and_disable_dcache(void)
+{
+ u32 cr;
+
+ cr = get_cr();
+ if (cr & CR_C) {
+ unsigned long flags;
+
+ raw_local_irq_save(flags);
+ flush_cache_all();
+ set_cr(cr & ~CR_C);
+ raw_local_irq_restore(flags);
+ return 1;
+ }
+ return 0;
+}
+
+static void __init enable_dcache(void)
+{
+ u32 cr;
+
+ cr = get_cr();
+ set_cr(cr | CR_C);
+}
+
+static void __init __invalidate_icache(void)
+{
+ __asm__("mcr p15, 0, %0, c7, c5, 0" : : "r" (0));
+}
+
+static int __init invalidate_and_disable_icache(void)
+{
+ u32 cr;
+
+ cr = get_cr();
+ if (cr & CR_I) {
+ set_cr(cr & ~CR_I);
+ __invalidate_icache();
+ return 1;
+ }
+ return 0;
+}
+
+static void __init enable_icache(void)
+{
+ u32 cr;
+
+ cr = get_cr();
+ set_cr(cr | CR_I);
+}
+
+static inline u32 read_extra_features(void)
+{
+ u32 u;
+
+ __asm__("mrc p15, 1, %0, c15, c1, 0" : "=r" (u));
+
+ return u;
+}
+
+static inline void write_extra_features(u32 u)
+{
+ __asm__("mcr p15, 1, %0, c15, c1, 0" : : "r" (u));
+}
+
+static void __init disable_l2_prefetch(void)
+{
+ u32 u;
+
+ /*
+ * Read the CPU Extra Features register and verify that the
+ * Disable L2 Prefetch bit is set.
+ */
+ u = read_extra_features();
+ if (!(u & 0x01000000)) {
+ printk(KERN_INFO "Feroceon L2: Disabling L2 prefetch.\n");
+ write_extra_features(u | 0x01000000);
+ }
+}
+
+static void __init enable_l2(void)
+{
+ u32 u;
+
+ u = read_extra_features();
+ if (!(u & 0x00400000)) {
+ int i, d;
+
+ printk(KERN_INFO "Feroceon L2: Enabling L2\n");
+
+ d = flush_and_disable_dcache();
+ i = invalidate_and_disable_icache();
+ l2_inv_all();
+ write_extra_features(u | 0x00400000);
+ if (i)
+ enable_icache();
+ if (d)
+ enable_dcache();
+ } else
+ pr_err(FW_BUG
+ "Feroceon L2: bootloader left the L2 cache on!\n");
+}
+
+void __init feroceon_l2_init(int __l2_wt_override)
+{
+ l2_wt_override = __l2_wt_override;
+
+ disable_l2_prefetch();
+
+ outer_cache.inv_range = feroceon_l2_inv_range;
+ outer_cache.clean_range = feroceon_l2_clean_range;
+ outer_cache.flush_range = feroceon_l2_flush_range;
+
+ enable_l2();
+
+ printk(KERN_INFO "Feroceon L2: Cache support initialised%s.\n",
+ l2_wt_override ? ", in WT override mode" : "");
+}
+#ifdef CONFIG_OF
+static const struct of_device_id feroceon_ids[] __initconst = {
+ { .compatible = "marvell,kirkwood-cache"},
+ { .compatible = "marvell,feroceon-cache"},
+ {}
+};
+
+int __init feroceon_of_init(void)
+{
+ struct device_node *node;
+ void __iomem *base;
+ bool l2_wt_override = false;
+ struct resource res;
+
+#if defined(CONFIG_CACHE_FEROCEON_L2_WRITETHROUGH)
+ l2_wt_override = true;
+#endif
+
+ node = of_find_matching_node(NULL, feroceon_ids);
+ if (node && of_device_is_compatible(node, "marvell,kirkwood-cache")) {
+ if (of_address_to_resource(node, 0, &res))
+ return -ENODEV;
+
+ base = ioremap(res.start, resource_size(&res));
+ if (!base)
+ return -ENOMEM;
+
+ if (l2_wt_override)
+ writel(readl(base) | L2_WRITETHROUGH_KIRKWOOD, base);
+ else
+ writel(readl(base) & ~L2_WRITETHROUGH_KIRKWOOD, base);
+ }
+
+ feroceon_l2_init(l2_wt_override);
+
+ return 0;
+}
+#endif
diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index 76b800a9519..7c3fb41a462 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -16,105 +16,1532 @@
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
+#include <linux/cpu.h>
+#include <linux/err.h>
#include <linux/init.h>
+#include <linux/smp.h>
#include <linux/spinlock.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
#include <asm/cacheflush.h>
-#include <asm/io.h>
+#include <asm/cp15.h>
+#include <asm/cputype.h>
#include <asm/hardware/cache-l2x0.h>
+#include "cache-tauros3.h"
+#include "cache-aurora-l2.h"
+
+struct l2c_init_data {
+ const char *type;
+ unsigned way_size_0;
+ unsigned num_lock;
+ void (*of_parse)(const struct device_node *, u32 *, u32 *);
+ void (*enable)(void __iomem *, u32, unsigned);
+ void (*fixup)(void __iomem *, u32, struct outer_cache_fns *);
+ void (*save)(void __iomem *);
+ struct outer_cache_fns outer_cache;
+};
#define CACHE_LINE_SIZE 32
static void __iomem *l2x0_base;
-static DEFINE_SPINLOCK(l2x0_lock);
+static DEFINE_RAW_SPINLOCK(l2x0_lock);
+static u32 l2x0_way_mask; /* Bitmask of active ways */
+static u32 l2x0_size;
+static unsigned long sync_reg_offset = L2X0_CACHE_SYNC;
+
+struct l2x0_regs l2x0_saved_regs;
+
+/*
+ * Common code for all cache controllers.
+ */
+static inline void l2c_wait_mask(void __iomem *reg, unsigned long mask)
+{
+ /* wait for cache operation by line or way to complete */
+ while (readl_relaxed(reg) & mask)
+ cpu_relax();
+}
-static inline void sync_writel(unsigned long val, unsigned long reg,
- unsigned long complete_mask)
+/*
+ * By default, we write directly to secure registers. Platforms must
+ * override this if they are running non-secure.
+ */
+static void l2c_write_sec(unsigned long val, void __iomem *base, unsigned reg)
+{
+ if (val == readl_relaxed(base + reg))
+ return;
+ if (outer_cache.write_sec)
+ outer_cache.write_sec(val, reg);
+ else
+ writel_relaxed(val, base + reg);
+}
+
+/*
+ * This should only be called when we have a requirement that the
+ * register be written due to a work-around, as platforms running
+ * in non-secure mode may not be able to access this register.
+ */
+static inline void l2c_set_debug(void __iomem *base, unsigned long val)
+{
+ l2c_write_sec(val, base, L2X0_DEBUG_CTRL);
+}
+
+static void __l2c_op_way(void __iomem *reg)
+{
+ writel_relaxed(l2x0_way_mask, reg);
+ l2c_wait_mask(reg, l2x0_way_mask);
+}
+
+static inline void l2c_unlock(void __iomem *base, unsigned num)
+{
+ unsigned i;
+
+ for (i = 0; i < num; i++) {
+ writel_relaxed(0, base + L2X0_LOCKDOWN_WAY_D_BASE +
+ i * L2X0_LOCKDOWN_STRIDE);
+ writel_relaxed(0, base + L2X0_LOCKDOWN_WAY_I_BASE +
+ i * L2X0_LOCKDOWN_STRIDE);
+ }
+}
+
+/*
+ * Enable the L2 cache controller. This function must only be
+ * called when the cache controller is known to be disabled.
+ */
+static void l2c_enable(void __iomem *base, u32 aux, unsigned num_lock)
{
unsigned long flags;
- spin_lock_irqsave(&l2x0_lock, flags);
- writel(val, l2x0_base + reg);
- /* wait for the operation to complete */
- while (readl(l2x0_base + reg) & complete_mask)
- ;
- spin_unlock_irqrestore(&l2x0_lock, flags);
+ l2c_write_sec(aux, base, L2X0_AUX_CTRL);
+
+ l2c_unlock(base, num_lock);
+
+ local_irq_save(flags);
+ __l2c_op_way(base + L2X0_INV_WAY);
+ writel_relaxed(0, base + sync_reg_offset);
+ l2c_wait_mask(base + sync_reg_offset, 1);
+ local_irq_restore(flags);
+
+ l2c_write_sec(L2X0_CTRL_EN, base, L2X0_CTRL);
+}
+
+static void l2c_disable(void)
+{
+ void __iomem *base = l2x0_base;
+
+ outer_cache.flush_all();
+ l2c_write_sec(0, base, L2X0_CTRL);
+ dsb(st);
}
+#ifdef CONFIG_CACHE_PL310
+static inline void cache_wait(void __iomem *reg, unsigned long mask)
+{
+ /* cache operations by line are atomic on PL310 */
+}
+#else
+#define cache_wait l2c_wait_mask
+#endif
+
static inline void cache_sync(void)
{
- sync_writel(0, L2X0_CACHE_SYNC, 1);
+ void __iomem *base = l2x0_base;
+
+ writel_relaxed(0, base + sync_reg_offset);
+ cache_wait(base + L2X0_CACHE_SYNC, 1);
+}
+
+#if defined(CONFIG_PL310_ERRATA_588369) || defined(CONFIG_PL310_ERRATA_727915)
+static inline void debug_writel(unsigned long val)
+{
+ l2c_set_debug(l2x0_base, val);
+}
+#else
+/* Optimised out for non-errata case */
+static inline void debug_writel(unsigned long val)
+{
}
+#endif
-static inline void l2x0_inv_all(void)
+static void l2x0_cache_sync(void)
{
- /* invalidate all ways */
- sync_writel(0xff, L2X0_INV_WAY, 0xff);
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&l2x0_lock, flags);
cache_sync();
+ raw_spin_unlock_irqrestore(&l2x0_lock, flags);
+}
+
+static void __l2x0_flush_all(void)
+{
+ debug_writel(0x03);
+ __l2c_op_way(l2x0_base + L2X0_CLEAN_INV_WAY);
+ cache_sync();
+ debug_writel(0x00);
+}
+
+static void l2x0_flush_all(void)
+{
+ unsigned long flags;
+
+ /* clean all ways */
+ raw_spin_lock_irqsave(&l2x0_lock, flags);
+ __l2x0_flush_all();
+ raw_spin_unlock_irqrestore(&l2x0_lock, flags);
}
-static void l2x0_inv_range(unsigned long start, unsigned long end)
+static void l2x0_disable(void)
{
- unsigned long addr;
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&l2x0_lock, flags);
+ __l2x0_flush_all();
+ l2c_write_sec(0, l2x0_base, L2X0_CTRL);
+ dsb(st);
+ raw_spin_unlock_irqrestore(&l2x0_lock, flags);
+}
+
+static void l2c_save(void __iomem *base)
+{
+ l2x0_saved_regs.aux_ctrl = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
+}
+
+/*
+ * L2C-210 specific code.
+ *
+ * The L2C-2x0 PA, set/way and sync operations are atomic, but we must
+ * ensure that no background operation is running. The way operations
+ * are all background tasks.
+ *
+ * While a background operation is in progress, any new operation is
+ * ignored (unspecified whether this causes an error.) Thankfully, not
+ * used on SMP.
+ *
+ * Never has a different sync register other than L2X0_CACHE_SYNC, but
+ * we use sync_reg_offset here so we can share some of this with L2C-310.
+ */
+static void __l2c210_cache_sync(void __iomem *base)
+{
+ writel_relaxed(0, base + sync_reg_offset);
+}
+
+static void __l2c210_op_pa_range(void __iomem *reg, unsigned long start,
+ unsigned long end)
+{
+ while (start < end) {
+ writel_relaxed(start, reg);
+ start += CACHE_LINE_SIZE;
+ }
+}
+
+static void l2c210_inv_range(unsigned long start, unsigned long end)
+{
+ void __iomem *base = l2x0_base;
if (start & (CACHE_LINE_SIZE - 1)) {
start &= ~(CACHE_LINE_SIZE - 1);
- sync_writel(start, L2X0_CLEAN_INV_LINE_PA, 1);
+ writel_relaxed(start, base + L2X0_CLEAN_INV_LINE_PA);
start += CACHE_LINE_SIZE;
}
if (end & (CACHE_LINE_SIZE - 1)) {
end &= ~(CACHE_LINE_SIZE - 1);
- sync_writel(end, L2X0_CLEAN_INV_LINE_PA, 1);
+ writel_relaxed(end, base + L2X0_CLEAN_INV_LINE_PA);
}
- for (addr = start; addr < end; addr += CACHE_LINE_SIZE)
- sync_writel(addr, L2X0_INV_LINE_PA, 1);
- cache_sync();
+ __l2c210_op_pa_range(base + L2X0_INV_LINE_PA, start, end);
+ __l2c210_cache_sync(base);
}
-static void l2x0_clean_range(unsigned long start, unsigned long end)
+static void l2c210_clean_range(unsigned long start, unsigned long end)
{
- unsigned long addr;
+ void __iomem *base = l2x0_base;
start &= ~(CACHE_LINE_SIZE - 1);
- for (addr = start; addr < end; addr += CACHE_LINE_SIZE)
- sync_writel(addr, L2X0_CLEAN_LINE_PA, 1);
- cache_sync();
+ __l2c210_op_pa_range(base + L2X0_CLEAN_LINE_PA, start, end);
+ __l2c210_cache_sync(base);
}
-static void l2x0_flush_range(unsigned long start, unsigned long end)
+static void l2c210_flush_range(unsigned long start, unsigned long end)
{
- unsigned long addr;
+ void __iomem *base = l2x0_base;
start &= ~(CACHE_LINE_SIZE - 1);
- for (addr = start; addr < end; addr += CACHE_LINE_SIZE)
- sync_writel(addr, L2X0_CLEAN_INV_LINE_PA, 1);
- cache_sync();
+ __l2c210_op_pa_range(base + L2X0_CLEAN_INV_LINE_PA, start, end);
+ __l2c210_cache_sync(base);
}
-void __init l2x0_init(void __iomem *base, __u32 aux_val, __u32 aux_mask)
+static void l2c210_flush_all(void)
{
- __u32 aux;
+ void __iomem *base = l2x0_base;
- l2x0_base = base;
+ BUG_ON(!irqs_disabled());
+
+ __l2c_op_way(base + L2X0_CLEAN_INV_WAY);
+ __l2c210_cache_sync(base);
+}
+
+static void l2c210_sync(void)
+{
+ __l2c210_cache_sync(l2x0_base);
+}
+
+static void l2c210_resume(void)
+{
+ void __iomem *base = l2x0_base;
+
+ if (!(readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN))
+ l2c_enable(base, l2x0_saved_regs.aux_ctrl, 1);
+}
+
+static const struct l2c_init_data l2c210_data __initconst = {
+ .type = "L2C-210",
+ .way_size_0 = SZ_8K,
+ .num_lock = 1,
+ .enable = l2c_enable,
+ .save = l2c_save,
+ .outer_cache = {
+ .inv_range = l2c210_inv_range,
+ .clean_range = l2c210_clean_range,
+ .flush_range = l2c210_flush_range,
+ .flush_all = l2c210_flush_all,
+ .disable = l2c_disable,
+ .sync = l2c210_sync,
+ .resume = l2c210_resume,
+ },
+};
+
+/*
+ * L2C-220 specific code.
+ *
+ * All operations are background operations: they have to be waited for.
+ * Conflicting requests generate a slave error (which will cause an
+ * imprecise abort.) Never uses sync_reg_offset, so we hard-code the
+ * sync register here.
+ *
+ * However, we can re-use the l2c210_resume call.
+ */
+static inline void __l2c220_cache_sync(void __iomem *base)
+{
+ writel_relaxed(0, base + L2X0_CACHE_SYNC);
+ l2c_wait_mask(base + L2X0_CACHE_SYNC, 1);
+}
+
+static void l2c220_op_way(void __iomem *base, unsigned reg)
+{
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&l2x0_lock, flags);
+ __l2c_op_way(base + reg);
+ __l2c220_cache_sync(base);
+ raw_spin_unlock_irqrestore(&l2x0_lock, flags);
+}
+
+static unsigned long l2c220_op_pa_range(void __iomem *reg, unsigned long start,
+ unsigned long end, unsigned long flags)
+{
+ raw_spinlock_t *lock = &l2x0_lock;
+
+ while (start < end) {
+ unsigned long blk_end = start + min(end - start, 4096UL);
+
+ while (start < blk_end) {
+ l2c_wait_mask(reg, 1);
+ writel_relaxed(start, reg);
+ start += CACHE_LINE_SIZE;
+ }
+
+ if (blk_end < end) {
+ raw_spin_unlock_irqrestore(lock, flags);
+ raw_spin_lock_irqsave(lock, flags);
+ }
+ }
+
+ return flags;
+}
+
+static void l2c220_inv_range(unsigned long start, unsigned long end)
+{
+ void __iomem *base = l2x0_base;
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&l2x0_lock, flags);
+ if ((start | end) & (CACHE_LINE_SIZE - 1)) {
+ if (start & (CACHE_LINE_SIZE - 1)) {
+ start &= ~(CACHE_LINE_SIZE - 1);
+ writel_relaxed(start, base + L2X0_CLEAN_INV_LINE_PA);
+ start += CACHE_LINE_SIZE;
+ }
+
+ if (end & (CACHE_LINE_SIZE - 1)) {
+ end &= ~(CACHE_LINE_SIZE - 1);
+ l2c_wait_mask(base + L2X0_CLEAN_INV_LINE_PA, 1);
+ writel_relaxed(end, base + L2X0_CLEAN_INV_LINE_PA);
+ }
+ }
+
+ flags = l2c220_op_pa_range(base + L2X0_INV_LINE_PA,
+ start, end, flags);
+ l2c_wait_mask(base + L2X0_INV_LINE_PA, 1);
+ __l2c220_cache_sync(base);
+ raw_spin_unlock_irqrestore(&l2x0_lock, flags);
+}
+
+static void l2c220_clean_range(unsigned long start, unsigned long end)
+{
+ void __iomem *base = l2x0_base;
+ unsigned long flags;
+
+ start &= ~(CACHE_LINE_SIZE - 1);
+ if ((end - start) >= l2x0_size) {
+ l2c220_op_way(base, L2X0_CLEAN_WAY);
+ return;
+ }
+
+ raw_spin_lock_irqsave(&l2x0_lock, flags);
+ flags = l2c220_op_pa_range(base + L2X0_CLEAN_LINE_PA,
+ start, end, flags);
+ l2c_wait_mask(base + L2X0_CLEAN_INV_LINE_PA, 1);
+ __l2c220_cache_sync(base);
+ raw_spin_unlock_irqrestore(&l2x0_lock, flags);
+}
+
+static void l2c220_flush_range(unsigned long start, unsigned long end)
+{
+ void __iomem *base = l2x0_base;
+ unsigned long flags;
+
+ start &= ~(CACHE_LINE_SIZE - 1);
+ if ((end - start) >= l2x0_size) {
+ l2c220_op_way(base, L2X0_CLEAN_INV_WAY);
+ return;
+ }
+
+ raw_spin_lock_irqsave(&l2x0_lock, flags);
+ flags = l2c220_op_pa_range(base + L2X0_CLEAN_INV_LINE_PA,
+ start, end, flags);
+ l2c_wait_mask(base + L2X0_CLEAN_INV_LINE_PA, 1);
+ __l2c220_cache_sync(base);
+ raw_spin_unlock_irqrestore(&l2x0_lock, flags);
+}
+
+static void l2c220_flush_all(void)
+{
+ l2c220_op_way(l2x0_base, L2X0_CLEAN_INV_WAY);
+}
+
+static void l2c220_sync(void)
+{
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&l2x0_lock, flags);
+ __l2c220_cache_sync(l2x0_base);
+ raw_spin_unlock_irqrestore(&l2x0_lock, flags);
+}
+
+static void l2c220_enable(void __iomem *base, u32 aux, unsigned num_lock)
+{
+ /*
+ * Always enable non-secure access to the lockdown registers -
+ * we write to them as part of the L2C enable sequence so they
+ * need to be accessible.
+ */
+ aux |= L220_AUX_CTRL_NS_LOCKDOWN;
+
+ l2c_enable(base, aux, num_lock);
+}
+
+static const struct l2c_init_data l2c220_data = {
+ .type = "L2C-220",
+ .way_size_0 = SZ_8K,
+ .num_lock = 1,
+ .enable = l2c220_enable,
+ .save = l2c_save,
+ .outer_cache = {
+ .inv_range = l2c220_inv_range,
+ .clean_range = l2c220_clean_range,
+ .flush_range = l2c220_flush_range,
+ .flush_all = l2c220_flush_all,
+ .disable = l2c_disable,
+ .sync = l2c220_sync,
+ .resume = l2c210_resume,
+ },
+};
+
+/*
+ * L2C-310 specific code.
+ *
+ * Very similar to L2C-210, the PA, set/way and sync operations are atomic,
+ * and the way operations are all background tasks. However, issuing an
+ * operation while a background operation is in progress results in a
+ * SLVERR response. We can reuse:
+ *
+ * __l2c210_cache_sync (using sync_reg_offset)
+ * l2c210_sync
+ * l2c210_inv_range (if 588369 is not applicable)
+ * l2c210_clean_range
+ * l2c210_flush_range (if 588369 is not applicable)
+ * l2c210_flush_all (if 727915 is not applicable)
+ *
+ * Errata:
+ * 588369: PL310 R0P0->R1P0, fixed R2P0.
+ * Affects: all clean+invalidate operations
+ * clean and invalidate skips the invalidate step, so we need to issue
+ * separate operations. We also require the above debug workaround
+ * enclosing this code fragment on affected parts. On unaffected parts,
+ * we must not use this workaround without the debug register writes
+ * to avoid exposing a problem similar to 727915.
+ *
+ * 727915: PL310 R2P0->R3P0, fixed R3P1.
+ * Affects: clean+invalidate by way
+ * clean and invalidate by way runs in the background, and a store can
+ * hit the line between the clean operation and invalidate operation,
+ * resulting in the store being lost.
+ *
+ * 752271: PL310 R3P0->R3P1-50REL0, fixed R3P2.
+ * Affects: 8x64-bit (double fill) line fetches
+ * double fill line fetches can fail to cause dirty data to be evicted
+ * from the cache before the new data overwrites the second line.
+ *
+ * 753970: PL310 R3P0, fixed R3P1.
+ * Affects: sync
+ * prevents merging writes after the sync operation, until another L2C
+ * operation is performed (or a number of other conditions.)
+ *
+ * 769419: PL310 R0P0->R3P1, fixed R3P2.
+ * Affects: store buffer
+ * store buffer is not automatically drained.
+ */
+static void l2c310_inv_range_erratum(unsigned long start, unsigned long end)
+{
+ void __iomem *base = l2x0_base;
+
+ if ((start | end) & (CACHE_LINE_SIZE - 1)) {
+ unsigned long flags;
+
+ /* Erratum 588369 for both clean+invalidate operations */
+ raw_spin_lock_irqsave(&l2x0_lock, flags);
+ l2c_set_debug(base, 0x03);
+
+ if (start & (CACHE_LINE_SIZE - 1)) {
+ start &= ~(CACHE_LINE_SIZE - 1);
+ writel_relaxed(start, base + L2X0_CLEAN_LINE_PA);
+ writel_relaxed(start, base + L2X0_INV_LINE_PA);
+ start += CACHE_LINE_SIZE;
+ }
+
+ if (end & (CACHE_LINE_SIZE - 1)) {
+ end &= ~(CACHE_LINE_SIZE - 1);
+ writel_relaxed(end, base + L2X0_CLEAN_LINE_PA);
+ writel_relaxed(end, base + L2X0_INV_LINE_PA);
+ }
+
+ l2c_set_debug(base, 0x00);
+ raw_spin_unlock_irqrestore(&l2x0_lock, flags);
+ }
+
+ __l2c210_op_pa_range(base + L2X0_INV_LINE_PA, start, end);
+ __l2c210_cache_sync(base);
+}
+
+static void l2c310_flush_range_erratum(unsigned long start, unsigned long end)
+{
+ raw_spinlock_t *lock = &l2x0_lock;
+ unsigned long flags;
+ void __iomem *base = l2x0_base;
+
+ raw_spin_lock_irqsave(lock, flags);
+ while (start < end) {
+ unsigned long blk_end = start + min(end - start, 4096UL);
+
+ l2c_set_debug(base, 0x03);
+ while (start < blk_end) {
+ writel_relaxed(start, base + L2X0_CLEAN_LINE_PA);
+ writel_relaxed(start, base + L2X0_INV_LINE_PA);
+ start += CACHE_LINE_SIZE;
+ }
+ l2c_set_debug(base, 0x00);
+
+ if (blk_end < end) {
+ raw_spin_unlock_irqrestore(lock, flags);
+ raw_spin_lock_irqsave(lock, flags);
+ }
+ }
+ raw_spin_unlock_irqrestore(lock, flags);
+ __l2c210_cache_sync(base);
+}
+
+static void l2c310_flush_all_erratum(void)
+{
+ void __iomem *base = l2x0_base;
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&l2x0_lock, flags);
+ l2c_set_debug(base, 0x03);
+ __l2c_op_way(base + L2X0_CLEAN_INV_WAY);
+ l2c_set_debug(base, 0x00);
+ __l2c210_cache_sync(base);
+ raw_spin_unlock_irqrestore(&l2x0_lock, flags);
+}
+
+static void __init l2c310_save(void __iomem *base)
+{
+ unsigned revision;
+
+ l2c_save(base);
+
+ l2x0_saved_regs.tag_latency = readl_relaxed(base +
+ L310_TAG_LATENCY_CTRL);
+ l2x0_saved_regs.data_latency = readl_relaxed(base +
+ L310_DATA_LATENCY_CTRL);
+ l2x0_saved_regs.filter_end = readl_relaxed(base +
+ L310_ADDR_FILTER_END);
+ l2x0_saved_regs.filter_start = readl_relaxed(base +
+ L310_ADDR_FILTER_START);
+
+ revision = readl_relaxed(base + L2X0_CACHE_ID) &
+ L2X0_CACHE_ID_RTL_MASK;
+
+ /* From r2p0, there is Prefetch offset/control register */
+ if (revision >= L310_CACHE_ID_RTL_R2P0)
+ l2x0_saved_regs.prefetch_ctrl = readl_relaxed(base +
+ L310_PREFETCH_CTRL);
+
+ /* From r3p0, there is Power control register */
+ if (revision >= L310_CACHE_ID_RTL_R3P0)
+ l2x0_saved_regs.pwr_ctrl = readl_relaxed(base +
+ L310_POWER_CTRL);
+}
+
+static void l2c310_resume(void)
+{
+ void __iomem *base = l2x0_base;
+
+ if (!(readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN)) {
+ unsigned revision;
+
+ /* restore pl310 setup */
+ writel_relaxed(l2x0_saved_regs.tag_latency,
+ base + L310_TAG_LATENCY_CTRL);
+ writel_relaxed(l2x0_saved_regs.data_latency,
+ base + L310_DATA_LATENCY_CTRL);
+ writel_relaxed(l2x0_saved_regs.filter_end,
+ base + L310_ADDR_FILTER_END);
+ writel_relaxed(l2x0_saved_regs.filter_start,
+ base + L310_ADDR_FILTER_START);
+
+ revision = readl_relaxed(base + L2X0_CACHE_ID) &
+ L2X0_CACHE_ID_RTL_MASK;
+
+ if (revision >= L310_CACHE_ID_RTL_R2P0)
+ l2c_write_sec(l2x0_saved_regs.prefetch_ctrl, base,
+ L310_PREFETCH_CTRL);
+ if (revision >= L310_CACHE_ID_RTL_R3P0)
+ l2c_write_sec(l2x0_saved_regs.pwr_ctrl, base,
+ L310_POWER_CTRL);
+
+ l2c_enable(base, l2x0_saved_regs.aux_ctrl, 8);
+
+ /* Re-enable full-line-of-zeros for Cortex-A9 */
+ if (l2x0_saved_regs.aux_ctrl & L310_AUX_CTRL_FULL_LINE_ZERO)
+ set_auxcr(get_auxcr() | BIT(3) | BIT(2) | BIT(1));
+ }
+}
+
+static int l2c310_cpu_enable_flz(struct notifier_block *nb, unsigned long act, void *data)
+{
+ switch (act & ~CPU_TASKS_FROZEN) {
+ case CPU_STARTING:
+ set_auxcr(get_auxcr() | BIT(3) | BIT(2) | BIT(1));
+ break;
+ case CPU_DYING:
+ set_auxcr(get_auxcr() & ~(BIT(3) | BIT(2) | BIT(1)));
+ break;
+ }
+ return NOTIFY_OK;
+}
+
+static void __init l2c310_enable(void __iomem *base, u32 aux, unsigned num_lock)
+{
+ unsigned rev = readl_relaxed(base + L2X0_CACHE_ID) & L2X0_CACHE_ID_RTL_MASK;
+ bool cortex_a9 = read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9;
+
+ if (rev >= L310_CACHE_ID_RTL_R2P0) {
+ if (cortex_a9) {
+ aux |= L310_AUX_CTRL_EARLY_BRESP;
+ pr_info("L2C-310 enabling early BRESP for Cortex-A9\n");
+ } else if (aux & L310_AUX_CTRL_EARLY_BRESP) {
+ pr_warn("L2C-310 early BRESP only supported with Cortex-A9\n");
+ aux &= ~L310_AUX_CTRL_EARLY_BRESP;
+ }
+ }
+
+ if (cortex_a9) {
+ u32 aux_cur = readl_relaxed(base + L2X0_AUX_CTRL);
+ u32 acr = get_auxcr();
+
+ pr_debug("Cortex-A9 ACR=0x%08x\n", acr);
+
+ if (acr & BIT(3) && !(aux_cur & L310_AUX_CTRL_FULL_LINE_ZERO))
+ pr_err("L2C-310: full line of zeros enabled in Cortex-A9 but not L2C-310 - invalid\n");
+
+ if (aux & L310_AUX_CTRL_FULL_LINE_ZERO && !(acr & BIT(3)))
+ pr_err("L2C-310: enabling full line of zeros but not enabled in Cortex-A9\n");
+
+ if (!(aux & L310_AUX_CTRL_FULL_LINE_ZERO) && !outer_cache.write_sec) {
+ aux |= L310_AUX_CTRL_FULL_LINE_ZERO;
+ pr_info("L2C-310 full line of zeros enabled for Cortex-A9\n");
+ }
+ } else if (aux & (L310_AUX_CTRL_FULL_LINE_ZERO | L310_AUX_CTRL_EARLY_BRESP)) {
+ pr_err("L2C-310: disabling Cortex-A9 specific feature bits\n");
+ aux &= ~(L310_AUX_CTRL_FULL_LINE_ZERO | L310_AUX_CTRL_EARLY_BRESP);
+ }
+
+ if (aux & (L310_AUX_CTRL_DATA_PREFETCH | L310_AUX_CTRL_INSTR_PREFETCH)) {
+ u32 prefetch = readl_relaxed(base + L310_PREFETCH_CTRL);
+
+ pr_info("L2C-310 %s%s prefetch enabled, offset %u lines\n",
+ aux & L310_AUX_CTRL_INSTR_PREFETCH ? "I" : "",
+ aux & L310_AUX_CTRL_DATA_PREFETCH ? "D" : "",
+ 1 + (prefetch & L310_PREFETCH_CTRL_OFFSET_MASK));
+ }
+
+ /* r3p0 or later has power control register */
+ if (rev >= L310_CACHE_ID_RTL_R3P0) {
+ u32 power_ctrl;
+
+ l2c_write_sec(L310_DYNAMIC_CLK_GATING_EN | L310_STNDBY_MODE_EN,
+ base, L310_POWER_CTRL);
+ power_ctrl = readl_relaxed(base + L310_POWER_CTRL);
+ pr_info("L2C-310 dynamic clock gating %sabled, standby mode %sabled\n",
+ power_ctrl & L310_DYNAMIC_CLK_GATING_EN ? "en" : "dis",
+ power_ctrl & L310_STNDBY_MODE_EN ? "en" : "dis");
+ }
+
+ /*
+ * Always enable non-secure access to the lockdown registers -
+ * we write to them as part of the L2C enable sequence so they
+ * need to be accessible.
+ */
+ aux |= L310_AUX_CTRL_NS_LOCKDOWN;
+
+ l2c_enable(base, aux, num_lock);
+
+ if (aux & L310_AUX_CTRL_FULL_LINE_ZERO) {
+ set_auxcr(get_auxcr() | BIT(3) | BIT(2) | BIT(1));
+ cpu_notifier(l2c310_cpu_enable_flz, 0);
+ }
+}
+
+static void __init l2c310_fixup(void __iomem *base, u32 cache_id,
+ struct outer_cache_fns *fns)
+{
+ unsigned revision = cache_id & L2X0_CACHE_ID_RTL_MASK;
+ const char *errata[8];
+ unsigned n = 0;
- /* disable L2X0 */
- writel(0, l2x0_base + L2X0_CTRL);
+ if (IS_ENABLED(CONFIG_PL310_ERRATA_588369) &&
+ revision < L310_CACHE_ID_RTL_R2P0 &&
+ /* For bcm compatibility */
+ fns->inv_range == l2c210_inv_range) {
+ fns->inv_range = l2c310_inv_range_erratum;
+ fns->flush_range = l2c310_flush_range_erratum;
+ errata[n++] = "588369";
+ }
+
+ if (IS_ENABLED(CONFIG_PL310_ERRATA_727915) &&
+ revision >= L310_CACHE_ID_RTL_R2P0 &&
+ revision < L310_CACHE_ID_RTL_R3P1) {
+ fns->flush_all = l2c310_flush_all_erratum;
+ errata[n++] = "727915";
+ }
+
+ if (revision >= L310_CACHE_ID_RTL_R3P0 &&
+ revision < L310_CACHE_ID_RTL_R3P2) {
+ u32 val = readl_relaxed(base + L310_PREFETCH_CTRL);
+ /* I don't think bit23 is required here... but iMX6 does so */
+ if (val & (BIT(30) | BIT(23))) {
+ val &= ~(BIT(30) | BIT(23));
+ l2c_write_sec(val, base, L310_PREFETCH_CTRL);
+ errata[n++] = "752271";
+ }
+ }
+
+ if (IS_ENABLED(CONFIG_PL310_ERRATA_753970) &&
+ revision == L310_CACHE_ID_RTL_R3P0) {
+ sync_reg_offset = L2X0_DUMMY_REG;
+ errata[n++] = "753970";
+ }
+
+ if (IS_ENABLED(CONFIG_PL310_ERRATA_769419))
+ errata[n++] = "769419";
+
+ if (n) {
+ unsigned i;
+
+ pr_info("L2C-310 errat%s", n > 1 ? "a" : "um");
+ for (i = 0; i < n; i++)
+ pr_cont(" %s", errata[i]);
+ pr_cont(" enabled\n");
+ }
+}
+
+static void l2c310_disable(void)
+{
+ /*
+ * If full-line-of-zeros is enabled, we must first disable it in the
+ * Cortex-A9 auxiliary control register before disabling the L2 cache.
+ */
+ if (l2x0_saved_regs.aux_ctrl & L310_AUX_CTRL_FULL_LINE_ZERO)
+ set_auxcr(get_auxcr() & ~(BIT(3) | BIT(2) | BIT(1)));
+
+ l2c_disable();
+}
- aux = readl(l2x0_base + L2X0_AUX_CTRL);
+static const struct l2c_init_data l2c310_init_fns __initconst = {
+ .type = "L2C-310",
+ .way_size_0 = SZ_8K,
+ .num_lock = 8,
+ .enable = l2c310_enable,
+ .fixup = l2c310_fixup,
+ .save = l2c310_save,
+ .outer_cache = {
+ .inv_range = l2c210_inv_range,
+ .clean_range = l2c210_clean_range,
+ .flush_range = l2c210_flush_range,
+ .flush_all = l2c210_flush_all,
+ .disable = l2c310_disable,
+ .sync = l2c210_sync,
+ .resume = l2c310_resume,
+ },
+};
+
+static void __init __l2c_init(const struct l2c_init_data *data,
+ u32 aux_val, u32 aux_mask, u32 cache_id)
+{
+ struct outer_cache_fns fns;
+ unsigned way_size_bits, ways;
+ u32 aux, old_aux;
+
+ /*
+ * Sanity check the aux values. aux_mask is the bits we preserve
+ * from reading the hardware register, and aux_val is the bits we
+ * set.
+ */
+ if (aux_val & aux_mask)
+ pr_alert("L2C: platform provided aux values permit register corruption.\n");
+
+ old_aux = aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
aux &= aux_mask;
aux |= aux_val;
- writel(aux, l2x0_base + L2X0_AUX_CTRL);
- l2x0_inv_all();
+ if (old_aux != aux)
+ pr_warn("L2C: DT/platform modifies aux control register: 0x%08x -> 0x%08x\n",
+ old_aux, aux);
+
+ /* Determine the number of ways */
+ switch (cache_id & L2X0_CACHE_ID_PART_MASK) {
+ case L2X0_CACHE_ID_PART_L310:
+ if ((aux_val | ~aux_mask) & (L2C_AUX_CTRL_WAY_SIZE_MASK | L310_AUX_CTRL_ASSOCIATIVITY_16))
+ pr_warn("L2C: DT/platform tries to modify or specify cache size\n");
+ if (aux & (1 << 16))
+ ways = 16;
+ else
+ ways = 8;
+ break;
+
+ case L2X0_CACHE_ID_PART_L210:
+ case L2X0_CACHE_ID_PART_L220:
+ ways = (aux >> 13) & 0xf;
+ break;
+
+ case AURORA_CACHE_ID:
+ ways = (aux >> 13) & 0xf;
+ ways = 2 << ((ways + 1) >> 2);
+ break;
+
+ default:
+ /* Assume unknown chips have 8 ways */
+ ways = 8;
+ break;
+ }
+
+ l2x0_way_mask = (1 << ways) - 1;
+
+ /*
+ * way_size_0 is the size that a way_size value of zero would be
+ * given the calculation: way_size = way_size_0 << way_size_bits.
+ * So, if way_size_bits=0 is reserved, but way_size_bits=1 is 16k,
+ * then way_size_0 would be 8k.
+ *
+ * L2 cache size = number of ways * way size.
+ */
+ way_size_bits = (aux & L2C_AUX_CTRL_WAY_SIZE_MASK) >>
+ L2C_AUX_CTRL_WAY_SIZE_SHIFT;
+ l2x0_size = ways * (data->way_size_0 << way_size_bits);
+
+ fns = data->outer_cache;
+ fns.write_sec = outer_cache.write_sec;
+ if (data->fixup)
+ data->fixup(l2x0_base, cache_id, &fns);
+
+ /*
+ * Check if l2x0 controller is already enabled. If we are booting
+ * in non-secure mode accessing the below registers will fault.
+ */
+ if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN))
+ data->enable(l2x0_base, aux, data->num_lock);
+
+ outer_cache = fns;
+
+ /*
+ * It is strange to save the register state before initialisation,
+ * but hey, this is what the DT implementations decided to do.
+ */
+ if (data->save)
+ data->save(l2x0_base);
+
+ /* Re-read it in case some bits are reserved. */
+ aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
+
+ pr_info("%s cache controller enabled, %d ways, %d kB\n",
+ data->type, ways, l2x0_size >> 10);
+ pr_info("%s: CACHE_ID 0x%08x, AUX_CTRL 0x%08x\n",
+ data->type, cache_id, aux);
+}
+
+void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask)
+{
+ const struct l2c_init_data *data;
+ u32 cache_id;
+
+ l2x0_base = base;
+
+ cache_id = readl_relaxed(base + L2X0_CACHE_ID);
+
+ switch (cache_id & L2X0_CACHE_ID_PART_MASK) {
+ default:
+ case L2X0_CACHE_ID_PART_L210:
+ data = &l2c210_data;
+ break;
+
+ case L2X0_CACHE_ID_PART_L220:
+ data = &l2c220_data;
+ break;
+
+ case L2X0_CACHE_ID_PART_L310:
+ data = &l2c310_init_fns;
+ break;
+ }
+
+ __l2c_init(data, aux_val, aux_mask, cache_id);
+}
+
+#ifdef CONFIG_OF
+static int l2_wt_override;
+
+/* Aurora don't have the cache ID register available, so we have to
+ * pass it though the device tree */
+static u32 cache_id_part_number_from_dt;
+
+static void __init l2x0_of_parse(const struct device_node *np,
+ u32 *aux_val, u32 *aux_mask)
+{
+ u32 data[2] = { 0, 0 };
+ u32 tag = 0;
+ u32 dirty = 0;
+ u32 val = 0, mask = 0;
+
+ of_property_read_u32(np, "arm,tag-latency", &tag);
+ if (tag) {
+ mask |= L2X0_AUX_CTRL_TAG_LATENCY_MASK;
+ val |= (tag - 1) << L2X0_AUX_CTRL_TAG_LATENCY_SHIFT;
+ }
+
+ of_property_read_u32_array(np, "arm,data-latency",
+ data, ARRAY_SIZE(data));
+ if (data[0] && data[1]) {
+ mask |= L2X0_AUX_CTRL_DATA_RD_LATENCY_MASK |
+ L2X0_AUX_CTRL_DATA_WR_LATENCY_MASK;
+ val |= ((data[0] - 1) << L2X0_AUX_CTRL_DATA_RD_LATENCY_SHIFT) |
+ ((data[1] - 1) << L2X0_AUX_CTRL_DATA_WR_LATENCY_SHIFT);
+ }
+
+ of_property_read_u32(np, "arm,dirty-latency", &dirty);
+ if (dirty) {
+ mask |= L2X0_AUX_CTRL_DIRTY_LATENCY_MASK;
+ val |= (dirty - 1) << L2X0_AUX_CTRL_DIRTY_LATENCY_SHIFT;
+ }
+
+ *aux_val &= ~mask;
+ *aux_val |= val;
+ *aux_mask &= ~mask;
+}
+
+static const struct l2c_init_data of_l2c210_data __initconst = {
+ .type = "L2C-210",
+ .way_size_0 = SZ_8K,
+ .num_lock = 1,
+ .of_parse = l2x0_of_parse,
+ .enable = l2c_enable,
+ .save = l2c_save,
+ .outer_cache = {
+ .inv_range = l2c210_inv_range,
+ .clean_range = l2c210_clean_range,
+ .flush_range = l2c210_flush_range,
+ .flush_all = l2c210_flush_all,
+ .disable = l2c_disable,
+ .sync = l2c210_sync,
+ .resume = l2c210_resume,
+ },
+};
+
+static const struct l2c_init_data of_l2c220_data __initconst = {
+ .type = "L2C-220",
+ .way_size_0 = SZ_8K,
+ .num_lock = 1,
+ .of_parse = l2x0_of_parse,
+ .enable = l2c220_enable,
+ .save = l2c_save,
+ .outer_cache = {
+ .inv_range = l2c220_inv_range,
+ .clean_range = l2c220_clean_range,
+ .flush_range = l2c220_flush_range,
+ .flush_all = l2c220_flush_all,
+ .disable = l2c_disable,
+ .sync = l2c220_sync,
+ .resume = l2c210_resume,
+ },
+};
+
+static void __init l2c310_of_parse(const struct device_node *np,
+ u32 *aux_val, u32 *aux_mask)
+{
+ u32 data[3] = { 0, 0, 0 };
+ u32 tag[3] = { 0, 0, 0 };
+ u32 filter[2] = { 0, 0 };
+
+ of_property_read_u32_array(np, "arm,tag-latency", tag, ARRAY_SIZE(tag));
+ if (tag[0] && tag[1] && tag[2])
+ writel_relaxed(
+ L310_LATENCY_CTRL_RD(tag[0] - 1) |
+ L310_LATENCY_CTRL_WR(tag[1] - 1) |
+ L310_LATENCY_CTRL_SETUP(tag[2] - 1),
+ l2x0_base + L310_TAG_LATENCY_CTRL);
+
+ of_property_read_u32_array(np, "arm,data-latency",
+ data, ARRAY_SIZE(data));
+ if (data[0] && data[1] && data[2])
+ writel_relaxed(
+ L310_LATENCY_CTRL_RD(data[0] - 1) |
+ L310_LATENCY_CTRL_WR(data[1] - 1) |
+ L310_LATENCY_CTRL_SETUP(data[2] - 1),
+ l2x0_base + L310_DATA_LATENCY_CTRL);
+
+ of_property_read_u32_array(np, "arm,filter-ranges",
+ filter, ARRAY_SIZE(filter));
+ if (filter[1]) {
+ writel_relaxed(ALIGN(filter[0] + filter[1], SZ_1M),
+ l2x0_base + L310_ADDR_FILTER_END);
+ writel_relaxed((filter[0] & ~(SZ_1M - 1)) | L310_ADDR_FILTER_EN,
+ l2x0_base + L310_ADDR_FILTER_START);
+ }
+}
+
+static const struct l2c_init_data of_l2c310_data __initconst = {
+ .type = "L2C-310",
+ .way_size_0 = SZ_8K,
+ .num_lock = 8,
+ .of_parse = l2c310_of_parse,
+ .enable = l2c310_enable,
+ .fixup = l2c310_fixup,
+ .save = l2c310_save,
+ .outer_cache = {
+ .inv_range = l2c210_inv_range,
+ .clean_range = l2c210_clean_range,
+ .flush_range = l2c210_flush_range,
+ .flush_all = l2c210_flush_all,
+ .disable = l2c310_disable,
+ .sync = l2c210_sync,
+ .resume = l2c310_resume,
+ },
+};
+
+/*
+ * This is a variant of the of_l2c310_data with .sync set to
+ * NULL. Outer sync operations are not needed when the system is I/O
+ * coherent, and potentially harmful in certain situations (PCIe/PL310
+ * deadlock on Armada 375/38x due to hardware I/O coherency). The
+ * other operations are kept because they are infrequent (therefore do
+ * not cause the deadlock in practice) and needed for secondary CPU
+ * boot and other power management activities.
+ */
+static const struct l2c_init_data of_l2c310_coherent_data __initconst = {
+ .type = "L2C-310 Coherent",
+ .way_size_0 = SZ_8K,
+ .num_lock = 8,
+ .of_parse = l2c310_of_parse,
+ .enable = l2c310_enable,
+ .fixup = l2c310_fixup,
+ .save = l2c310_save,
+ .outer_cache = {
+ .inv_range = l2c210_inv_range,
+ .clean_range = l2c210_clean_range,
+ .flush_range = l2c210_flush_range,
+ .flush_all = l2c210_flush_all,
+ .disable = l2c310_disable,
+ .resume = l2c310_resume,
+ },
+};
+
+/*
+ * Note that the end addresses passed to Linux primitives are
+ * noninclusive, while the hardware cache range operations use
+ * inclusive start and end addresses.
+ */
+static unsigned long calc_range_end(unsigned long start, unsigned long end)
+{
+ /*
+ * Limit the number of cache lines processed at once,
+ * since cache range operations stall the CPU pipeline
+ * until completion.
+ */
+ if (end > start + MAX_RANGE_SIZE)
+ end = start + MAX_RANGE_SIZE;
+
+ /*
+ * Cache range operations can't straddle a page boundary.
+ */
+ if (end > PAGE_ALIGN(start+1))
+ end = PAGE_ALIGN(start+1);
+
+ return end;
+}
+
+/*
+ * Make sure 'start' and 'end' reference the same page, as L2 is PIPT
+ * and range operations only do a TLB lookup on the start address.
+ */
+static void aurora_pa_range(unsigned long start, unsigned long end,
+ unsigned long offset)
+{
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&l2x0_lock, flags);
+ writel_relaxed(start, l2x0_base + AURORA_RANGE_BASE_ADDR_REG);
+ writel_relaxed(end, l2x0_base + offset);
+ raw_spin_unlock_irqrestore(&l2x0_lock, flags);
+
+ cache_sync();
+}
+
+static void aurora_inv_range(unsigned long start, unsigned long end)
+{
+ /*
+ * round start and end adresses up to cache line size
+ */
+ start &= ~(CACHE_LINE_SIZE - 1);
+ end = ALIGN(end, CACHE_LINE_SIZE);
+
+ /*
+ * Invalidate all full cache lines between 'start' and 'end'.
+ */
+ while (start < end) {
+ unsigned long range_end = calc_range_end(start, end);
+ aurora_pa_range(start, range_end - CACHE_LINE_SIZE,
+ AURORA_INVAL_RANGE_REG);
+ start = range_end;
+ }
+}
+
+static void aurora_clean_range(unsigned long start, unsigned long end)
+{
+ /*
+ * If L2 is forced to WT, the L2 will always be clean and we
+ * don't need to do anything here.
+ */
+ if (!l2_wt_override) {
+ start &= ~(CACHE_LINE_SIZE - 1);
+ end = ALIGN(end, CACHE_LINE_SIZE);
+ while (start != end) {
+ unsigned long range_end = calc_range_end(start, end);
+ aurora_pa_range(start, range_end - CACHE_LINE_SIZE,
+ AURORA_CLEAN_RANGE_REG);
+ start = range_end;
+ }
+ }
+}
+
+static void aurora_flush_range(unsigned long start, unsigned long end)
+{
+ start &= ~(CACHE_LINE_SIZE - 1);
+ end = ALIGN(end, CACHE_LINE_SIZE);
+ while (start != end) {
+ unsigned long range_end = calc_range_end(start, end);
+ /*
+ * If L2 is forced to WT, the L2 will always be clean and we
+ * just need to invalidate.
+ */
+ if (l2_wt_override)
+ aurora_pa_range(start, range_end - CACHE_LINE_SIZE,
+ AURORA_INVAL_RANGE_REG);
+ else
+ aurora_pa_range(start, range_end - CACHE_LINE_SIZE,
+ AURORA_FLUSH_RANGE_REG);
+ start = range_end;
+ }
+}
+
+static void aurora_save(void __iomem *base)
+{
+ l2x0_saved_regs.ctrl = readl_relaxed(base + L2X0_CTRL);
+ l2x0_saved_regs.aux_ctrl = readl_relaxed(base + L2X0_AUX_CTRL);
+}
+
+static void aurora_resume(void)
+{
+ void __iomem *base = l2x0_base;
+
+ if (!(readl(base + L2X0_CTRL) & L2X0_CTRL_EN)) {
+ writel_relaxed(l2x0_saved_regs.aux_ctrl, base + L2X0_AUX_CTRL);
+ writel_relaxed(l2x0_saved_regs.ctrl, base + L2X0_CTRL);
+ }
+}
+
+/*
+ * For Aurora cache in no outer mode, enable via the CP15 coprocessor
+ * broadcasting of cache commands to L2.
+ */
+static void __init aurora_enable_no_outer(void __iomem *base, u32 aux,
+ unsigned num_lock)
+{
+ u32 u;
+
+ asm volatile("mrc p15, 1, %0, c15, c2, 0" : "=r" (u));
+ u |= AURORA_CTRL_FW; /* Set the FW bit */
+ asm volatile("mcr p15, 1, %0, c15, c2, 0" : : "r" (u));
+
+ isb();
+
+ l2c_enable(base, aux, num_lock);
+}
+
+static void __init aurora_fixup(void __iomem *base, u32 cache_id,
+ struct outer_cache_fns *fns)
+{
+ sync_reg_offset = AURORA_SYNC_REG;
+}
+
+static void __init aurora_of_parse(const struct device_node *np,
+ u32 *aux_val, u32 *aux_mask)
+{
+ u32 val = AURORA_ACR_REPLACEMENT_TYPE_SEMIPLRU;
+ u32 mask = AURORA_ACR_REPLACEMENT_MASK;
+
+ of_property_read_u32(np, "cache-id-part",
+ &cache_id_part_number_from_dt);
+
+ /* Determine and save the write policy */
+ l2_wt_override = of_property_read_bool(np, "wt-override");
+
+ if (l2_wt_override) {
+ val |= AURORA_ACR_FORCE_WRITE_THRO_POLICY;
+ mask |= AURORA_ACR_FORCE_WRITE_POLICY_MASK;
+ }
+
+ *aux_val &= ~mask;
+ *aux_val |= val;
+ *aux_mask &= ~mask;
+}
+
+static const struct l2c_init_data of_aurora_with_outer_data __initconst = {
+ .type = "Aurora",
+ .way_size_0 = SZ_4K,
+ .num_lock = 4,
+ .of_parse = aurora_of_parse,
+ .enable = l2c_enable,
+ .fixup = aurora_fixup,
+ .save = aurora_save,
+ .outer_cache = {
+ .inv_range = aurora_inv_range,
+ .clean_range = aurora_clean_range,
+ .flush_range = aurora_flush_range,
+ .flush_all = l2x0_flush_all,
+ .disable = l2x0_disable,
+ .sync = l2x0_cache_sync,
+ .resume = aurora_resume,
+ },
+};
+
+static const struct l2c_init_data of_aurora_no_outer_data __initconst = {
+ .type = "Aurora",
+ .way_size_0 = SZ_4K,
+ .num_lock = 4,
+ .of_parse = aurora_of_parse,
+ .enable = aurora_enable_no_outer,
+ .fixup = aurora_fixup,
+ .save = aurora_save,
+ .outer_cache = {
+ .resume = aurora_resume,
+ },
+};
+
+/*
+ * For certain Broadcom SoCs, depending on the address range, different offsets
+ * need to be added to the address before passing it to L2 for
+ * invalidation/clean/flush
+ *
+ * Section Address Range Offset EMI
+ * 1 0x00000000 - 0x3FFFFFFF 0x80000000 VC
+ * 2 0x40000000 - 0xBFFFFFFF 0x40000000 SYS
+ * 3 0xC0000000 - 0xFFFFFFFF 0x80000000 VC
+ *
+ * When the start and end addresses have crossed two different sections, we
+ * need to break the L2 operation into two, each within its own section.
+ * For example, if we need to invalidate addresses starts at 0xBFFF0000 and
+ * ends at 0xC0001000, we need do invalidate 1) 0xBFFF0000 - 0xBFFFFFFF and 2)
+ * 0xC0000000 - 0xC0001000
+ *
+ * Note 1:
+ * By breaking a single L2 operation into two, we may potentially suffer some
+ * performance hit, but keep in mind the cross section case is very rare
+ *
+ * Note 2:
+ * We do not need to handle the case when the start address is in
+ * Section 1 and the end address is in Section 3, since it is not a valid use
+ * case
+ *
+ * Note 3:
+ * Section 1 in practical terms can no longer be used on rev A2. Because of
+ * that the code does not need to handle section 1 at all.
+ *
+ */
+#define BCM_SYS_EMI_START_ADDR 0x40000000UL
+#define BCM_VC_EMI_SEC3_START_ADDR 0xC0000000UL
+
+#define BCM_SYS_EMI_OFFSET 0x40000000UL
+#define BCM_VC_EMI_OFFSET 0x80000000UL
+
+static inline int bcm_addr_is_sys_emi(unsigned long addr)
+{
+ return (addr >= BCM_SYS_EMI_START_ADDR) &&
+ (addr < BCM_VC_EMI_SEC3_START_ADDR);
+}
+
+static inline unsigned long bcm_l2_phys_addr(unsigned long addr)
+{
+ if (bcm_addr_is_sys_emi(addr))
+ return addr + BCM_SYS_EMI_OFFSET;
+ else
+ return addr + BCM_VC_EMI_OFFSET;
+}
+
+static void bcm_inv_range(unsigned long start, unsigned long end)
+{
+ unsigned long new_start, new_end;
+
+ BUG_ON(start < BCM_SYS_EMI_START_ADDR);
+
+ if (unlikely(end <= start))
+ return;
+
+ new_start = bcm_l2_phys_addr(start);
+ new_end = bcm_l2_phys_addr(end);
+
+ /* normal case, no cross section between start and end */
+ if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) {
+ l2c210_inv_range(new_start, new_end);
+ return;
+ }
+
+ /* They cross sections, so it can only be a cross from section
+ * 2 to section 3
+ */
+ l2c210_inv_range(new_start,
+ bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1));
+ l2c210_inv_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR),
+ new_end);
+}
+
+static void bcm_clean_range(unsigned long start, unsigned long end)
+{
+ unsigned long new_start, new_end;
+
+ BUG_ON(start < BCM_SYS_EMI_START_ADDR);
+
+ if (unlikely(end <= start))
+ return;
+
+ new_start = bcm_l2_phys_addr(start);
+ new_end = bcm_l2_phys_addr(end);
+
+ /* normal case, no cross section between start and end */
+ if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) {
+ l2c210_clean_range(new_start, new_end);
+ return;
+ }
+
+ /* They cross sections, so it can only be a cross from section
+ * 2 to section 3
+ */
+ l2c210_clean_range(new_start,
+ bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1));
+ l2c210_clean_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR),
+ new_end);
+}
+
+static void bcm_flush_range(unsigned long start, unsigned long end)
+{
+ unsigned long new_start, new_end;
+
+ BUG_ON(start < BCM_SYS_EMI_START_ADDR);
+
+ if (unlikely(end <= start))
+ return;
+
+ if ((end - start) >= l2x0_size) {
+ outer_cache.flush_all();
+ return;
+ }
+
+ new_start = bcm_l2_phys_addr(start);
+ new_end = bcm_l2_phys_addr(end);
+
+ /* normal case, no cross section between start and end */
+ if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) {
+ l2c210_flush_range(new_start, new_end);
+ return;
+ }
+
+ /* They cross sections, so it can only be a cross from section
+ * 2 to section 3
+ */
+ l2c210_flush_range(new_start,
+ bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1));
+ l2c210_flush_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR),
+ new_end);
+}
+
+/* Broadcom L2C-310 start from ARMs R3P2 or later, and require no fixups */
+static const struct l2c_init_data of_bcm_l2x0_data __initconst = {
+ .type = "BCM-L2C-310",
+ .way_size_0 = SZ_8K,
+ .num_lock = 8,
+ .of_parse = l2c310_of_parse,
+ .enable = l2c310_enable,
+ .save = l2c310_save,
+ .outer_cache = {
+ .inv_range = bcm_inv_range,
+ .clean_range = bcm_clean_range,
+ .flush_range = bcm_flush_range,
+ .flush_all = l2c210_flush_all,
+ .disable = l2c310_disable,
+ .sync = l2c210_sync,
+ .resume = l2c310_resume,
+ },
+};
+
+static void __init tauros3_save(void __iomem *base)
+{
+ l2c_save(base);
+
+ l2x0_saved_regs.aux2_ctrl =
+ readl_relaxed(base + TAUROS3_AUX2_CTRL);
+ l2x0_saved_regs.prefetch_ctrl =
+ readl_relaxed(base + L310_PREFETCH_CTRL);
+}
+
+static void tauros3_resume(void)
+{
+ void __iomem *base = l2x0_base;
+
+ if (!(readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN)) {
+ writel_relaxed(l2x0_saved_regs.aux2_ctrl,
+ base + TAUROS3_AUX2_CTRL);
+ writel_relaxed(l2x0_saved_regs.prefetch_ctrl,
+ base + L310_PREFETCH_CTRL);
+
+ l2c_enable(base, l2x0_saved_regs.aux_ctrl, 8);
+ }
+}
+
+static const struct l2c_init_data of_tauros3_data __initconst = {
+ .type = "Tauros3",
+ .way_size_0 = SZ_8K,
+ .num_lock = 8,
+ .enable = l2c_enable,
+ .save = tauros3_save,
+ /* Tauros3 broadcasts L1 cache operations to L2 */
+ .outer_cache = {
+ .resume = tauros3_resume,
+ },
+};
+
+#define L2C_ID(name, fns) { .compatible = name, .data = (void *)&fns }
+static const struct of_device_id l2x0_ids[] __initconst = {
+ L2C_ID("arm,l210-cache", of_l2c210_data),
+ L2C_ID("arm,l220-cache", of_l2c220_data),
+ L2C_ID("arm,pl310-cache", of_l2c310_data),
+ L2C_ID("brcm,bcm11351-a2-pl310-cache", of_bcm_l2x0_data),
+ L2C_ID("marvell,aurora-outer-cache", of_aurora_with_outer_data),
+ L2C_ID("marvell,aurora-system-cache", of_aurora_no_outer_data),
+ L2C_ID("marvell,tauros3-cache", of_tauros3_data),
+ /* Deprecated IDs */
+ L2C_ID("bcm,bcm11351-a2-pl310-cache", of_bcm_l2x0_data),
+ {}
+};
+
+int __init l2x0_of_init(u32 aux_val, u32 aux_mask)
+{
+ const struct l2c_init_data *data;
+ struct device_node *np;
+ struct resource res;
+ u32 cache_id, old_aux;
+
+ np = of_find_matching_node(NULL, l2x0_ids);
+ if (!np)
+ return -ENODEV;
+
+ if (of_address_to_resource(np, 0, &res))
+ return -ENODEV;
+
+ l2x0_base = ioremap(res.start, resource_size(&res));
+ if (!l2x0_base)
+ return -ENOMEM;
+
+ l2x0_saved_regs.phy_base = res.start;
+
+ data = of_match_node(l2x0_ids, np)->data;
+
+ if (of_device_is_compatible(np, "arm,pl310-cache") &&
+ of_property_read_bool(np, "arm,io-coherent"))
+ data = &of_l2c310_coherent_data;
+
+ old_aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
+ if (old_aux != ((old_aux & aux_mask) | aux_val)) {
+ pr_warn("L2C: platform modifies aux control register: 0x%08x -> 0x%08x\n",
+ old_aux, (old_aux & aux_mask) | aux_val);
+ } else if (aux_mask != ~0U && aux_val != 0) {
+ pr_alert("L2C: platform provided aux values match the hardware, so have no effect. Please remove them.\n");
+ }
+
+ /* All L2 caches are unified, so this property should be specified */
+ if (!of_property_read_bool(np, "cache-unified"))
+ pr_err("L2C: device tree omits to specify unified cache\n");
+
+ /* L2 configuration can only be changed if the cache is disabled */
+ if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN))
+ if (data->of_parse)
+ data->of_parse(np, &aux_val, &aux_mask);
- /* enable L2X0 */
- writel(1, l2x0_base + L2X0_CTRL);
+ if (cache_id_part_number_from_dt)
+ cache_id = cache_id_part_number_from_dt;
+ else
+ cache_id = readl_relaxed(l2x0_base + L2X0_CACHE_ID);
- outer_cache.inv_range = l2x0_inv_range;
- outer_cache.clean_range = l2x0_clean_range;
- outer_cache.flush_range = l2x0_flush_range;
+ __l2c_init(data, aux_val, aux_mask, cache_id);
- printk(KERN_INFO "L2X0 cache controller enabled\n");
+ return 0;
}
+#endif
diff --git a/arch/arm/mm/cache-nop.S b/arch/arm/mm/cache-nop.S
new file mode 100644
index 00000000000..8e12ddca003
--- /dev/null
+++ b/arch/arm/mm/cache-nop.S
@@ -0,0 +1,50 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <linux/init.h>
+
+#include "proc-macros.S"
+
+ENTRY(nop_flush_icache_all)
+ mov pc, lr
+ENDPROC(nop_flush_icache_all)
+
+ .globl nop_flush_kern_cache_all
+ .equ nop_flush_kern_cache_all, nop_flush_icache_all
+
+ .globl nop_flush_kern_cache_louis
+ .equ nop_flush_kern_cache_louis, nop_flush_icache_all
+
+ .globl nop_flush_user_cache_all
+ .equ nop_flush_user_cache_all, nop_flush_icache_all
+
+ .globl nop_flush_user_cache_range
+ .equ nop_flush_user_cache_range, nop_flush_icache_all
+
+ .globl nop_coherent_kern_range
+ .equ nop_coherent_kern_range, nop_flush_icache_all
+
+ENTRY(nop_coherent_user_range)
+ mov r0, 0
+ mov pc, lr
+ENDPROC(nop_coherent_user_range)
+
+ .globl nop_flush_kern_dcache_area
+ .equ nop_flush_kern_dcache_area, nop_flush_icache_all
+
+ .globl nop_dma_flush_range
+ .equ nop_dma_flush_range, nop_flush_icache_all
+
+ .globl nop_dma_map_area
+ .equ nop_dma_map_area, nop_flush_icache_all
+
+ .globl nop_dma_unmap_area
+ .equ nop_dma_unmap_area, nop_flush_icache_all
+
+ __INITDATA
+
+ @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
+ define_cache_functions nop
diff --git a/arch/arm/mm/cache-tauros2.c b/arch/arm/mm/cache-tauros2.c
new file mode 100644
index 00000000000..b273739e635
--- /dev/null
+++ b/arch/arm/mm/cache-tauros2.c
@@ -0,0 +1,302 @@
+/*
+ * arch/arm/mm/cache-tauros2.c - Tauros2 L2 cache controller support
+ *
+ * Copyright (C) 2008 Marvell Semiconductor
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ *
+ * References:
+ * - PJ1 CPU Core Datasheet,
+ * Document ID MV-S104837-01, Rev 0.7, January 24 2008.
+ * - PJ4 CPU Core Datasheet,
+ * Document ID MV-S105190-00, Rev 0.7, March 14 2008.
+ */
+
+#include <linux/init.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <asm/cacheflush.h>
+#include <asm/cp15.h>
+#include <asm/cputype.h>
+#include <asm/hardware/cache-tauros2.h>
+
+
+/*
+ * When Tauros2 is used on a CPU that supports the v7 hierarchical
+ * cache operations, the cache handling code in proc-v7.S takes care
+ * of everything, including handling DMA coherency.
+ *
+ * So, we only need to register outer cache operations here if we're
+ * being used on a pre-v7 CPU, and we only need to build support for
+ * outer cache operations into the kernel image if the kernel has been
+ * configured to support a pre-v7 CPU.
+ */
+#ifdef CONFIG_CPU_32v5
+/*
+ * Low-level cache maintenance operations.
+ */
+static inline void tauros2_clean_pa(unsigned long addr)
+{
+ __asm__("mcr p15, 1, %0, c7, c11, 3" : : "r" (addr));
+}
+
+static inline void tauros2_clean_inv_pa(unsigned long addr)
+{
+ __asm__("mcr p15, 1, %0, c7, c15, 3" : : "r" (addr));
+}
+
+static inline void tauros2_inv_pa(unsigned long addr)
+{
+ __asm__("mcr p15, 1, %0, c7, c7, 3" : : "r" (addr));
+}
+
+
+/*
+ * Linux primitives.
+ *
+ * Note that the end addresses passed to Linux primitives are
+ * noninclusive.
+ */
+#define CACHE_LINE_SIZE 32
+
+static void tauros2_inv_range(unsigned long start, unsigned long end)
+{
+ /*
+ * Clean and invalidate partial first cache line.
+ */
+ if (start & (CACHE_LINE_SIZE - 1)) {
+ tauros2_clean_inv_pa(start & ~(CACHE_LINE_SIZE - 1));
+ start = (start | (CACHE_LINE_SIZE - 1)) + 1;
+ }
+
+ /*
+ * Clean and invalidate partial last cache line.
+ */
+ if (end & (CACHE_LINE_SIZE - 1)) {
+ tauros2_clean_inv_pa(end & ~(CACHE_LINE_SIZE - 1));
+ end &= ~(CACHE_LINE_SIZE - 1);
+ }
+
+ /*
+ * Invalidate all full cache lines between 'start' and 'end'.
+ */
+ while (start < end) {
+ tauros2_inv_pa(start);
+ start += CACHE_LINE_SIZE;
+ }
+
+ dsb();
+}
+
+static void tauros2_clean_range(unsigned long start, unsigned long end)
+{
+ start &= ~(CACHE_LINE_SIZE - 1);
+ while (start < end) {
+ tauros2_clean_pa(start);
+ start += CACHE_LINE_SIZE;
+ }
+
+ dsb();
+}
+
+static void tauros2_flush_range(unsigned long start, unsigned long end)
+{
+ start &= ~(CACHE_LINE_SIZE - 1);
+ while (start < end) {
+ tauros2_clean_inv_pa(start);
+ start += CACHE_LINE_SIZE;
+ }
+
+ dsb();
+}
+
+static void tauros2_disable(void)
+{
+ __asm__ __volatile__ (
+ "mcr p15, 1, %0, c7, c11, 0 @L2 Cache Clean All\n\t"
+ "mrc p15, 0, %0, c1, c0, 0\n\t"
+ "bic %0, %0, #(1 << 26)\n\t"
+ "mcr p15, 0, %0, c1, c0, 0 @Disable L2 Cache\n\t"
+ : : "r" (0x0));
+}
+
+static void tauros2_resume(void)
+{
+ __asm__ __volatile__ (
+ "mcr p15, 1, %0, c7, c7, 0 @L2 Cache Invalidate All\n\t"
+ "mrc p15, 0, %0, c1, c0, 0\n\t"
+ "orr %0, %0, #(1 << 26)\n\t"
+ "mcr p15, 0, %0, c1, c0, 0 @Enable L2 Cache\n\t"
+ : : "r" (0x0));
+}
+#endif
+
+static inline u32 __init read_extra_features(void)
+{
+ u32 u;
+
+ __asm__("mrc p15, 1, %0, c15, c1, 0" : "=r" (u));
+
+ return u;
+}
+
+static inline void __init write_extra_features(u32 u)
+{
+ __asm__("mcr p15, 1, %0, c15, c1, 0" : : "r" (u));
+}
+
+static inline int __init cpuid_scheme(void)
+{
+ return !!((processor_id & 0x000f0000) == 0x000f0000);
+}
+
+static inline u32 __init read_mmfr3(void)
+{
+ u32 mmfr3;
+
+ __asm__("mrc p15, 0, %0, c0, c1, 7\n" : "=r" (mmfr3));
+
+ return mmfr3;
+}
+
+static inline u32 __init read_actlr(void)
+{
+ u32 actlr;
+
+ __asm__("mrc p15, 0, %0, c1, c0, 1\n" : "=r" (actlr));
+
+ return actlr;
+}
+
+static inline void __init write_actlr(u32 actlr)
+{
+ __asm__("mcr p15, 0, %0, c1, c0, 1\n" : : "r" (actlr));
+}
+
+static void enable_extra_feature(unsigned int features)
+{
+ u32 u;
+
+ u = read_extra_features();
+
+ if (features & CACHE_TAUROS2_PREFETCH_ON)
+ u &= ~0x01000000;
+ else
+ u |= 0x01000000;
+ printk(KERN_INFO "Tauros2: %s L2 prefetch.\n",
+ (features & CACHE_TAUROS2_PREFETCH_ON)
+ ? "Enabling" : "Disabling");
+
+ if (features & CACHE_TAUROS2_LINEFILL_BURST8)
+ u |= 0x00100000;
+ else
+ u &= ~0x00100000;
+ printk(KERN_INFO "Tauros2: %s line fill burt8.\n",
+ (features & CACHE_TAUROS2_LINEFILL_BURST8)
+ ? "Enabling" : "Disabling");
+
+ write_extra_features(u);
+}
+
+static void __init tauros2_internal_init(unsigned int features)
+{
+ char *mode = NULL;
+
+ enable_extra_feature(features);
+
+#ifdef CONFIG_CPU_32v5
+ if ((processor_id & 0xff0f0000) == 0x56050000) {
+ u32 feat;
+
+ /*
+ * v5 CPUs with Tauros2 have the L2 cache enable bit
+ * located in the CPU Extra Features register.
+ */
+ feat = read_extra_features();
+ if (!(feat & 0x00400000)) {
+ printk(KERN_INFO "Tauros2: Enabling L2 cache.\n");
+ write_extra_features(feat | 0x00400000);
+ }
+
+ mode = "ARMv5";
+ outer_cache.inv_range = tauros2_inv_range;
+ outer_cache.clean_range = tauros2_clean_range;
+ outer_cache.flush_range = tauros2_flush_range;
+ outer_cache.disable = tauros2_disable;
+ outer_cache.resume = tauros2_resume;
+ }
+#endif
+
+#ifdef CONFIG_CPU_32v7
+ /*
+ * Check whether this CPU has support for the v7 hierarchical
+ * cache ops. (PJ4 is in its v7 personality mode if the MMFR3
+ * register indicates support for the v7 hierarchical cache
+ * ops.)
+ *
+ * (Although strictly speaking there may exist CPUs that
+ * implement the v7 cache ops but are only ARMv6 CPUs (due to
+ * not complying with all of the other ARMv7 requirements),
+ * there are no real-life examples of Tauros2 being used on
+ * such CPUs as of yet.)
+ */
+ if (cpuid_scheme() && (read_mmfr3() & 0xf) == 1) {
+ u32 actlr;
+
+ /*
+ * When Tauros2 is used in an ARMv7 system, the L2
+ * enable bit is located in the Auxiliary System Control
+ * Register (which is the only register allowed by the
+ * ARMv7 spec to contain fine-grained cache control bits).
+ */
+ actlr = read_actlr();
+ if (!(actlr & 0x00000002)) {
+ printk(KERN_INFO "Tauros2: Enabling L2 cache.\n");
+ write_actlr(actlr | 0x00000002);
+ }
+
+ mode = "ARMv7";
+ }
+#endif
+
+ if (mode == NULL) {
+ printk(KERN_CRIT "Tauros2: Unable to detect CPU mode.\n");
+ return;
+ }
+
+ printk(KERN_INFO "Tauros2: L2 cache support initialised "
+ "in %s mode.\n", mode);
+}
+
+#ifdef CONFIG_OF
+static const struct of_device_id tauros2_ids[] __initconst = {
+ { .compatible = "marvell,tauros2-cache"},
+ {}
+};
+#endif
+
+void __init tauros2_init(unsigned int features)
+{
+#ifdef CONFIG_OF
+ struct device_node *node;
+ int ret;
+ unsigned int f;
+
+ node = of_find_matching_node(NULL, tauros2_ids);
+ if (!node) {
+ pr_info("Not found marvell,tauros2-cache, disable it\n");
+ return;
+ }
+
+ ret = of_property_read_u32(node, "marvell,tauros2-cache-features", &f);
+ if (ret) {
+ pr_info("Not found marvell,tauros-cache-features property, "
+ "disable extra features\n");
+ features = 0;
+ } else
+ features = f;
+#endif
+ tauros2_internal_init(features);
+}
diff --git a/arch/arm/mm/cache-tauros3.h b/arch/arm/mm/cache-tauros3.h
new file mode 100644
index 00000000000..02c0a97cbc0
--- /dev/null
+++ b/arch/arm/mm/cache-tauros3.h
@@ -0,0 +1,41 @@
+/*
+ * Marvell Tauros3 cache controller includes
+ *
+ * Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>
+ *
+ * based on GPL'ed 2.6 kernel sources
+ * (c) Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef __ASM_ARM_HARDWARE_TAUROS3_H
+#define __ASM_ARM_HARDWARE_TAUROS3_H
+
+/*
+ * Marvell Tauros3 L2CC is compatible with PL310 r0p0
+ * but with PREFETCH_CTRL (r2p0) and an additional event counter.
+ * Also, there is AUX2_CTRL for some Marvell specific control.
+ */
+
+#define TAUROS3_EVENT_CNT2_CFG 0x224
+#define TAUROS3_EVENT_CNT2_VAL 0x228
+#define TAUROS3_INV_ALL 0x780
+#define TAUROS3_CLEAN_ALL 0x784
+#define TAUROS3_AUX2_CTRL 0x820
+
+/* Registers shifts and masks */
+#define TAUROS3_AUX2_CTRL_LINEFILL_BURST8_EN (1 << 2)
+
+#endif
diff --git a/arch/arm/mm/cache-v3.S b/arch/arm/mm/cache-v3.S
deleted file mode 100644
index e1994788cf0..00000000000
--- a/arch/arm/mm/cache-v3.S
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- * linux/arch/arm/mm/cache-v3.S
- *
- * Copyright (C) 1997-2002 Russell king
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#include <linux/linkage.h>
-#include <linux/init.h>
-#include <asm/hardware.h>
-#include <asm/page.h>
-#include "proc-macros.S"
-
-/*
- * flush_user_cache_all()
- *
- * Invalidate all cache entries in a particular address
- * space.
- *
- * - mm - mm_struct describing address space
- */
-ENTRY(v3_flush_user_cache_all)
- /* FALLTHROUGH */
-/*
- * flush_kern_cache_all()
- *
- * Clean and invalidate the entire cache.
- */
-ENTRY(v3_flush_kern_cache_all)
- /* FALLTHROUGH */
-
-/*
- * flush_user_cache_range(start, end, flags)
- *
- * Invalidate a range of cache entries in the specified
- * address space.
- *
- * - start - start address (may not be aligned)
- * - end - end address (exclusive, may not be aligned)
- * - flags - vma_area_struct flags describing address space
- */
-ENTRY(v3_flush_user_cache_range)
- mov ip, #0
- mcreq p15, 0, ip, c7, c0, 0 @ flush ID cache
- mov pc, lr
-
-/*
- * coherent_kern_range(start, end)
- *
- * Ensure coherency between the Icache and the Dcache in the
- * region described by start. If you have non-snooping
- * Harvard caches, you need to implement this function.
- *
- * - start - virtual start address
- * - end - virtual end address
- */
-ENTRY(v3_coherent_kern_range)
- /* FALLTHROUGH */
-
-/*
- * coherent_user_range(start, end)
- *
- * Ensure coherency between the Icache and the Dcache in the
- * region described by start. If you have non-snooping
- * Harvard caches, you need to implement this function.
- *
- * - start - virtual start address
- * - end - virtual end address
- */
-ENTRY(v3_coherent_user_range)
- mov pc, lr
-
-/*
- * flush_kern_dcache_page(void *page)
- *
- * Ensure no D cache aliasing occurs, either with itself or
- * the I cache
- *
- * - addr - page aligned address
- */
-ENTRY(v3_flush_kern_dcache_page)
- /* FALLTHROUGH */
-
-/*
- * dma_inv_range(start, end)
- *
- * Invalidate (discard) the specified virtual address range.
- * May not write back any entries. If 'start' or 'end'
- * are not cache line aligned, those lines must be written
- * back.
- *
- * - start - virtual start address
- * - end - virtual end address
- */
-ENTRY(v3_dma_inv_range)
- /* FALLTHROUGH */
-
-/*
- * dma_flush_range(start, end)
- *
- * Clean and invalidate the specified virtual address range.
- *
- * - start - virtual start address
- * - end - virtual end address
- */
-ENTRY(v3_dma_flush_range)
- mov r0, #0
- mcr p15, 0, r0, c7, c0, 0 @ flush ID cache
- /* FALLTHROUGH */
-
-/*
- * dma_clean_range(start, end)
- *
- * Clean (write back) the specified virtual address range.
- *
- * - start - virtual start address
- * - end - virtual end address
- */
-ENTRY(v3_dma_clean_range)
- mov pc, lr
-
- __INITDATA
-
- .type v3_cache_fns, #object
-ENTRY(v3_cache_fns)
- .long v3_flush_kern_cache_all
- .long v3_flush_user_cache_all
- .long v3_flush_user_cache_range
- .long v3_coherent_kern_range
- .long v3_coherent_user_range
- .long v3_flush_kern_dcache_page
- .long v3_dma_inv_range
- .long v3_dma_clean_range
- .long v3_dma_flush_range
- .size v3_cache_fns, . - v3_cache_fns
diff --git a/arch/arm/mm/cache-v4.S b/arch/arm/mm/cache-v4.S
index b2908063ed6..a7ba68f59f0 100644
--- a/arch/arm/mm/cache-v4.S
+++ b/arch/arm/mm/cache-v4.S
@@ -9,11 +9,19 @@
*/
#include <linux/linkage.h>
#include <linux/init.h>
-#include <asm/hardware.h>
#include <asm/page.h>
#include "proc-macros.S"
/*
+ * flush_icache_all()
+ *
+ * Unconditionally clean and invalidate the entire icache.
+ */
+ENTRY(v4_flush_icache_all)
+ mov pc, lr
+ENDPROC(v4_flush_icache_all)
+
+/*
* flush_user_cache_all()
*
* Invalidate all cache entries in a particular address
@@ -29,7 +37,7 @@ ENTRY(v4_flush_user_cache_all)
* Clean and invalidate the entire cache.
*/
ENTRY(v4_flush_kern_cache_all)
-#ifdef CPU_CP15
+#ifdef CONFIG_CPU_CP15
mov r0, #0
mcr p15, 0, r0, c7, c7, 0 @ flush ID cache
mov pc, lr
@@ -48,9 +56,9 @@ ENTRY(v4_flush_kern_cache_all)
* - flags - vma_area_struct flags describing address space
*/
ENTRY(v4_flush_user_cache_range)
-#ifdef CPU_CP15
+#ifdef CONFIG_CPU_CP15
mov ip, #0
- mcreq p15, 0, ip, c7, c7, 0 @ flush ID cache
+ mcr p15, 0, ip, c7, c7, 0 @ flush ID cache
mov pc, lr
#else
/* FALLTHROUGH */
@@ -80,31 +88,19 @@ ENTRY(v4_coherent_kern_range)
* - end - virtual end address
*/
ENTRY(v4_coherent_user_range)
+ mov r0, #0
mov pc, lr
/*
- * flush_kern_dcache_page(void *page)
+ * flush_kern_dcache_area(void *addr, size_t size)
*
* Ensure no D cache aliasing occurs, either with itself or
* the I cache
*
- * - addr - page aligned address
+ * - addr - kernel address
+ * - size - region size
*/
-ENTRY(v4_flush_kern_dcache_page)
- /* FALLTHROUGH */
-
-/*
- * dma_inv_range(start, end)
- *
- * Invalidate (discard) the specified virtual address range.
- * May not write back any entries. If 'start' or 'end'
- * are not cache line aligned, those lines must be written
- * back.
- *
- * - start - virtual start address
- * - end - virtual end address
- */
-ENTRY(v4_dma_inv_range)
+ENTRY(v4_flush_kern_dcache_area)
/* FALLTHROUGH */
/*
@@ -116,34 +112,38 @@ ENTRY(v4_dma_inv_range)
* - end - virtual end address
*/
ENTRY(v4_dma_flush_range)
-#ifdef CPU_CP15
+#ifdef CONFIG_CPU_CP15
mov r0, #0
mcr p15, 0, r0, c7, c7, 0 @ flush ID cache
#endif
+ mov pc, lr
+
+/*
+ * dma_unmap_area(start, size, dir)
+ * - start - kernel virtual start address
+ * - size - size of region
+ * - dir - DMA direction
+ */
+ENTRY(v4_dma_unmap_area)
+ teq r2, #DMA_TO_DEVICE
+ bne v4_dma_flush_range
/* FALLTHROUGH */
/*
- * dma_clean_range(start, end)
- *
- * Clean (write back) the specified virtual address range.
- *
- * - start - virtual start address
- * - end - virtual end address
+ * dma_map_area(start, size, dir)
+ * - start - kernel virtual start address
+ * - size - size of region
+ * - dir - DMA direction
*/
-ENTRY(v4_dma_clean_range)
+ENTRY(v4_dma_map_area)
mov pc, lr
+ENDPROC(v4_dma_unmap_area)
+ENDPROC(v4_dma_map_area)
+
+ .globl v4_flush_kern_cache_louis
+ .equ v4_flush_kern_cache_louis, v4_flush_kern_cache_all
__INITDATA
- .type v4_cache_fns, #object
-ENTRY(v4_cache_fns)
- .long v4_flush_kern_cache_all
- .long v4_flush_user_cache_all
- .long v4_flush_user_cache_range
- .long v4_coherent_kern_range
- .long v4_coherent_user_range
- .long v4_flush_kern_dcache_page
- .long v4_dma_inv_range
- .long v4_dma_clean_range
- .long v4_dma_flush_range
- .size v4_cache_fns, . - v4_cache_fns
+ @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
+ define_cache_functions v4
diff --git a/arch/arm/mm/cache-v4wb.S b/arch/arm/mm/cache-v4wb.S
index 2ebc1b3bf85..cd494532140 100644
--- a/arch/arm/mm/cache-v4wb.S
+++ b/arch/arm/mm/cache-v4wb.S
@@ -32,7 +32,7 @@
/*
* This is the size at which it becomes more efficient to
* clean the whole cache, rather than using the individual
- * cache line maintainence instructions.
+ * cache line maintenance instructions.
*
* Size Clean (ticks) Dirty (ticks)
* 4096 21 20 21 53 55 54
@@ -51,6 +51,17 @@ flush_base:
.text
/*
+ * flush_icache_all()
+ *
+ * Unconditionally clean and invalidate the entire icache.
+ */
+ENTRY(v4wb_flush_icache_all)
+ mov r0, #0
+ mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
+ mov pc, lr
+ENDPROC(v4wb_flush_icache_all)
+
+/*
* flush_user_cache_all()
*
* Clean and invalidate all cache entries in a particular address
@@ -114,15 +125,16 @@ ENTRY(v4wb_flush_user_cache_range)
mov pc, lr
/*
- * flush_kern_dcache_page(void *page)
+ * flush_kern_dcache_area(void *addr, size_t size)
*
* Ensure no D cache aliasing occurs, either with itself or
* the I cache
*
- * - addr - page aligned address
+ * - addr - kernel address
+ * - size - region size
*/
-ENTRY(v4wb_flush_kern_dcache_page)
- add r1, r0, #PAGE_SZ
+ENTRY(v4wb_flush_kern_dcache_area)
+ add r1, r0, r1
/* fall through */
/*
@@ -155,9 +167,9 @@ ENTRY(v4wb_coherent_user_range)
add r0, r0, #CACHE_DLINESIZE
cmp r0, r1
blo 1b
- mov ip, #0
- mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache
- mcr p15, 0, ip, c7, c10, 4 @ drain WB
+ mov r0, #0
+ mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
+ mcr p15, 0, r0, c7, c10, 4 @ drain WB
mov pc, lr
@@ -172,7 +184,7 @@ ENTRY(v4wb_coherent_user_range)
* - start - virtual start address
* - end - virtual end address
*/
-ENTRY(v4wb_dma_inv_range)
+v4wb_dma_inv_range:
tst r0, #CACHE_DLINESIZE - 1
bic r0, r0, #CACHE_DLINESIZE - 1
mcrne p15, 0, r0, c7, c10, 1 @ clean D entry
@@ -193,7 +205,7 @@ ENTRY(v4wb_dma_inv_range)
* - start - virtual start address
* - end - virtual end address
*/
-ENTRY(v4wb_dma_clean_range)
+v4wb_dma_clean_range:
bic r0, r0, #CACHE_DLINESIZE - 1
1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry
add r0, r0, #CACHE_DLINESIZE
@@ -215,17 +227,34 @@ ENTRY(v4wb_dma_clean_range)
.globl v4wb_dma_flush_range
.set v4wb_dma_flush_range, v4wb_coherent_kern_range
+/*
+ * dma_map_area(start, size, dir)
+ * - start - kernel virtual start address
+ * - size - size of region
+ * - dir - DMA direction
+ */
+ENTRY(v4wb_dma_map_area)
+ add r1, r1, r0
+ cmp r2, #DMA_TO_DEVICE
+ beq v4wb_dma_clean_range
+ bcs v4wb_dma_inv_range
+ b v4wb_dma_flush_range
+ENDPROC(v4wb_dma_map_area)
+
+/*
+ * dma_unmap_area(start, size, dir)
+ * - start - kernel virtual start address
+ * - size - size of region
+ * - dir - DMA direction
+ */
+ENTRY(v4wb_dma_unmap_area)
+ mov pc, lr
+ENDPROC(v4wb_dma_unmap_area)
+
+ .globl v4wb_flush_kern_cache_louis
+ .equ v4wb_flush_kern_cache_louis, v4wb_flush_kern_cache_all
+
__INITDATA
- .type v4wb_cache_fns, #object
-ENTRY(v4wb_cache_fns)
- .long v4wb_flush_kern_cache_all
- .long v4wb_flush_user_cache_all
- .long v4wb_flush_user_cache_range
- .long v4wb_coherent_kern_range
- .long v4wb_coherent_user_range
- .long v4wb_flush_kern_dcache_page
- .long v4wb_dma_inv_range
- .long v4wb_dma_clean_range
- .long v4wb_dma_flush_range
- .size v4wb_cache_fns, . - v4wb_cache_fns
+ @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
+ define_cache_functions v4wb
diff --git a/arch/arm/mm/cache-v4wt.S b/arch/arm/mm/cache-v4wt.S
index 9bcabd86c6f..11e5e5838bc 100644
--- a/arch/arm/mm/cache-v4wt.S
+++ b/arch/arm/mm/cache-v4wt.S
@@ -13,7 +13,6 @@
*/
#include <linux/linkage.h>
#include <linux/init.h>
-#include <asm/hardware.h>
#include <asm/page.h>
#include "proc-macros.S"
@@ -35,13 +34,24 @@
/*
* This is the size at which it becomes more efficient to
* clean the whole cache, rather than using the individual
- * cache line maintainence instructions.
+ * cache line maintenance instructions.
*
* *** This needs benchmarking
*/
#define CACHE_DLIMIT 16384
/*
+ * flush_icache_all()
+ *
+ * Unconditionally clean and invalidate the entire icache.
+ */
+ENTRY(v4wt_flush_icache_all)
+ mov r0, #0
+ mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
+ mov pc, lr
+ENDPROC(v4wt_flush_icache_all)
+
+/*
* flush_user_cache_all()
*
* Invalidate all cache entries in a particular address
@@ -115,20 +125,22 @@ ENTRY(v4wt_coherent_user_range)
add r0, r0, #CACHE_DLINESIZE
cmp r0, r1
blo 1b
+ mov r0, #0
mov pc, lr
/*
- * flush_kern_dcache_page(void *page)
+ * flush_kern_dcache_area(void *addr, size_t size)
*
* Ensure no D cache aliasing occurs, either with itself or
* the I cache
*
- * - addr - page aligned address
+ * - addr - kernel address
+ * - size - region size
*/
-ENTRY(v4wt_flush_kern_dcache_page)
+ENTRY(v4wt_flush_kern_dcache_area)
mov r2, #0
mcr p15, 0, r2, c7, c5, 0 @ invalidate I cache
- add r1, r0, #PAGE_SZ
+ add r1, r0, r1
/* fallthrough */
/*
@@ -142,23 +154,12 @@ ENTRY(v4wt_flush_kern_dcache_page)
* - start - virtual start address
* - end - virtual end address
*/
-ENTRY(v4wt_dma_inv_range)
+v4wt_dma_inv_range:
bic r0, r0, #CACHE_DLINESIZE - 1
1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry
add r0, r0, #CACHE_DLINESIZE
cmp r0, r1
blo 1b
- /* FALLTHROUGH */
-
-/*
- * dma_clean_range(start, end)
- *
- * Clean the specified virtual address range.
- *
- * - start - virtual start address
- * - end - virtual end address
- */
-ENTRY(v4wt_dma_clean_range)
mov pc, lr
/*
@@ -172,17 +173,33 @@ ENTRY(v4wt_dma_clean_range)
.globl v4wt_dma_flush_range
.equ v4wt_dma_flush_range, v4wt_dma_inv_range
+/*
+ * dma_unmap_area(start, size, dir)
+ * - start - kernel virtual start address
+ * - size - size of region
+ * - dir - DMA direction
+ */
+ENTRY(v4wt_dma_unmap_area)
+ add r1, r1, r0
+ teq r2, #DMA_TO_DEVICE
+ bne v4wt_dma_inv_range
+ /* FALLTHROUGH */
+
+/*
+ * dma_map_area(start, size, dir)
+ * - start - kernel virtual start address
+ * - size - size of region
+ * - dir - DMA direction
+ */
+ENTRY(v4wt_dma_map_area)
+ mov pc, lr
+ENDPROC(v4wt_dma_unmap_area)
+ENDPROC(v4wt_dma_map_area)
+
+ .globl v4wt_flush_kern_cache_louis
+ .equ v4wt_flush_kern_cache_louis, v4wt_flush_kern_cache_all
+
__INITDATA
- .type v4wt_cache_fns, #object
-ENTRY(v4wt_cache_fns)
- .long v4wt_flush_kern_cache_all
- .long v4wt_flush_user_cache_all
- .long v4wt_flush_user_cache_range
- .long v4wt_coherent_kern_range
- .long v4wt_coherent_user_range
- .long v4wt_flush_kern_dcache_page
- .long v4wt_dma_inv_range
- .long v4wt_dma_clean_range
- .long v4wt_dma_flush_range
- .size v4wt_cache_fns, . - v4wt_cache_fns
+ @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
+ define_cache_functions v4wt
diff --git a/arch/arm/mm/cache-v6.S b/arch/arm/mm/cache-v6.S
index 2c6c2a7c05a..d8fd4d4bd3d 100644
--- a/arch/arm/mm/cache-v6.S
+++ b/arch/arm/mm/cache-v6.S
@@ -12,6 +12,8 @@
#include <linux/linkage.h>
#include <linux/init.h>
#include <asm/assembler.h>
+#include <asm/errno.h>
+#include <asm/unwind.h>
#include "proc-macros.S"
@@ -21,6 +23,38 @@
#define BTB_FLUSH_SIZE 8
/*
+ * v6_flush_icache_all()
+ *
+ * Flush the whole I-cache.
+ *
+ * ARM1136 erratum 411920 - Invalidate Instruction Cache operation can fail.
+ * This erratum is present in 1136, 1156 and 1176. It does not affect the
+ * MPCore.
+ *
+ * Registers:
+ * r0 - set to 0
+ * r1 - corrupted
+ */
+ENTRY(v6_flush_icache_all)
+ mov r0, #0
+#ifdef CONFIG_ARM_ERRATA_411920
+ mrs r1, cpsr
+ cpsid ifa @ disable interrupts
+ mcr p15, 0, r0, c7, c5, 0 @ invalidate entire I-cache
+ mcr p15, 0, r0, c7, c5, 0 @ invalidate entire I-cache
+ mcr p15, 0, r0, c7, c5, 0 @ invalidate entire I-cache
+ mcr p15, 0, r0, c7, c5, 0 @ invalidate entire I-cache
+ msr cpsr_cx, r1 @ restore interrupts
+ .rept 11 @ ARM Ltd recommends at least
+ nop @ 11 NOPs
+ .endr
+#else
+ mcr p15, 0, r0, c7, c5, 0 @ invalidate I-cache
+#endif
+ mov pc, lr
+ENDPROC(v6_flush_icache_all)
+
+/*
* v6_flush_cache_all()
*
* Flush the entire cache.
@@ -31,8 +65,12 @@ ENTRY(v6_flush_kern_cache_all)
mov r0, #0
#ifdef HARVARD_CACHE
mcr p15, 0, r0, c7, c14, 0 @ D cache clean+invalidate
+#ifndef CONFIG_ARM_ERRATA_411920
mcr p15, 0, r0, c7, c5, 0 @ I+BTB cache invalidate
#else
+ b v6_flush_icache_all
+#endif
+#else
mcr p15, 0, r0, c7, c15, 0 @ Cache clean+invalidate
#endif
mov pc, lr
@@ -92,10 +130,11 @@ ENTRY(v6_coherent_kern_range)
* - the Icache does not read data from the write buffer
*/
ENTRY(v6_coherent_user_range)
-
+ UNWIND(.fnstart )
#ifdef HARVARD_CACHE
bic r0, r0, #CACHE_LINE_SIZE - 1
-1: mcr p15, 0, r0, c7, c10, 1 @ clean D line
+1:
+ USER( mcr p15, 0, r0, c7, c10, 1 ) @ clean D line
add r0, r0, #CACHE_LINE_SIZE
cmp r0, r1
blo 1b
@@ -103,22 +142,39 @@ ENTRY(v6_coherent_user_range)
mov r0, #0
#ifdef HARVARD_CACHE
mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
+#ifndef CONFIG_ARM_ERRATA_411920
mcr p15, 0, r0, c7, c5, 0 @ I+BTB cache invalidate
#else
+ b v6_flush_icache_all
+#endif
+#else
mcr p15, 0, r0, c7, c5, 6 @ invalidate BTB
#endif
mov pc, lr
/*
- * v6_flush_kern_dcache_page(kaddr)
+ * Fault handling for the cache operation above. If the virtual address in r0
+ * isn't mapped, fail with -EFAULT.
+ */
+9001:
+ mov r0, #-EFAULT
+ mov pc, lr
+ UNWIND(.fnend )
+ENDPROC(v6_coherent_user_range)
+ENDPROC(v6_coherent_kern_range)
+
+/*
+ * v6_flush_kern_dcache_area(void *addr, size_t size)
*
* Ensure that the data held in the page kaddr is written back
* to the page in question.
*
- * - kaddr - kernel address (guaranteed to be page aligned)
+ * - addr - kernel address
+ * - size - region size
*/
-ENTRY(v6_flush_kern_dcache_page)
- add r1, r0, #PAGE_SZ
+ENTRY(v6_flush_kern_dcache_area)
+ add r1, r0, r1
+ bic r0, r0, #D_CACHE_LINE_SIZE - 1
1:
#ifdef HARVARD_CACHE
mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D line
@@ -145,7 +201,11 @@ ENTRY(v6_flush_kern_dcache_page)
* - start - virtual start address of region
* - end - virtual end address of region
*/
-ENTRY(v6_dma_inv_range)
+v6_dma_inv_range:
+#ifdef CONFIG_DMA_CACHE_RWFO
+ ldrb r2, [r0] @ read for ownership
+ strb r2, [r0] @ write for ownership
+#endif
tst r0, #D_CACHE_LINE_SIZE - 1
bic r0, r0, #D_CACHE_LINE_SIZE - 1
#ifdef HARVARD_CACHE
@@ -154,6 +214,10 @@ ENTRY(v6_dma_inv_range)
mcrne p15, 0, r0, c7, c11, 1 @ clean unified line
#endif
tst r1, #D_CACHE_LINE_SIZE - 1
+#ifdef CONFIG_DMA_CACHE_RWFO
+ ldrneb r2, [r1, #-1] @ read for ownership
+ strneb r2, [r1, #-1] @ write for ownership
+#endif
bic r1, r1, #D_CACHE_LINE_SIZE - 1
#ifdef HARVARD_CACHE
mcrne p15, 0, r1, c7, c14, 1 @ clean & invalidate D line
@@ -168,6 +232,10 @@ ENTRY(v6_dma_inv_range)
#endif
add r0, r0, #D_CACHE_LINE_SIZE
cmp r0, r1
+#ifdef CONFIG_DMA_CACHE_RWFO
+ ldrlo r2, [r0] @ read for ownership
+ strlo r2, [r0] @ write for ownership
+#endif
blo 1b
mov r0, #0
mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
@@ -178,9 +246,12 @@ ENTRY(v6_dma_inv_range)
* - start - virtual start address of region
* - end - virtual end address of region
*/
-ENTRY(v6_dma_clean_range)
+v6_dma_clean_range:
bic r0, r0, #D_CACHE_LINE_SIZE - 1
1:
+#ifdef CONFIG_DMA_CACHE_RWFO
+ ldr r2, [r0] @ read for ownership
+#endif
#ifdef HARVARD_CACHE
mcr p15, 0, r0, c7, c10, 1 @ clean D line
#else
@@ -199,6 +270,10 @@ ENTRY(v6_dma_clean_range)
* - end - virtual end address of region
*/
ENTRY(v6_dma_flush_range)
+#ifdef CONFIG_DMA_CACHE_RWFO
+ ldrb r2, [r0] @ read for ownership
+ strb r2, [r0] @ write for ownership
+#endif
bic r0, r0, #D_CACHE_LINE_SIZE - 1
1:
#ifdef HARVARD_CACHE
@@ -208,22 +283,53 @@ ENTRY(v6_dma_flush_range)
#endif
add r0, r0, #D_CACHE_LINE_SIZE
cmp r0, r1
+#ifdef CONFIG_DMA_CACHE_RWFO
+ ldrlob r2, [r0] @ read for ownership
+ strlob r2, [r0] @ write for ownership
+#endif
blo 1b
mov r0, #0
mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
mov pc, lr
+/*
+ * dma_map_area(start, size, dir)
+ * - start - kernel virtual start address
+ * - size - size of region
+ * - dir - DMA direction
+ */
+ENTRY(v6_dma_map_area)
+ add r1, r1, r0
+ teq r2, #DMA_FROM_DEVICE
+ beq v6_dma_inv_range
+#ifndef CONFIG_DMA_CACHE_RWFO
+ b v6_dma_clean_range
+#else
+ teq r2, #DMA_TO_DEVICE
+ beq v6_dma_clean_range
+ b v6_dma_flush_range
+#endif
+ENDPROC(v6_dma_map_area)
+
+/*
+ * dma_unmap_area(start, size, dir)
+ * - start - kernel virtual start address
+ * - size - size of region
+ * - dir - DMA direction
+ */
+ENTRY(v6_dma_unmap_area)
+#ifndef CONFIG_DMA_CACHE_RWFO
+ add r1, r1, r0
+ teq r2, #DMA_TO_DEVICE
+ bne v6_dma_inv_range
+#endif
+ mov pc, lr
+ENDPROC(v6_dma_unmap_area)
+
+ .globl v6_flush_kern_cache_louis
+ .equ v6_flush_kern_cache_louis, v6_flush_kern_cache_all
+
__INITDATA
- .type v6_cache_fns, #object
-ENTRY(v6_cache_fns)
- .long v6_flush_kern_cache_all
- .long v6_flush_user_cache_all
- .long v6_flush_user_cache_range
- .long v6_coherent_kern_range
- .long v6_coherent_user_range
- .long v6_flush_kern_dcache_page
- .long v6_dma_inv_range
- .long v6_dma_clean_range
- .long v6_dma_flush_range
- .size v6_cache_fns, . - v6_cache_fns
+ @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
+ define_cache_functions v6
diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S
index 35ffc4d9599..615c99e38ba 100644
--- a/arch/arm/mm/cache-v7.S
+++ b/arch/arm/mm/cache-v7.S
@@ -13,33 +13,131 @@
#include <linux/linkage.h>
#include <linux/init.h>
#include <asm/assembler.h>
+#include <asm/errno.h>
+#include <asm/unwind.h>
#include "proc-macros.S"
/*
+ * The secondary kernel init calls v7_flush_dcache_all before it enables
+ * the L1; however, the L1 comes out of reset in an undefined state, so
+ * the clean + invalidate performed by v7_flush_dcache_all causes a bunch
+ * of cache lines with uninitialized data and uninitialized tags to get
+ * written out to memory, which does really unpleasant things to the main
+ * processor. We fix this by performing an invalidate, rather than a
+ * clean + invalidate, before jumping into the kernel.
+ *
+ * This function is cloned from arch/arm/mach-tegra/headsmp.S, and needs
+ * to be called for both secondary cores startup and primary core resume
+ * procedures.
+ */
+ENTRY(v7_invalidate_l1)
+ mov r0, #0
+ mcr p15, 2, r0, c0, c0, 0
+ mrc p15, 1, r0, c0, c0, 0
+
+ ldr r1, =0x7fff
+ and r2, r1, r0, lsr #13
+
+ ldr r1, =0x3ff
+
+ and r3, r1, r0, lsr #3 @ NumWays - 1
+ add r2, r2, #1 @ NumSets
+
+ and r0, r0, #0x7
+ add r0, r0, #4 @ SetShift
+
+ clz r1, r3 @ WayShift
+ add r4, r3, #1 @ NumWays
+1: sub r2, r2, #1 @ NumSets--
+ mov r3, r4 @ Temp = NumWays
+2: subs r3, r3, #1 @ Temp--
+ mov r5, r3, lsl r1
+ mov r6, r2, lsl r0
+ orr r5, r5, r6 @ Reg = (Temp<<WayShift)|(NumSets<<SetShift)
+ mcr p15, 0, r5, c7, c6, 2
+ bgt 2b
+ cmp r2, #0
+ bgt 1b
+ dsb st
+ isb
+ mov pc, lr
+ENDPROC(v7_invalidate_l1)
+
+/*
+ * v7_flush_icache_all()
+ *
+ * Flush the whole I-cache.
+ *
+ * Registers:
+ * r0 - set to 0
+ */
+ENTRY(v7_flush_icache_all)
+ mov r0, #0
+ ALT_SMP(mcr p15, 0, r0, c7, c1, 0) @ invalidate I-cache inner shareable
+ ALT_UP(mcr p15, 0, r0, c7, c5, 0) @ I+BTB cache invalidate
+ mov pc, lr
+ENDPROC(v7_flush_icache_all)
+
+ /*
+ * v7_flush_dcache_louis()
+ *
+ * Flush the D-cache up to the Level of Unification Inner Shareable
+ *
+ * Corrupted registers: r0-r7, r9-r11 (r6 only in Thumb mode)
+ */
+
+ENTRY(v7_flush_dcache_louis)
+ dmb @ ensure ordering with previous memory accesses
+ mrc p15, 1, r0, c0, c0, 1 @ read clidr, r0 = clidr
+ ALT_SMP(ands r3, r0, #(7 << 21)) @ extract LoUIS from clidr
+ ALT_UP(ands r3, r0, #(7 << 27)) @ extract LoUU from clidr
+#ifdef CONFIG_ARM_ERRATA_643719
+ ALT_SMP(mrceq p15, 0, r2, c0, c0, 0) @ read main ID register
+ ALT_UP(moveq pc, lr) @ LoUU is zero, so nothing to do
+ ldreq r1, =0x410fc090 @ ID of ARM Cortex A9 r0p?
+ biceq r2, r2, #0x0000000f @ clear minor revision number
+ teqeq r2, r1 @ test for errata affected core and if so...
+ orreqs r3, #(1 << 21) @ fix LoUIS value (and set flags state to 'ne')
+#endif
+ ALT_SMP(mov r3, r3, lsr #20) @ r3 = LoUIS * 2
+ ALT_UP(mov r3, r3, lsr #26) @ r3 = LoUU * 2
+ moveq pc, lr @ return if level == 0
+ mov r10, #0 @ r10 (starting level) = 0
+ b flush_levels @ start flushing cache levels
+ENDPROC(v7_flush_dcache_louis)
+
+/*
* v7_flush_dcache_all()
*
* Flush the whole D-cache.
*
- * Corrupted registers: r0-r5, r7, r9-r11
+ * Corrupted registers: r0-r7, r9-r11 (r6 only in Thumb mode)
*
* - mm - mm_struct describing address space
*/
ENTRY(v7_flush_dcache_all)
+ dmb @ ensure ordering with previous memory accesses
mrc p15, 1, r0, c0, c0, 1 @ read clidr
ands r3, r0, #0x7000000 @ extract loc from clidr
mov r3, r3, lsr #23 @ left align loc bit field
beq finished @ if loc is 0, then no need to clean
mov r10, #0 @ start clean at cache level 0
-loop1:
+flush_levels:
add r2, r10, r10, lsr #1 @ work out 3x current cache level
mov r1, r0, lsr r2 @ extract cache type bits from clidr
and r1, r1, #7 @ mask of the bits for current cache only
cmp r1, #2 @ see what cache we have at this level
blt skip @ skip if no cache, or just i-cache
+#ifdef CONFIG_PREEMPT
+ save_and_disable_irqs_notrace r9 @ make cssr&csidr read atomic
+#endif
mcr p15, 2, r10, c0, c0, 0 @ select current cache level in cssr
isb @ isb to sych the new cssr&csidr
mrc p15, 1, r1, c0, c0, 0 @ read the new csidr
+#ifdef CONFIG_PREEMPT
+ restore_irqs_notrace r9
+#endif
and r2, r1, #7 @ extract the length of the cache lines
add r2, r2, #4 @ add 4 (line length offset)
ldr r4, =0x3ff
@@ -47,25 +145,31 @@ loop1:
clz r5, r4 @ find bit position of way size increment
ldr r7, =0x7fff
ands r7, r7, r1, lsr #13 @ extract max number of the index size
+loop1:
+ mov r9, r7 @ create working copy of max index
loop2:
- mov r9, r4 @ create working copy of max way size
-loop3:
- orr r11, r10, r9, lsl r5 @ factor way and cache number into r11
- orr r11, r11, r7, lsl r2 @ factor index number into r11
+ ARM( orr r11, r10, r4, lsl r5 ) @ factor way and cache number into r11
+ THUMB( lsl r6, r4, r5 )
+ THUMB( orr r11, r10, r6 ) @ factor way and cache number into r11
+ ARM( orr r11, r11, r9, lsl r2 ) @ factor index number into r11
+ THUMB( lsl r6, r9, r2 )
+ THUMB( orr r11, r11, r6 ) @ factor index number into r11
mcr p15, 0, r11, c7, c14, 2 @ clean & invalidate by set/way
- subs r9, r9, #1 @ decrement the way
- bge loop3
- subs r7, r7, #1 @ decrement the index
+ subs r9, r9, #1 @ decrement the index
bge loop2
+ subs r4, r4, #1 @ decrement the way
+ bge loop1
skip:
add r10, r10, #2 @ increment cache number
cmp r3, r10
- bgt loop1
+ bgt flush_levels
finished:
mov r10, #0 @ swith back to cache level 0
mcr p15, 2, r10, c0, c0, 0 @ select current cache level in cssr
+ dsb st
isb
mov pc, lr
+ENDPROC(v7_flush_dcache_all)
/*
* v7_flush_cache_all()
@@ -73,18 +177,40 @@ finished:
* Flush the entire cache system.
* The data cache flush is now achieved using atomic clean / invalidates
* working outwards from L1 cache. This is done using Set/Way based cache
- * maintainance instructions.
+ * maintenance instructions.
* The instruction cache can still be invalidated back to the point of
* unification in a single instruction.
*
*/
ENTRY(v7_flush_kern_cache_all)
- stmfd sp!, {r4-r5, r7, r9-r11, lr}
+ ARM( stmfd sp!, {r4-r5, r7, r9-r11, lr} )
+ THUMB( stmfd sp!, {r4-r7, r9-r11, lr} )
bl v7_flush_dcache_all
mov r0, #0
- mcr p15, 0, r0, c7, c5, 0 @ I+BTB cache invalidate
- ldmfd sp!, {r4-r5, r7, r9-r11, lr}
+ ALT_SMP(mcr p15, 0, r0, c7, c1, 0) @ invalidate I-cache inner shareable
+ ALT_UP(mcr p15, 0, r0, c7, c5, 0) @ I+BTB cache invalidate
+ ARM( ldmfd sp!, {r4-r5, r7, r9-r11, lr} )
+ THUMB( ldmfd sp!, {r4-r7, r9-r11, lr} )
mov pc, lr
+ENDPROC(v7_flush_kern_cache_all)
+
+ /*
+ * v7_flush_kern_cache_louis(void)
+ *
+ * Flush the data cache up to Level of Unification Inner Shareable.
+ * Invalidate the I-cache to the point of unification.
+ */
+ENTRY(v7_flush_kern_cache_louis)
+ ARM( stmfd sp!, {r4-r5, r7, r9-r11, lr} )
+ THUMB( stmfd sp!, {r4-r7, r9-r11, lr} )
+ bl v7_flush_dcache_louis
+ mov r0, #0
+ ALT_SMP(mcr p15, 0, r0, c7, c1, 0) @ invalidate I-cache inner shareable
+ ALT_UP(mcr p15, 0, r0, c7, c5, 0) @ I+BTB cache invalidate
+ ARM( ldmfd sp!, {r4-r5, r7, r9-r11, lr} )
+ THUMB( ldmfd sp!, {r4-r7, r9-r11, lr} )
+ mov pc, lr
+ENDPROC(v7_flush_kern_cache_louis)
/*
* v7_flush_cache_all()
@@ -110,6 +236,8 @@ ENTRY(v7_flush_user_cache_all)
*/
ENTRY(v7_flush_user_cache_range)
mov pc, lr
+ENDPROC(v7_flush_user_cache_all)
+ENDPROC(v7_flush_user_cache_range)
/*
* v7_coherent_kern_range(start,end)
@@ -141,39 +269,75 @@ ENTRY(v7_coherent_kern_range)
* - the Icache does not read data from the write buffer
*/
ENTRY(v7_coherent_user_range)
+ UNWIND(.fnstart )
dcache_line_size r2, r3
sub r3, r2, #1
- bic r0, r0, r3
-1: mcr p15, 0, r0, c7, c11, 1 @ clean D line to the point of unification
- dsb
- mcr p15, 0, r0, c7, c5, 1 @ invalidate I line
- add r0, r0, r2
- cmp r0, r1
+ bic r12, r0, r3
+#ifdef CONFIG_ARM_ERRATA_764369
+ ALT_SMP(W(dsb))
+ ALT_UP(W(nop))
+#endif
+1:
+ USER( mcr p15, 0, r12, c7, c11, 1 ) @ clean D line to the point of unification
+ add r12, r12, r2
+ cmp r12, r1
blo 1b
+ dsb ishst
+ icache_line_size r2, r3
+ sub r3, r2, #1
+ bic r12, r0, r3
+2:
+ USER( mcr p15, 0, r12, c7, c5, 1 ) @ invalidate I line
+ add r12, r12, r2
+ cmp r12, r1
+ blo 2b
mov r0, #0
- mcr p15, 0, r0, c7, c5, 6 @ invalidate BTB
- dsb
+ ALT_SMP(mcr p15, 0, r0, c7, c1, 6) @ invalidate BTB Inner Shareable
+ ALT_UP(mcr p15, 0, r0, c7, c5, 6) @ invalidate BTB
+ dsb ishst
isb
mov pc, lr
/*
- * v7_flush_kern_dcache_page(kaddr)
+ * Fault handling for the cache operation above. If the virtual address in r0
+ * isn't mapped, fail with -EFAULT.
+ */
+9001:
+#ifdef CONFIG_ARM_ERRATA_775420
+ dsb
+#endif
+ mov r0, #-EFAULT
+ mov pc, lr
+ UNWIND(.fnend )
+ENDPROC(v7_coherent_kern_range)
+ENDPROC(v7_coherent_user_range)
+
+/*
+ * v7_flush_kern_dcache_area(void *addr, size_t size)
*
* Ensure that the data held in the page kaddr is written back
* to the page in question.
*
- * - kaddr - kernel address (guaranteed to be page aligned)
+ * - addr - kernel address
+ * - size - region size
*/
-ENTRY(v7_flush_kern_dcache_page)
+ENTRY(v7_flush_kern_dcache_area)
dcache_line_size r2, r3
- add r1, r0, #PAGE_SZ
+ add r1, r0, r1
+ sub r3, r2, #1
+ bic r0, r0, r3
+#ifdef CONFIG_ARM_ERRATA_764369
+ ALT_SMP(W(dsb))
+ ALT_UP(W(nop))
+#endif
1:
mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D line / unified line
add r0, r0, r2
cmp r0, r1
blo 1b
- dsb
+ dsb st
mov pc, lr
+ENDPROC(v7_flush_kern_dcache_area)
/*
* v7_dma_inv_range(start,end)
@@ -185,11 +349,15 @@ ENTRY(v7_flush_kern_dcache_page)
* - start - virtual start address of region
* - end - virtual end address of region
*/
-ENTRY(v7_dma_inv_range)
+v7_dma_inv_range:
dcache_line_size r2, r3
sub r3, r2, #1
tst r0, r3
bic r0, r0, r3
+#ifdef CONFIG_ARM_ERRATA_764369
+ ALT_SMP(W(dsb))
+ ALT_UP(W(nop))
+#endif
mcrne p15, 0, r0, c7, c14, 1 @ clean & invalidate D / U line
tst r1, r3
@@ -200,25 +368,31 @@ ENTRY(v7_dma_inv_range)
add r0, r0, r2
cmp r0, r1
blo 1b
- dsb
+ dsb st
mov pc, lr
+ENDPROC(v7_dma_inv_range)
/*
* v7_dma_clean_range(start,end)
* - start - virtual start address of region
* - end - virtual end address of region
*/
-ENTRY(v7_dma_clean_range)
+v7_dma_clean_range:
dcache_line_size r2, r3
sub r3, r2, #1
bic r0, r0, r3
+#ifdef CONFIG_ARM_ERRATA_764369
+ ALT_SMP(W(dsb))
+ ALT_UP(W(nop))
+#endif
1:
mcr p15, 0, r0, c7, c10, 1 @ clean D / U line
add r0, r0, r2
cmp r0, r1
blo 1b
- dsb
+ dsb st
mov pc, lr
+ENDPROC(v7_dma_clean_range)
/*
* v7_dma_flush_range(start,end)
@@ -229,25 +403,46 @@ ENTRY(v7_dma_flush_range)
dcache_line_size r2, r3
sub r3, r2, #1
bic r0, r0, r3
+#ifdef CONFIG_ARM_ERRATA_764369
+ ALT_SMP(W(dsb))
+ ALT_UP(W(nop))
+#endif
1:
mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D / U line
add r0, r0, r2
cmp r0, r1
blo 1b
- dsb
+ dsb st
+ mov pc, lr
+ENDPROC(v7_dma_flush_range)
+
+/*
+ * dma_map_area(start, size, dir)
+ * - start - kernel virtual start address
+ * - size - size of region
+ * - dir - DMA direction
+ */
+ENTRY(v7_dma_map_area)
+ add r1, r1, r0
+ teq r2, #DMA_FROM_DEVICE
+ beq v7_dma_inv_range
+ b v7_dma_clean_range
+ENDPROC(v7_dma_map_area)
+
+/*
+ * dma_unmap_area(start, size, dir)
+ * - start - kernel virtual start address
+ * - size - size of region
+ * - dir - DMA direction
+ */
+ENTRY(v7_dma_unmap_area)
+ add r1, r1, r0
+ teq r2, #DMA_TO_DEVICE
+ bne v7_dma_inv_range
mov pc, lr
+ENDPROC(v7_dma_unmap_area)
__INITDATA
- .type v7_cache_fns, #object
-ENTRY(v7_cache_fns)
- .long v7_flush_kern_cache_all
- .long v7_flush_user_cache_all
- .long v7_flush_user_cache_range
- .long v7_coherent_kern_range
- .long v7_coherent_user_range
- .long v7_flush_kern_dcache_page
- .long v7_dma_inv_range
- .long v7_dma_clean_range
- .long v7_dma_flush_range
- .size v7_cache_fns, . - v7_cache_fns
+ @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
+ define_cache_functions v7
diff --git a/arch/arm/mm/cache-xsc3l2.c b/arch/arm/mm/cache-xsc3l2.c
new file mode 100644
index 00000000000..6c3edeb66e7
--- /dev/null
+++ b/arch/arm/mm/cache-xsc3l2.c
@@ -0,0 +1,220 @@
+/*
+ * arch/arm/mm/cache-xsc3l2.c - XScale3 L2 cache controller support
+ *
+ * Copyright (C) 2007 ARM Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#include <linux/init.h>
+#include <linux/highmem.h>
+#include <asm/cp15.h>
+#include <asm/cputype.h>
+#include <asm/cacheflush.h>
+
+#define CR_L2 (1 << 26)
+
+#define CACHE_LINE_SIZE 32
+#define CACHE_LINE_SHIFT 5
+#define CACHE_WAY_PER_SET 8
+
+#define CACHE_WAY_SIZE(l2ctype) (8192 << (((l2ctype) >> 8) & 0xf))
+#define CACHE_SET_SIZE(l2ctype) (CACHE_WAY_SIZE(l2ctype) >> CACHE_LINE_SHIFT)
+
+static inline int xsc3_l2_present(void)
+{
+ unsigned long l2ctype;
+
+ __asm__("mrc p15, 1, %0, c0, c0, 1" : "=r" (l2ctype));
+
+ return !!(l2ctype & 0xf8);
+}
+
+static inline void xsc3_l2_clean_mva(unsigned long addr)
+{
+ __asm__("mcr p15, 1, %0, c7, c11, 1" : : "r" (addr));
+}
+
+static inline void xsc3_l2_inv_mva(unsigned long addr)
+{
+ __asm__("mcr p15, 1, %0, c7, c7, 1" : : "r" (addr));
+}
+
+static inline void xsc3_l2_inv_all(void)
+{
+ unsigned long l2ctype, set_way;
+ int set, way;
+
+ __asm__("mrc p15, 1, %0, c0, c0, 1" : "=r" (l2ctype));
+
+ for (set = 0; set < CACHE_SET_SIZE(l2ctype); set++) {
+ for (way = 0; way < CACHE_WAY_PER_SET; way++) {
+ set_way = (way << 29) | (set << 5);
+ __asm__("mcr p15, 1, %0, c7, c11, 2" : : "r"(set_way));
+ }
+ }
+
+ dsb();
+}
+
+static inline void l2_unmap_va(unsigned long va)
+{
+#ifdef CONFIG_HIGHMEM
+ if (va != -1)
+ kunmap_atomic((void *)va);
+#endif
+}
+
+static inline unsigned long l2_map_va(unsigned long pa, unsigned long prev_va)
+{
+#ifdef CONFIG_HIGHMEM
+ unsigned long va = prev_va & PAGE_MASK;
+ unsigned long pa_offset = pa << (32 - PAGE_SHIFT);
+ if (unlikely(pa_offset < (prev_va << (32 - PAGE_SHIFT)))) {
+ /*
+ * Switching to a new page. Because cache ops are
+ * using virtual addresses only, we must put a mapping
+ * in place for it.
+ */
+ l2_unmap_va(prev_va);
+ va = (unsigned long)kmap_atomic_pfn(pa >> PAGE_SHIFT);
+ }
+ return va + (pa_offset >> (32 - PAGE_SHIFT));
+#else
+ return __phys_to_virt(pa);
+#endif
+}
+
+static void xsc3_l2_inv_range(unsigned long start, unsigned long end)
+{
+ unsigned long vaddr;
+
+ if (start == 0 && end == -1ul) {
+ xsc3_l2_inv_all();
+ return;
+ }
+
+ vaddr = -1; /* to force the first mapping */
+
+ /*
+ * Clean and invalidate partial first cache line.
+ */
+ if (start & (CACHE_LINE_SIZE - 1)) {
+ vaddr = l2_map_va(start & ~(CACHE_LINE_SIZE - 1), vaddr);
+ xsc3_l2_clean_mva(vaddr);
+ xsc3_l2_inv_mva(vaddr);
+ start = (start | (CACHE_LINE_SIZE - 1)) + 1;
+ }
+
+ /*
+ * Invalidate all full cache lines between 'start' and 'end'.
+ */
+ while (start < (end & ~(CACHE_LINE_SIZE - 1))) {
+ vaddr = l2_map_va(start, vaddr);
+ xsc3_l2_inv_mva(vaddr);
+ start += CACHE_LINE_SIZE;
+ }
+
+ /*
+ * Clean and invalidate partial last cache line.
+ */
+ if (start < end) {
+ vaddr = l2_map_va(start, vaddr);
+ xsc3_l2_clean_mva(vaddr);
+ xsc3_l2_inv_mva(vaddr);
+ }
+
+ l2_unmap_va(vaddr);
+
+ dsb();
+}
+
+static void xsc3_l2_clean_range(unsigned long start, unsigned long end)
+{
+ unsigned long vaddr;
+
+ vaddr = -1; /* to force the first mapping */
+
+ start &= ~(CACHE_LINE_SIZE - 1);
+ while (start < end) {
+ vaddr = l2_map_va(start, vaddr);
+ xsc3_l2_clean_mva(vaddr);
+ start += CACHE_LINE_SIZE;
+ }
+
+ l2_unmap_va(vaddr);
+
+ dsb();
+}
+
+/*
+ * optimize L2 flush all operation by set/way format
+ */
+static inline void xsc3_l2_flush_all(void)
+{
+ unsigned long l2ctype, set_way;
+ int set, way;
+
+ __asm__("mrc p15, 1, %0, c0, c0, 1" : "=r" (l2ctype));
+
+ for (set = 0; set < CACHE_SET_SIZE(l2ctype); set++) {
+ for (way = 0; way < CACHE_WAY_PER_SET; way++) {
+ set_way = (way << 29) | (set << 5);
+ __asm__("mcr p15, 1, %0, c7, c15, 2" : : "r"(set_way));
+ }
+ }
+
+ dsb();
+}
+
+static void xsc3_l2_flush_range(unsigned long start, unsigned long end)
+{
+ unsigned long vaddr;
+
+ if (start == 0 && end == -1ul) {
+ xsc3_l2_flush_all();
+ return;
+ }
+
+ vaddr = -1; /* to force the first mapping */
+
+ start &= ~(CACHE_LINE_SIZE - 1);
+ while (start < end) {
+ vaddr = l2_map_va(start, vaddr);
+ xsc3_l2_clean_mva(vaddr);
+ xsc3_l2_inv_mva(vaddr);
+ start += CACHE_LINE_SIZE;
+ }
+
+ l2_unmap_va(vaddr);
+
+ dsb();
+}
+
+static int __init xsc3_l2_init(void)
+{
+ if (!cpu_is_xsc3() || !xsc3_l2_present())
+ return 0;
+
+ if (get_cr() & CR_L2) {
+ pr_info("XScale3 L2 cache enabled.\n");
+ xsc3_l2_inv_all();
+
+ outer_cache.inv_range = xsc3_l2_inv_range;
+ outer_cache.clean_range = xsc3_l2_clean_range;
+ outer_cache.flush_range = xsc3_l2_flush_range;
+ }
+
+ return 0;
+}
+core_initcall(xsc3_l2_init);
diff --git a/arch/arm/mm/consistent.c b/arch/arm/mm/consistent.c
deleted file mode 100644
index cefdf2f9f26..00000000000
--- a/arch/arm/mm/consistent.c
+++ /dev/null
@@ -1,507 +0,0 @@
-/*
- * linux/arch/arm/mm/consistent.c
- *
- * Copyright (C) 2000-2004 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * DMA uncached mapping support.
- */
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/slab.h>
-#include <linux/errno.h>
-#include <linux/list.h>
-#include <linux/init.h>
-#include <linux/device.h>
-#include <linux/dma-mapping.h>
-
-#include <asm/memory.h>
-#include <asm/cacheflush.h>
-#include <asm/tlbflush.h>
-#include <asm/sizes.h>
-
-/* Sanity check size */
-#if (CONSISTENT_DMA_SIZE % SZ_2M)
-#error "CONSISTENT_DMA_SIZE must be multiple of 2MiB"
-#endif
-
-#define CONSISTENT_END (0xffe00000)
-#define CONSISTENT_BASE (CONSISTENT_END - CONSISTENT_DMA_SIZE)
-
-#define CONSISTENT_OFFSET(x) (((unsigned long)(x) - CONSISTENT_BASE) >> PAGE_SHIFT)
-#define CONSISTENT_PTE_INDEX(x) (((unsigned long)(x) - CONSISTENT_BASE) >> PGDIR_SHIFT)
-#define NUM_CONSISTENT_PTES (CONSISTENT_DMA_SIZE >> PGDIR_SHIFT)
-
-
-/*
- * These are the page tables (2MB each) covering uncached, DMA consistent allocations
- */
-static pte_t *consistent_pte[NUM_CONSISTENT_PTES];
-static DEFINE_SPINLOCK(consistent_lock);
-
-/*
- * VM region handling support.
- *
- * This should become something generic, handling VM region allocations for
- * vmalloc and similar (ioremap, module space, etc).
- *
- * I envisage vmalloc()'s supporting vm_struct becoming:
- *
- * struct vm_struct {
- * struct vm_region region;
- * unsigned long flags;
- * struct page **pages;
- * unsigned int nr_pages;
- * unsigned long phys_addr;
- * };
- *
- * get_vm_area() would then call vm_region_alloc with an appropriate
- * struct vm_region head (eg):
- *
- * struct vm_region vmalloc_head = {
- * .vm_list = LIST_HEAD_INIT(vmalloc_head.vm_list),
- * .vm_start = VMALLOC_START,
- * .vm_end = VMALLOC_END,
- * };
- *
- * However, vmalloc_head.vm_start is variable (typically, it is dependent on
- * the amount of RAM found at boot time.) I would imagine that get_vm_area()
- * would have to initialise this each time prior to calling vm_region_alloc().
- */
-struct vm_region {
- struct list_head vm_list;
- unsigned long vm_start;
- unsigned long vm_end;
- struct page *vm_pages;
- int vm_active;
-};
-
-static struct vm_region consistent_head = {
- .vm_list = LIST_HEAD_INIT(consistent_head.vm_list),
- .vm_start = CONSISTENT_BASE,
- .vm_end = CONSISTENT_END,
-};
-
-static struct vm_region *
-vm_region_alloc(struct vm_region *head, size_t size, gfp_t gfp)
-{
- unsigned long addr = head->vm_start, end = head->vm_end - size;
- unsigned long flags;
- struct vm_region *c, *new;
-
- new = kmalloc(sizeof(struct vm_region), gfp);
- if (!new)
- goto out;
-
- spin_lock_irqsave(&consistent_lock, flags);
-
- list_for_each_entry(c, &head->vm_list, vm_list) {
- if ((addr + size) < addr)
- goto nospc;
- if ((addr + size) <= c->vm_start)
- goto found;
- addr = c->vm_end;
- if (addr > end)
- goto nospc;
- }
-
- found:
- /*
- * Insert this entry _before_ the one we found.
- */
- list_add_tail(&new->vm_list, &c->vm_list);
- new->vm_start = addr;
- new->vm_end = addr + size;
- new->vm_active = 1;
-
- spin_unlock_irqrestore(&consistent_lock, flags);
- return new;
-
- nospc:
- spin_unlock_irqrestore(&consistent_lock, flags);
- kfree(new);
- out:
- return NULL;
-}
-
-static struct vm_region *vm_region_find(struct vm_region *head, unsigned long addr)
-{
- struct vm_region *c;
-
- list_for_each_entry(c, &head->vm_list, vm_list) {
- if (c->vm_active && c->vm_start == addr)
- goto out;
- }
- c = NULL;
- out:
- return c;
-}
-
-#ifdef CONFIG_HUGETLB_PAGE
-#error ARM Coherent DMA allocator does not (yet) support huge TLB
-#endif
-
-static void *
-__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp,
- pgprot_t prot)
-{
- struct page *page;
- struct vm_region *c;
- unsigned long order;
- u64 mask = ISA_DMA_THRESHOLD, limit;
-
- if (!consistent_pte[0]) {
- printk(KERN_ERR "%s: not initialised\n", __func__);
- dump_stack();
- return NULL;
- }
-
- if (dev) {
- mask = dev->coherent_dma_mask;
-
- /*
- * Sanity check the DMA mask - it must be non-zero, and
- * must be able to be satisfied by a DMA allocation.
- */
- if (mask == 0) {
- dev_warn(dev, "coherent DMA mask is unset\n");
- goto no_page;
- }
-
- if ((~mask) & ISA_DMA_THRESHOLD) {
- dev_warn(dev, "coherent DMA mask %#llx is smaller "
- "than system GFP_DMA mask %#llx\n",
- mask, (unsigned long long)ISA_DMA_THRESHOLD);
- goto no_page;
- }
- }
-
- /*
- * Sanity check the allocation size.
- */
- size = PAGE_ALIGN(size);
- limit = (mask + 1) & ~mask;
- if ((limit && size >= limit) ||
- size >= (CONSISTENT_END - CONSISTENT_BASE)) {
- printk(KERN_WARNING "coherent allocation too big "
- "(requested %#x mask %#llx)\n", size, mask);
- goto no_page;
- }
-
- order = get_order(size);
-
- if (mask != 0xffffffff)
- gfp |= GFP_DMA;
-
- page = alloc_pages(gfp, order);
- if (!page)
- goto no_page;
-
- /*
- * Invalidate any data that might be lurking in the
- * kernel direct-mapped region for device DMA.
- */
- {
- void *ptr = page_address(page);
- memset(ptr, 0, size);
- dmac_flush_range(ptr, ptr + size);
- outer_flush_range(__pa(ptr), __pa(ptr) + size);
- }
-
- /*
- * Allocate a virtual address in the consistent mapping region.
- */
- c = vm_region_alloc(&consistent_head, size,
- gfp & ~(__GFP_DMA | __GFP_HIGHMEM));
- if (c) {
- pte_t *pte;
- struct page *end = page + (1 << order);
- int idx = CONSISTENT_PTE_INDEX(c->vm_start);
- u32 off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1);
-
- pte = consistent_pte[idx] + off;
- c->vm_pages = page;
-
- split_page(page, order);
-
- /*
- * Set the "dma handle"
- */
- *handle = page_to_dma(dev, page);
-
- do {
- BUG_ON(!pte_none(*pte));
-
- /*
- * x86 does not mark the pages reserved...
- */
- SetPageReserved(page);
- set_pte_ext(pte, mk_pte(page, prot), 0);
- page++;
- pte++;
- off++;
- if (off >= PTRS_PER_PTE) {
- off = 0;
- pte = consistent_pte[++idx];
- }
- } while (size -= PAGE_SIZE);
-
- /*
- * Free the otherwise unused pages.
- */
- while (page < end) {
- __free_page(page);
- page++;
- }
-
- return (void *)c->vm_start;
- }
-
- if (page)
- __free_pages(page, order);
- no_page:
- *handle = ~0;
- return NULL;
-}
-
-/*
- * Allocate DMA-coherent memory space and return both the kernel remapped
- * virtual and bus address for that space.
- */
-void *
-dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp)
-{
- if (arch_is_coherent()) {
- void *virt;
-
- virt = kmalloc(size, gfp);
- if (!virt)
- return NULL;
- *handle = virt_to_dma(dev, virt);
-
- return virt;
- }
-
- return __dma_alloc(dev, size, handle, gfp,
- pgprot_noncached(pgprot_kernel));
-}
-EXPORT_SYMBOL(dma_alloc_coherent);
-
-/*
- * Allocate a writecombining region, in much the same way as
- * dma_alloc_coherent above.
- */
-void *
-dma_alloc_writecombine(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp)
-{
- return __dma_alloc(dev, size, handle, gfp,
- pgprot_writecombine(pgprot_kernel));
-}
-EXPORT_SYMBOL(dma_alloc_writecombine);
-
-static int dma_mmap(struct device *dev, struct vm_area_struct *vma,
- void *cpu_addr, dma_addr_t dma_addr, size_t size)
-{
- unsigned long flags, user_size, kern_size;
- struct vm_region *c;
- int ret = -ENXIO;
-
- user_size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
-
- spin_lock_irqsave(&consistent_lock, flags);
- c = vm_region_find(&consistent_head, (unsigned long)cpu_addr);
- spin_unlock_irqrestore(&consistent_lock, flags);
-
- if (c) {
- unsigned long off = vma->vm_pgoff;
-
- kern_size = (c->vm_end - c->vm_start) >> PAGE_SHIFT;
-
- if (off < kern_size &&
- user_size <= (kern_size - off)) {
- vma->vm_flags |= VM_RESERVED;
- ret = remap_pfn_range(vma, vma->vm_start,
- page_to_pfn(c->vm_pages) + off,
- user_size << PAGE_SHIFT,
- vma->vm_page_prot);
- }
- }
-
- return ret;
-}
-
-int dma_mmap_coherent(struct device *dev, struct vm_area_struct *vma,
- void *cpu_addr, dma_addr_t dma_addr, size_t size)
-{
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
- return dma_mmap(dev, vma, cpu_addr, dma_addr, size);
-}
-EXPORT_SYMBOL(dma_mmap_coherent);
-
-int dma_mmap_writecombine(struct device *dev, struct vm_area_struct *vma,
- void *cpu_addr, dma_addr_t dma_addr, size_t size)
-{
- vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
- return dma_mmap(dev, vma, cpu_addr, dma_addr, size);
-}
-EXPORT_SYMBOL(dma_mmap_writecombine);
-
-/*
- * free a page as defined by the above mapping.
- * Must not be called with IRQs disabled.
- */
-void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr_t handle)
-{
- struct vm_region *c;
- unsigned long flags, addr;
- pte_t *ptep;
- int idx;
- u32 off;
-
- WARN_ON(irqs_disabled());
-
- if (arch_is_coherent()) {
- kfree(cpu_addr);
- return;
- }
-
- size = PAGE_ALIGN(size);
-
- spin_lock_irqsave(&consistent_lock, flags);
- c = vm_region_find(&consistent_head, (unsigned long)cpu_addr);
- if (!c)
- goto no_area;
-
- c->vm_active = 0;
- spin_unlock_irqrestore(&consistent_lock, flags);
-
- if ((c->vm_end - c->vm_start) != size) {
- printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n",
- __func__, c->vm_end - c->vm_start, size);
- dump_stack();
- size = c->vm_end - c->vm_start;
- }
-
- idx = CONSISTENT_PTE_INDEX(c->vm_start);
- off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1);
- ptep = consistent_pte[idx] + off;
- addr = c->vm_start;
- do {
- pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep);
- unsigned long pfn;
-
- ptep++;
- addr += PAGE_SIZE;
- off++;
- if (off >= PTRS_PER_PTE) {
- off = 0;
- ptep = consistent_pte[++idx];
- }
-
- if (!pte_none(pte) && pte_present(pte)) {
- pfn = pte_pfn(pte);
-
- if (pfn_valid(pfn)) {
- struct page *page = pfn_to_page(pfn);
-
- /*
- * x86 does not mark the pages reserved...
- */
- ClearPageReserved(page);
-
- __free_page(page);
- continue;
- }
- }
-
- printk(KERN_CRIT "%s: bad page in kernel page table\n",
- __func__);
- } while (size -= PAGE_SIZE);
-
- flush_tlb_kernel_range(c->vm_start, c->vm_end);
-
- spin_lock_irqsave(&consistent_lock, flags);
- list_del(&c->vm_list);
- spin_unlock_irqrestore(&consistent_lock, flags);
-
- kfree(c);
- return;
-
- no_area:
- spin_unlock_irqrestore(&consistent_lock, flags);
- printk(KERN_ERR "%s: trying to free invalid coherent area: %p\n",
- __func__, cpu_addr);
- dump_stack();
-}
-EXPORT_SYMBOL(dma_free_coherent);
-
-/*
- * Initialise the consistent memory allocation.
- */
-static int __init consistent_init(void)
-{
- pgd_t *pgd;
- pmd_t *pmd;
- pte_t *pte;
- int ret = 0, i = 0;
- u32 base = CONSISTENT_BASE;
-
- do {
- pgd = pgd_offset(&init_mm, base);
- pmd = pmd_alloc(&init_mm, pgd, base);
- if (!pmd) {
- printk(KERN_ERR "%s: no pmd tables\n", __func__);
- ret = -ENOMEM;
- break;
- }
- WARN_ON(!pmd_none(*pmd));
-
- pte = pte_alloc_kernel(pmd, base);
- if (!pte) {
- printk(KERN_ERR "%s: no pte tables\n", __func__);
- ret = -ENOMEM;
- break;
- }
-
- consistent_pte[i++] = pte;
- base += (1 << PGDIR_SHIFT);
- } while (base < CONSISTENT_END);
-
- return ret;
-}
-
-core_initcall(consistent_init);
-
-/*
- * Make an area consistent for devices.
- * Note: Drivers should NOT use this function directly, as it will break
- * platforms with CONFIG_DMABOUNCE.
- * Use the driver DMA support - see dma-mapping.h (dma_sync_*)
- */
-void dma_cache_maint(const void *start, size_t size, int direction)
-{
- const void *end = start + size;
-
- BUG_ON(!virt_addr_valid(start) || !virt_addr_valid(end - 1));
-
- switch (direction) {
- case DMA_FROM_DEVICE: /* invalidate only */
- dmac_inv_range(start, end);
- outer_inv_range(__pa(start), __pa(end));
- break;
- case DMA_TO_DEVICE: /* writeback only */
- dmac_clean_range(start, end);
- outer_clean_range(__pa(start), __pa(end));
- break;
- case DMA_BIDIRECTIONAL: /* writeback and invalidate */
- dmac_flush_range(start, end);
- outer_flush_range(__pa(start), __pa(end));
- break;
- default:
- BUG();
- }
-}
-EXPORT_SYMBOL(dma_cache_maint);
diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c
index fc84fcc7438..6eb97b3a748 100644
--- a/arch/arm/mm/context.c
+++ b/arch/arm/mm/context.c
@@ -2,6 +2,9 @@
* linux/arch/arm/mm/context.c
*
* Copyright (C) 2002-2003 Deep Blue Solutions Ltd, all rights reserved.
+ * Copyright (C) 2012 ARM Limited
+ *
+ * Author: Will Deacon <will.deacon@arm.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -10,55 +13,248 @@
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/percpu.h>
#include <asm/mmu_context.h>
+#include <asm/smp_plat.h>
+#include <asm/thread_notify.h>
#include <asm/tlbflush.h>
+#include <asm/proc-fns.h>
+
+/*
+ * On ARMv6, we have the following structure in the Context ID:
+ *
+ * 31 7 0
+ * +-------------------------+-----------+
+ * | process ID | ASID |
+ * +-------------------------+-----------+
+ * | context ID |
+ * +-------------------------------------+
+ *
+ * The ASID is used to tag entries in the CPU caches and TLBs.
+ * The context ID is used by debuggers and trace logic, and
+ * should be unique within all running processes.
+ *
+ * In big endian operation, the two 32 bit words are swapped if accessed
+ * by non-64-bit operations.
+ */
+#define ASID_FIRST_VERSION (1ULL << ASID_BITS)
+#define NUM_USER_ASIDS ASID_FIRST_VERSION
-static DEFINE_SPINLOCK(cpu_asid_lock);
-unsigned int cpu_last_asid = ASID_FIRST_VERSION;
+static DEFINE_RAW_SPINLOCK(cpu_asid_lock);
+static atomic64_t asid_generation = ATOMIC64_INIT(ASID_FIRST_VERSION);
+static DECLARE_BITMAP(asid_map, NUM_USER_ASIDS);
+
+static DEFINE_PER_CPU(atomic64_t, active_asids);
+static DEFINE_PER_CPU(u64, reserved_asids);
+static cpumask_t tlb_flush_pending;
+
+#ifdef CONFIG_ARM_ERRATA_798181
+void a15_erratum_get_cpumask(int this_cpu, struct mm_struct *mm,
+ cpumask_t *mask)
+{
+ int cpu;
+ unsigned long flags;
+ u64 context_id, asid;
+
+ raw_spin_lock_irqsave(&cpu_asid_lock, flags);
+ context_id = mm->context.id.counter;
+ for_each_online_cpu(cpu) {
+ if (cpu == this_cpu)
+ continue;
+ /*
+ * We only need to send an IPI if the other CPUs are
+ * running the same ASID as the one being invalidated.
+ */
+ asid = per_cpu(active_asids, cpu).counter;
+ if (asid == 0)
+ asid = per_cpu(reserved_asids, cpu);
+ if (context_id == asid)
+ cpumask_set_cpu(cpu, mask);
+ }
+ raw_spin_unlock_irqrestore(&cpu_asid_lock, flags);
+}
+#endif
+#ifdef CONFIG_ARM_LPAE
/*
- * We fork()ed a process, and we need a new context for the child
- * to run in. We reserve version 0 for initial tasks so we will
- * always allocate an ASID. The ASID 0 is reserved for the TTBR
- * register changing sequence.
+ * With LPAE, the ASID and page tables are updated atomicly, so there is
+ * no need for a reserved set of tables (the active ASID tracking prevents
+ * any issues across a rollover).
*/
-void __init_new_context(struct task_struct *tsk, struct mm_struct *mm)
+#define cpu_set_reserved_ttbr0()
+#else
+static void cpu_set_reserved_ttbr0(void)
{
- mm->context.id = 0;
+ u32 ttb;
+ /*
+ * Copy TTBR1 into TTBR0.
+ * This points at swapper_pg_dir, which contains only global
+ * entries so any speculative walks are perfectly safe.
+ */
+ asm volatile(
+ " mrc p15, 0, %0, c2, c0, 1 @ read TTBR1\n"
+ " mcr p15, 0, %0, c2, c0, 0 @ set TTBR0\n"
+ : "=r" (ttb));
+ isb();
+}
+#endif
+
+#ifdef CONFIG_PID_IN_CONTEXTIDR
+static int contextidr_notifier(struct notifier_block *unused, unsigned long cmd,
+ void *t)
+{
+ u32 contextidr;
+ pid_t pid;
+ struct thread_info *thread = t;
+
+ if (cmd != THREAD_NOTIFY_SWITCH)
+ return NOTIFY_DONE;
+
+ pid = task_pid_nr(thread->task) << ASID_BITS;
+ asm volatile(
+ " mrc p15, 0, %0, c13, c0, 1\n"
+ " and %0, %0, %2\n"
+ " orr %0, %0, %1\n"
+ " mcr p15, 0, %0, c13, c0, 1\n"
+ : "=r" (contextidr), "+r" (pid)
+ : "I" (~ASID_MASK));
+ isb();
+
+ return NOTIFY_OK;
}
-void __new_context(struct mm_struct *mm)
+static struct notifier_block contextidr_notifier_block = {
+ .notifier_call = contextidr_notifier,
+};
+
+static int __init contextidr_notifier_init(void)
{
- unsigned int asid;
+ return thread_register_notifier(&contextidr_notifier_block);
+}
+arch_initcall(contextidr_notifier_init);
+#endif
- spin_lock(&cpu_asid_lock);
- asid = ++cpu_last_asid;
- if (asid == 0)
- asid = cpu_last_asid = ASID_FIRST_VERSION;
+static void flush_context(unsigned int cpu)
+{
+ int i;
+ u64 asid;
+
+ /* Update the list of reserved ASIDs and the ASID bitmap. */
+ bitmap_clear(asid_map, 0, NUM_USER_ASIDS);
+ for_each_possible_cpu(i) {
+ if (i == cpu) {
+ asid = 0;
+ } else {
+ asid = atomic64_xchg(&per_cpu(active_asids, i), 0);
+ /*
+ * If this CPU has already been through a
+ * rollover, but hasn't run another task in
+ * the meantime, we must preserve its reserved
+ * ASID, as this is the only trace we have of
+ * the process it is still running.
+ */
+ if (asid == 0)
+ asid = per_cpu(reserved_asids, i);
+ __set_bit(asid & ~ASID_MASK, asid_map);
+ }
+ per_cpu(reserved_asids, i) = asid;
+ }
+
+ /* Queue a TLB invalidate and flush the I-cache if necessary. */
+ cpumask_setall(&tlb_flush_pending);
+
+ if (icache_is_vivt_asid_tagged())
+ __flush_icache_all();
+}
+
+static int is_reserved_asid(u64 asid)
+{
+ int cpu;
+ for_each_possible_cpu(cpu)
+ if (per_cpu(reserved_asids, cpu) == asid)
+ return 1;
+ return 0;
+}
+
+static u64 new_context(struct mm_struct *mm, unsigned int cpu)
+{
+ static u32 cur_idx = 1;
+ u64 asid = atomic64_read(&mm->context.id);
+ u64 generation = atomic64_read(&asid_generation);
+
+ if (asid != 0 && is_reserved_asid(asid)) {
+ /*
+ * Our current ASID was active during a rollover, we can
+ * continue to use it and this was just a false alarm.
+ */
+ asid = generation | (asid & ~ASID_MASK);
+ } else {
+ /*
+ * Allocate a free ASID. If we can't find one, take a
+ * note of the currently active ASIDs and mark the TLBs
+ * as requiring flushes. We always count from ASID #1,
+ * as we reserve ASID #0 to switch via TTBR0 and to
+ * avoid speculative page table walks from hitting in
+ * any partial walk caches, which could be populated
+ * from overlapping level-1 descriptors used to map both
+ * the module area and the userspace stack.
+ */
+ asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, cur_idx);
+ if (asid == NUM_USER_ASIDS) {
+ generation = atomic64_add_return(ASID_FIRST_VERSION,
+ &asid_generation);
+ flush_context(cpu);
+ asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1);
+ }
+ __set_bit(asid, asid_map);
+ cur_idx = asid;
+ asid |= generation;
+ cpumask_clear(mm_cpumask(mm));
+ }
+
+ return asid;
+}
+
+void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk)
+{
+ unsigned long flags;
+ unsigned int cpu = smp_processor_id();
+ u64 asid;
+
+ if (unlikely(mm->context.vmalloc_seq != init_mm.context.vmalloc_seq))
+ __check_vmalloc_seq(mm);
/*
- * If we've used up all our ASIDs, we need
- * to start a new version and flush the TLB.
+ * We cannot update the pgd and the ASID atomicly with classic
+ * MMU, so switch exclusively to global mappings to avoid
+ * speculative page table walking with the wrong TTBR.
*/
- if (unlikely((asid & ~ASID_MASK) == 0)) {
- asid = ++cpu_last_asid;
- /* set the reserved ASID before flushing the TLB */
- asm("mcr p15, 0, %0, c13, c0, 1 @ set reserved context ID\n"
- :
- : "r" (0));
- isb();
- flush_tlb_all();
- if (icache_is_vivt_asid_tagged()) {
- asm("mcr p15, 0, %0, c7, c5, 0 @ invalidate I-cache\n"
- "mcr p15, 0, %0, c7, c5, 6 @ flush BTAC/BTB\n"
- :
- : "r" (0));
- dsb();
- }
+ cpu_set_reserved_ttbr0();
+
+ asid = atomic64_read(&mm->context.id);
+ if (!((asid ^ atomic64_read(&asid_generation)) >> ASID_BITS)
+ && atomic64_xchg(&per_cpu(active_asids, cpu), asid))
+ goto switch_mm_fastpath;
+
+ raw_spin_lock_irqsave(&cpu_asid_lock, flags);
+ /* Check that our ASID belongs to the current generation. */
+ asid = atomic64_read(&mm->context.id);
+ if ((asid ^ atomic64_read(&asid_generation)) >> ASID_BITS) {
+ asid = new_context(mm, cpu);
+ atomic64_set(&mm->context.id, asid);
}
- spin_unlock(&cpu_asid_lock);
- mm->cpu_vm_mask = cpumask_of_cpu(smp_processor_id());
- mm->context.id = asid;
+ if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending)) {
+ local_flush_bp_all();
+ local_flush_tlb_all();
+ }
+
+ atomic64_set(&per_cpu(active_asids, cpu), asid);
+ cpumask_set_cpu(cpu, mm_cpumask(mm));
+ raw_spin_unlock_irqrestore(&cpu_asid_lock, flags);
+
+switch_mm_fastpath:
+ cpu_switch_mm(mm->pgd, mm);
}
diff --git a/arch/arm/mm/copypage-fa.c b/arch/arm/mm/copypage-fa.c
new file mode 100644
index 00000000000..d130a5ece5d
--- /dev/null
+++ b/arch/arm/mm/copypage-fa.c
@@ -0,0 +1,86 @@
+/*
+ * linux/arch/arm/lib/copypage-fa.S
+ *
+ * Copyright (C) 2005 Faraday Corp.
+ * Copyright (C) 2008-2009 Paulius Zaleckas <paulius.zaleckas@teltonika.lt>
+ *
+ * Based on copypage-v4wb.S:
+ * Copyright (C) 1995-1999 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/init.h>
+#include <linux/highmem.h>
+
+/*
+ * Faraday optimised copy_user_page
+ */
+static void __naked
+fa_copy_user_page(void *kto, const void *kfrom)
+{
+ asm("\
+ stmfd sp!, {r4, lr} @ 2\n\
+ mov r2, %0 @ 1\n\
+1: ldmia r1!, {r3, r4, ip, lr} @ 4\n\
+ stmia r0, {r3, r4, ip, lr} @ 4\n\
+ mcr p15, 0, r0, c7, c14, 1 @ 1 clean and invalidate D line\n\
+ add r0, r0, #16 @ 1\n\
+ ldmia r1!, {r3, r4, ip, lr} @ 4\n\
+ stmia r0, {r3, r4, ip, lr} @ 4\n\
+ mcr p15, 0, r0, c7, c14, 1 @ 1 clean and invalidate D line\n\
+ add r0, r0, #16 @ 1\n\
+ subs r2, r2, #1 @ 1\n\
+ bne 1b @ 1\n\
+ mcr p15, 0, r2, c7, c10, 4 @ 1 drain WB\n\
+ ldmfd sp!, {r4, pc} @ 3"
+ :
+ : "I" (PAGE_SIZE / 32));
+}
+
+void fa_copy_user_highpage(struct page *to, struct page *from,
+ unsigned long vaddr, struct vm_area_struct *vma)
+{
+ void *kto, *kfrom;
+
+ kto = kmap_atomic(to);
+ kfrom = kmap_atomic(from);
+ fa_copy_user_page(kto, kfrom);
+ kunmap_atomic(kfrom);
+ kunmap_atomic(kto);
+}
+
+/*
+ * Faraday optimised clear_user_page
+ *
+ * Same story as above.
+ */
+void fa_clear_user_highpage(struct page *page, unsigned long vaddr)
+{
+ void *ptr, *kaddr = kmap_atomic(page);
+ asm volatile("\
+ mov r1, %2 @ 1\n\
+ mov r2, #0 @ 1\n\
+ mov r3, #0 @ 1\n\
+ mov ip, #0 @ 1\n\
+ mov lr, #0 @ 1\n\
+1: stmia %0, {r2, r3, ip, lr} @ 4\n\
+ mcr p15, 0, %0, c7, c14, 1 @ 1 clean and invalidate D line\n\
+ add %0, %0, #16 @ 1\n\
+ stmia %0, {r2, r3, ip, lr} @ 4\n\
+ mcr p15, 0, %0, c7, c14, 1 @ 1 clean and invalidate D line\n\
+ add %0, %0, #16 @ 1\n\
+ subs r1, r1, #1 @ 1\n\
+ bne 1b @ 1\n\
+ mcr p15, 0, r1, c7, c10, 4 @ 1 drain WB"
+ : "=r" (ptr)
+ : "0" (kaddr), "I" (PAGE_SIZE / 32)
+ : "r1", "r2", "r3", "ip", "lr");
+ kunmap_atomic(kaddr);
+}
+
+struct cpu_user_fns fa_user_fns __initdata = {
+ .cpu_clear_user_highpage = fa_clear_user_highpage,
+ .cpu_copy_user_highpage = fa_copy_user_highpage,
+};
diff --git a/arch/arm/mm/copypage-feroceon.c b/arch/arm/mm/copypage-feroceon.c
new file mode 100644
index 00000000000..49ee0c1a720
--- /dev/null
+++ b/arch/arm/mm/copypage-feroceon.c
@@ -0,0 +1,112 @@
+/*
+ * linux/arch/arm/mm/copypage-feroceon.S
+ *
+ * Copyright (C) 2008 Marvell Semiconductors
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This handles copy_user_highpage and clear_user_page on Feroceon
+ * more optimally than the generic implementations.
+ */
+#include <linux/init.h>
+#include <linux/highmem.h>
+
+static void __naked
+feroceon_copy_user_page(void *kto, const void *kfrom)
+{
+ asm("\
+ stmfd sp!, {r4-r9, lr} \n\
+ mov ip, %2 \n\
+1: mov lr, r1 \n\
+ ldmia r1!, {r2 - r9} \n\
+ pld [lr, #32] \n\
+ pld [lr, #64] \n\
+ pld [lr, #96] \n\
+ pld [lr, #128] \n\
+ pld [lr, #160] \n\
+ pld [lr, #192] \n\
+ pld [lr, #224] \n\
+ stmia r0, {r2 - r9} \n\
+ ldmia r1!, {r2 - r9} \n\
+ mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D line\n\
+ add r0, r0, #32 \n\
+ stmia r0, {r2 - r9} \n\
+ ldmia r1!, {r2 - r9} \n\
+ mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D line\n\
+ add r0, r0, #32 \n\
+ stmia r0, {r2 - r9} \n\
+ ldmia r1!, {r2 - r9} \n\
+ mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D line\n\
+ add r0, r0, #32 \n\
+ stmia r0, {r2 - r9} \n\
+ ldmia r1!, {r2 - r9} \n\
+ mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D line\n\
+ add r0, r0, #32 \n\
+ stmia r0, {r2 - r9} \n\
+ ldmia r1!, {r2 - r9} \n\
+ mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D line\n\
+ add r0, r0, #32 \n\
+ stmia r0, {r2 - r9} \n\
+ ldmia r1!, {r2 - r9} \n\
+ mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D line\n\
+ add r0, r0, #32 \n\
+ stmia r0, {r2 - r9} \n\
+ ldmia r1!, {r2 - r9} \n\
+ mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D line\n\
+ add r0, r0, #32 \n\
+ stmia r0, {r2 - r9} \n\
+ subs ip, ip, #(32 * 8) \n\
+ mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D line\n\
+ add r0, r0, #32 \n\
+ bne 1b \n\
+ mcr p15, 0, ip, c7, c10, 4 @ drain WB\n\
+ ldmfd sp!, {r4-r9, pc}"
+ :
+ : "r" (kto), "r" (kfrom), "I" (PAGE_SIZE));
+}
+
+void feroceon_copy_user_highpage(struct page *to, struct page *from,
+ unsigned long vaddr, struct vm_area_struct *vma)
+{
+ void *kto, *kfrom;
+
+ kto = kmap_atomic(to);
+ kfrom = kmap_atomic(from);
+ flush_cache_page(vma, vaddr, page_to_pfn(from));
+ feroceon_copy_user_page(kto, kfrom);
+ kunmap_atomic(kfrom);
+ kunmap_atomic(kto);
+}
+
+void feroceon_clear_user_highpage(struct page *page, unsigned long vaddr)
+{
+ void *ptr, *kaddr = kmap_atomic(page);
+ asm volatile ("\
+ mov r1, %2 \n\
+ mov r2, #0 \n\
+ mov r3, #0 \n\
+ mov r4, #0 \n\
+ mov r5, #0 \n\
+ mov r6, #0 \n\
+ mov r7, #0 \n\
+ mov ip, #0 \n\
+ mov lr, #0 \n\
+1: stmia %0, {r2-r7, ip, lr} \n\
+ subs r1, r1, #1 \n\
+ mcr p15, 0, %0, c7, c14, 1 @ clean and invalidate D line\n\
+ add %0, %0, #32 \n\
+ bne 1b \n\
+ mcr p15, 0, r1, c7, c10, 4 @ drain WB"
+ : "=r" (ptr)
+ : "0" (kaddr), "I" (PAGE_SIZE / 32)
+ : "r1", "r2", "r3", "r4", "r5", "r6", "r7", "ip", "lr");
+ kunmap_atomic(kaddr);
+}
+
+struct cpu_user_fns feroceon_user_fns __initdata = {
+ .cpu_clear_user_highpage = feroceon_clear_user_highpage,
+ .cpu_copy_user_highpage = feroceon_copy_user_highpage,
+};
+
diff --git a/arch/arm/mm/copypage-v3.S b/arch/arm/mm/copypage-v3.S
deleted file mode 100644
index 2ee394b11bc..00000000000
--- a/arch/arm/mm/copypage-v3.S
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * linux/arch/arm/lib/copypage.S
- *
- * Copyright (C) 1995-1999 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * ASM optimised string functions
- */
-#include <linux/linkage.h>
-#include <linux/init.h>
-#include <asm/assembler.h>
-#include <asm/asm-offsets.h>
-
- .text
- .align 5
-/*
- * ARMv3 optimised copy_user_page
- *
- * FIXME: do we need to handle cache stuff...
- */
-ENTRY(v3_copy_user_page)
- stmfd sp!, {r4, lr} @ 2
- mov r2, #PAGE_SZ/64 @ 1
- ldmia r1!, {r3, r4, ip, lr} @ 4+1
-1: stmia r0!, {r3, r4, ip, lr} @ 4
- ldmia r1!, {r3, r4, ip, lr} @ 4+1
- stmia r0!, {r3, r4, ip, lr} @ 4
- ldmia r1!, {r3, r4, ip, lr} @ 4+1
- stmia r0!, {r3, r4, ip, lr} @ 4
- ldmia r1!, {r3, r4, ip, lr} @ 4
- subs r2, r2, #1 @ 1
- stmia r0!, {r3, r4, ip, lr} @ 4
- ldmneia r1!, {r3, r4, ip, lr} @ 4
- bne 1b @ 1
- ldmfd sp!, {r4, pc} @ 3
-
- .align 5
-/*
- * ARMv3 optimised clear_user_page
- *
- * FIXME: do we need to handle cache stuff...
- */
-ENTRY(v3_clear_user_page)
- str lr, [sp, #-4]!
- mov r1, #PAGE_SZ/64 @ 1
- mov r2, #0 @ 1
- mov r3, #0 @ 1
- mov ip, #0 @ 1
- mov lr, #0 @ 1
-1: stmia r0!, {r2, r3, ip, lr} @ 4
- stmia r0!, {r2, r3, ip, lr} @ 4
- stmia r0!, {r2, r3, ip, lr} @ 4
- stmia r0!, {r2, r3, ip, lr} @ 4
- subs r1, r1, #1 @ 1
- bne 1b @ 1
- ldr pc, [sp], #4
-
- __INITDATA
-
- .type v3_user_fns, #object
-ENTRY(v3_user_fns)
- .long v3_clear_user_page
- .long v3_copy_user_page
- .size v3_user_fns, . - v3_user_fns
diff --git a/arch/arm/mm/copypage-v4mc.c b/arch/arm/mm/copypage-v4mc.c
index ded0e96d069..1267e64133b 100644
--- a/arch/arm/mm/copypage-v4mc.c
+++ b/arch/arm/mm/copypage-v4mc.c
@@ -15,25 +15,21 @@
*/
#include <linux/init.h>
#include <linux/mm.h>
+#include <linux/highmem.h>
-#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/cacheflush.h>
#include "mm.h"
-/*
- * 0xffff8000 to 0xffffffff is reserved for any ARM architecture
- * specific hacks for copying pages efficiently.
- */
#define minicache_pgprot __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | \
- L_PTE_CACHEABLE)
+ L_PTE_MT_MINICACHE)
-static DEFINE_SPINLOCK(minicache_lock);
+static DEFINE_RAW_SPINLOCK(minicache_lock);
/*
- * ARMv4 mini-dcache optimised copy_user_page
+ * ARMv4 mini-dcache optimised copy_user_highpage
*
* We flush the destination cache lines just before we write the data into the
* corresponding address. Since the Dcache is read-allocate, this removes the
@@ -42,9 +38,9 @@ static DEFINE_SPINLOCK(minicache_lock);
*
* Note: We rely on all ARMv4 processors implementing the "invalidate D line"
* instruction. If your processor does not supply this, you have to write your
- * own copy_user_page that does the right thing.
+ * own copy_user_highpage that does the right thing.
*/
-static void __attribute__((naked))
+static void __naked
mc_copy_user_page(void *from, void *to)
{
asm volatile(
@@ -68,50 +64,52 @@ mc_copy_user_page(void *from, void *to)
: "r" (from), "r" (to), "I" (PAGE_SIZE / 64));
}
-void v4_mc_copy_user_page(void *kto, const void *kfrom, unsigned long vaddr)
+void v4_mc_copy_user_highpage(struct page *to, struct page *from,
+ unsigned long vaddr, struct vm_area_struct *vma)
{
- struct page *page = virt_to_page(kfrom);
+ void *kto = kmap_atomic(to);
+
+ if (!test_and_set_bit(PG_dcache_clean, &from->flags))
+ __flush_dcache_page(page_mapping(from), from);
- if (test_and_clear_bit(PG_dcache_dirty, &page->flags))
- __flush_dcache_page(page_mapping(page), page);
+ raw_spin_lock(&minicache_lock);
- spin_lock(&minicache_lock);
+ set_top_pte(COPYPAGE_MINICACHE, mk_pte(from, minicache_pgprot));
- set_pte_ext(TOP_PTE(0xffff8000), pfn_pte(__pa(kfrom) >> PAGE_SHIFT, minicache_pgprot), 0);
- flush_tlb_kernel_page(0xffff8000);
+ mc_copy_user_page((void *)COPYPAGE_MINICACHE, kto);
- mc_copy_user_page((void *)0xffff8000, kto);
+ raw_spin_unlock(&minicache_lock);
- spin_unlock(&minicache_lock);
+ kunmap_atomic(kto);
}
/*
* ARMv4 optimised clear_user_page
*/
-void __attribute__((naked))
-v4_mc_clear_user_page(void *kaddr, unsigned long vaddr)
+void v4_mc_clear_user_highpage(struct page *page, unsigned long vaddr)
{
- asm volatile(
- "str lr, [sp, #-4]!\n\
- mov r1, %0 @ 1\n\
+ void *ptr, *kaddr = kmap_atomic(page);
+ asm volatile("\
+ mov r1, %2 @ 1\n\
mov r2, #0 @ 1\n\
mov r3, #0 @ 1\n\
mov ip, #0 @ 1\n\
mov lr, #0 @ 1\n\
-1: mcr p15, 0, r0, c7, c6, 1 @ 1 invalidate D line\n\
- stmia r0!, {r2, r3, ip, lr} @ 4\n\
- stmia r0!, {r2, r3, ip, lr} @ 4\n\
- mcr p15, 0, r0, c7, c6, 1 @ 1 invalidate D line\n\
- stmia r0!, {r2, r3, ip, lr} @ 4\n\
- stmia r0!, {r2, r3, ip, lr} @ 4\n\
+1: mcr p15, 0, %0, c7, c6, 1 @ 1 invalidate D line\n\
+ stmia %0!, {r2, r3, ip, lr} @ 4\n\
+ stmia %0!, {r2, r3, ip, lr} @ 4\n\
+ mcr p15, 0, %0, c7, c6, 1 @ 1 invalidate D line\n\
+ stmia %0!, {r2, r3, ip, lr} @ 4\n\
+ stmia %0!, {r2, r3, ip, lr} @ 4\n\
subs r1, r1, #1 @ 1\n\
- bne 1b @ 1\n\
- ldr pc, [sp], #4"
- :
- : "I" (PAGE_SIZE / 64));
+ bne 1b @ 1"
+ : "=r" (ptr)
+ : "0" (kaddr), "I" (PAGE_SIZE / 64)
+ : "r1", "r2", "r3", "ip", "lr");
+ kunmap_atomic(kaddr);
}
struct cpu_user_fns v4_mc_user_fns __initdata = {
- .cpu_clear_user_page = v4_mc_clear_user_page,
- .cpu_copy_user_page = v4_mc_copy_user_page,
+ .cpu_clear_user_highpage = v4_mc_clear_user_highpage,
+ .cpu_copy_user_highpage = v4_mc_copy_user_highpage,
};
diff --git a/arch/arm/mm/copypage-v4wb.S b/arch/arm/mm/copypage-v4wb.S
deleted file mode 100644
index 83117354b1c..00000000000
--- a/arch/arm/mm/copypage-v4wb.S
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * linux/arch/arm/lib/copypage.S
- *
- * Copyright (C) 1995-1999 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * ASM optimised string functions
- */
-#include <linux/linkage.h>
-#include <linux/init.h>
-#include <asm/asm-offsets.h>
-
- .text
- .align 5
-/*
- * ARMv4 optimised copy_user_page
- *
- * We flush the destination cache lines just before we write the data into the
- * corresponding address. Since the Dcache is read-allocate, this removes the
- * Dcache aliasing issue. The writes will be forwarded to the write buffer,
- * and merged as appropriate.
- *
- * Note: We rely on all ARMv4 processors implementing the "invalidate D line"
- * instruction. If your processor does not supply this, you have to write your
- * own copy_user_page that does the right thing.
- */
-ENTRY(v4wb_copy_user_page)
- stmfd sp!, {r4, lr} @ 2
- mov r2, #PAGE_SZ/64 @ 1
- ldmia r1!, {r3, r4, ip, lr} @ 4
-1: mcr p15, 0, r0, c7, c6, 1 @ 1 invalidate D line
- stmia r0!, {r3, r4, ip, lr} @ 4
- ldmia r1!, {r3, r4, ip, lr} @ 4+1
- stmia r0!, {r3, r4, ip, lr} @ 4
- ldmia r1!, {r3, r4, ip, lr} @ 4
- mcr p15, 0, r0, c7, c6, 1 @ 1 invalidate D line
- stmia r0!, {r3, r4, ip, lr} @ 4
- ldmia r1!, {r3, r4, ip, lr} @ 4
- subs r2, r2, #1 @ 1
- stmia r0!, {r3, r4, ip, lr} @ 4
- ldmneia r1!, {r3, r4, ip, lr} @ 4
- bne 1b @ 1
- mcr p15, 0, r1, c7, c10, 4 @ 1 drain WB
- ldmfd sp!, {r4, pc} @ 3
-
- .align 5
-/*
- * ARMv4 optimised clear_user_page
- *
- * Same story as above.
- */
-ENTRY(v4wb_clear_user_page)
- str lr, [sp, #-4]!
- mov r1, #PAGE_SZ/64 @ 1
- mov r2, #0 @ 1
- mov r3, #0 @ 1
- mov ip, #0 @ 1
- mov lr, #0 @ 1
-1: mcr p15, 0, r0, c7, c6, 1 @ 1 invalidate D line
- stmia r0!, {r2, r3, ip, lr} @ 4
- stmia r0!, {r2, r3, ip, lr} @ 4
- mcr p15, 0, r0, c7, c6, 1 @ 1 invalidate D line
- stmia r0!, {r2, r3, ip, lr} @ 4
- stmia r0!, {r2, r3, ip, lr} @ 4
- subs r1, r1, #1 @ 1
- bne 1b @ 1
- mcr p15, 0, r1, c7, c10, 4 @ 1 drain WB
- ldr pc, [sp], #4
-
- __INITDATA
-
- .type v4wb_user_fns, #object
-ENTRY(v4wb_user_fns)
- .long v4wb_clear_user_page
- .long v4wb_copy_user_page
- .size v4wb_user_fns, . - v4wb_user_fns
diff --git a/arch/arm/mm/copypage-v4wb.c b/arch/arm/mm/copypage-v4wb.c
new file mode 100644