Diffstat (limited to 'arch/arm/mm')
100 files changed, 9044 insertions, 5109 deletions
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index 4414a01e1e8..c348eaee7ee 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -4,31 +4,14 @@ comment "Processor Type" # which CPUs we support in the kernel image, and the compiler instruction # optimiser behaviour. -# ARM610 -config CPU_ARM610 - bool "Support ARM610 processor" if ARCH_RPC - select CPU_32v3 - select CPU_CACHE_V3 - select CPU_CACHE_VIVT - select CPU_CP15_MMU - select CPU_COPY_V3 if MMU - select CPU_TLB_V3 if MMU - select CPU_PABRT_LEGACY - help - The ARM610 is the successor to the ARM3 processor - and was produced by VLSI Technology Inc. - - Say Y if you want support for the ARM610 processor. - Otherwise, say N. - # ARM7TDMI config CPU_ARM7TDMI bool "Support ARM7TDMI processor" depends on !MMU select CPU_32v4T select CPU_ABRT_LV4T - select CPU_PABRT_LEGACY select CPU_CACHE_V4 + select CPU_PABRT_LEGACY help A 32-bit RISC microprocessor based on the ARM7 processor core which has no memory control unit and cache. @@ -36,35 +19,16 @@ config CPU_ARM7TDMI Say Y if you want support for the ARM7TDMI processor. Otherwise, say N. -# ARM710 -config CPU_ARM710 - bool "Support ARM710 processor" if ARCH_RPC - select CPU_32v3 - select CPU_CACHE_V3 - select CPU_CACHE_VIVT - select CPU_CP15_MMU - select CPU_COPY_V3 if MMU - select CPU_TLB_V3 if MMU - select CPU_PABRT_LEGACY - help - A 32-bit RISC microprocessor based on the ARM7 processor core - designed by Advanced RISC Machines Ltd. The ARM710 is the - successor to the ARM610 processor. It was released in - July 1994 by VLSI Technology Inc. - - Say Y if you want support for the ARM710 processor. - Otherwise, say N. - # ARM720T config CPU_ARM720T bool "Support ARM720T processor" if ARCH_INTEGRATOR select CPU_32v4T select CPU_ABRT_LV4T - select CPU_PABRT_LEGACY select CPU_CACHE_V4 select CPU_CACHE_VIVT - select CPU_CP15_MMU select CPU_COPY_V4WT if MMU + select CPU_CP15_MMU + select CPU_PABRT_LEGACY select CPU_TLB_V4WT if MMU help A 32-bit RISC processor with 8kByte Cache, Write Buffer and @@ -79,9 +43,9 @@ config CPU_ARM740T depends on !MMU select CPU_32v4T select CPU_ABRT_LV4T - select CPU_PABRT_LEGACY - select CPU_CACHE_V3 # although the core is v4t + select CPU_CACHE_V4 select CPU_CP15_MPU + select CPU_PABRT_LEGACY help A 32-bit RISC processor with 8KB cache or 4KB variants, write buffer and MPU(Protection Unit) built around @@ -96,8 +60,8 @@ config CPU_ARM9TDMI depends on !MMU select CPU_32v4T select CPU_ABRT_NOMMU - select CPU_PABRT_LEGACY select CPU_CACHE_V4 + select CPU_PABRT_LEGACY help A 32-bit RISC microprocessor based on the ARM9 processor core which has no memory control unit and cache. 
@@ -110,11 +74,11 @@ config CPU_ARM920T bool "Support ARM920T processor" if ARCH_INTEGRATOR select CPU_32v4T select CPU_ABRT_EV4T - select CPU_PABRT_LEGACY select CPU_CACHE_V4WT select CPU_CACHE_VIVT - select CPU_CP15_MMU select CPU_COPY_V4WB if MMU + select CPU_CP15_MMU + select CPU_PABRT_LEGACY select CPU_TLB_V4WBI if MMU help The ARM920T is licensed to be produced by numerous vendors, @@ -128,11 +92,11 @@ config CPU_ARM922T bool "Support ARM922T processor" if ARCH_INTEGRATOR select CPU_32v4T select CPU_ABRT_EV4T - select CPU_PABRT_LEGACY select CPU_CACHE_V4WT select CPU_CACHE_VIVT - select CPU_CP15_MMU select CPU_COPY_V4WB if MMU + select CPU_CP15_MMU + select CPU_PABRT_LEGACY select CPU_TLB_V4WBI if MMU help The ARM922T is a version of the ARM920T, but with smaller @@ -147,11 +111,11 @@ config CPU_ARM925T bool "Support ARM925T processor" if ARCH_OMAP1 select CPU_32v4T select CPU_ABRT_EV4T - select CPU_PABRT_LEGACY select CPU_CACHE_V4WT select CPU_CACHE_VIVT - select CPU_CP15_MMU select CPU_COPY_V4WB if MMU + select CPU_CP15_MMU + select CPU_PABRT_LEGACY select CPU_TLB_V4WBI if MMU help The ARM925T is a mix between the ARM920T and ARM926T, but with @@ -166,10 +130,10 @@ config CPU_ARM926T bool "Support ARM926T processor" if ARCH_INTEGRATOR || MACH_REALVIEW_EB select CPU_32v5 select CPU_ABRT_EV5TJ - select CPU_PABRT_LEGACY select CPU_CACHE_VIVT - select CPU_CP15_MMU select CPU_COPY_V4WB if MMU + select CPU_CP15_MMU + select CPU_PABRT_LEGACY select CPU_TLB_V4WBI if MMU help This is a variant of the ARM920. It has slightly different @@ -184,11 +148,11 @@ config CPU_FA526 bool select CPU_32v4 select CPU_ABRT_EV4 - select CPU_PABRT_LEGACY - select CPU_CACHE_VIVT - select CPU_CP15_MMU select CPU_CACHE_FA + select CPU_CACHE_VIVT select CPU_COPY_FA if MMU + select CPU_CP15_MMU + select CPU_PABRT_LEGACY select CPU_TLB_FA if MMU help The FA526 is a version of the ARMv4 compatible processor with @@ -203,9 +167,9 @@ config CPU_ARM940T depends on !MMU select CPU_32v4T select CPU_ABRT_NOMMU - select CPU_PABRT_LEGACY select CPU_CACHE_VIVT select CPU_CP15_MPU + select CPU_PABRT_LEGACY help ARM940T is a member of the ARM9TDMI family of general- purpose microprocessors with MPU and separate 4KB @@ -221,9 +185,9 @@ config CPU_ARM946E depends on !MMU select CPU_32v5 select CPU_ABRT_NOMMU - select CPU_PABRT_LEGACY select CPU_CACHE_VIVT select CPU_CP15_MPU + select CPU_PABRT_LEGACY help ARM946E-S is a member of the ARM9E-S family of high- performance, 32-bit system-on-chip processor solutions. 
@@ -237,11 +201,11 @@ config CPU_ARM1020 bool "Support ARM1020T (rev 0) processor" if ARCH_INTEGRATOR select CPU_32v5 select CPU_ABRT_EV4T - select CPU_PABRT_LEGACY select CPU_CACHE_V4WT select CPU_CACHE_VIVT - select CPU_CP15_MMU select CPU_COPY_V4WB if MMU + select CPU_CP15_MMU + select CPU_PABRT_LEGACY select CPU_TLB_V4WBI if MMU help The ARM1020 is the 32K cached version of the ARM10 processor, @@ -253,25 +217,25 @@ config CPU_ARM1020 # ARM1020E - needs validating config CPU_ARM1020E bool "Support ARM1020E processor" if ARCH_INTEGRATOR + depends on n select CPU_32v5 select CPU_ABRT_EV4T - select CPU_PABRT_LEGACY select CPU_CACHE_V4WT select CPU_CACHE_VIVT - select CPU_CP15_MMU select CPU_COPY_V4WB if MMU + select CPU_CP15_MMU + select CPU_PABRT_LEGACY select CPU_TLB_V4WBI if MMU - depends on n # ARM1022E config CPU_ARM1022 bool "Support ARM1022E processor" if ARCH_INTEGRATOR select CPU_32v5 select CPU_ABRT_EV4T - select CPU_PABRT_LEGACY select CPU_CACHE_VIVT - select CPU_CP15_MMU select CPU_COPY_V4WB if MMU # can probably do better + select CPU_CP15_MMU + select CPU_PABRT_LEGACY select CPU_TLB_V4WBI if MMU help The ARM1022E is an implementation of the ARMv5TE architecture @@ -286,10 +250,10 @@ config CPU_ARM1026 bool "Support ARM1026EJ-S processor" if ARCH_INTEGRATOR select CPU_32v5 select CPU_ABRT_EV5T # But need Jazelle, but EV5TJ ignores bit 10 - select CPU_PABRT_LEGACY select CPU_CACHE_VIVT - select CPU_CP15_MMU select CPU_COPY_V4WB if MMU # can probably do better + select CPU_CP15_MMU + select CPU_PABRT_LEGACY select CPU_TLB_V4WBI if MMU help The ARM1026EJ-S is an implementation of the ARMv5TEJ architecture @@ -300,15 +264,15 @@ config CPU_ARM1026 # SA110 config CPU_SA110 - bool "Support StrongARM(R) SA-110 processor" if ARCH_RPC + bool select CPU_32v3 if ARCH_RPC select CPU_32v4 if !ARCH_RPC select CPU_ABRT_EV4 - select CPU_PABRT_LEGACY select CPU_CACHE_V4WB select CPU_CACHE_VIVT - select CPU_CP15_MMU select CPU_COPY_V4WB if MMU + select CPU_CP15_MMU + select CPU_PABRT_LEGACY select CPU_TLB_V4WB if MMU help The Intel StrongARM(R) SA-110 is a 32-bit microprocessor and @@ -324,10 +288,10 @@ config CPU_SA1100 bool select CPU_32v4 select CPU_ABRT_EV4 - select CPU_PABRT_LEGACY select CPU_CACHE_V4WB select CPU_CACHE_VIVT select CPU_CP15_MMU + select CPU_PABRT_LEGACY select CPU_TLB_V4WB if MMU # XScale @@ -335,9 +299,9 @@ config CPU_XSCALE bool select CPU_32v5 select CPU_ABRT_EV5T - select CPU_PABRT_LEGACY select CPU_CACHE_VIVT select CPU_CP15_MMU + select CPU_PABRT_LEGACY select CPU_TLB_V4WBI if MMU # XScale Core Version 3 @@ -345,9 +309,9 @@ config CPU_XSC3 bool select CPU_32v5 select CPU_ABRT_EV5T - select CPU_PABRT_LEGACY select CPU_CACHE_VIVT select CPU_CP15_MMU + select CPU_PABRT_LEGACY select CPU_TLB_V4WBI if MMU select IO_36 @@ -356,21 +320,21 @@ config CPU_MOHAWK bool select CPU_32v5 select CPU_ABRT_EV5T - select CPU_PABRT_LEGACY select CPU_CACHE_VIVT + select CPU_COPY_V4WB if MMU select CPU_CP15_MMU + select CPU_PABRT_LEGACY select CPU_TLB_V4WBI if MMU - select CPU_COPY_V4WB if MMU # Feroceon config CPU_FEROCEON bool select CPU_32v5 select CPU_ABRT_EV5T - select CPU_PABRT_LEGACY select CPU_CACHE_VIVT - select CPU_CP15_MMU select CPU_COPY_FEROCEON if MMU + select CPU_CP15_MMU + select CPU_PABRT_LEGACY select CPU_TLB_FEROCEON if MMU config CPU_FEROCEON_OLD_ID @@ -382,74 +346,117 @@ config CPU_FEROCEON_OLD_ID for which the CPU ID is equal to the ARM926 ID. Relevant for Feroceon-1850 and early Feroceon-2850. 
+# Marvell PJ4 +config CPU_PJ4 + bool + select ARM_THUMBEE + select CPU_V7 + +config CPU_PJ4B + bool + select CPU_V7 + # ARMv6 config CPU_V6 - bool "Support ARM V6 processor" if ARCH_INTEGRATOR || MACH_REALVIEW_EB || MACH_REALVIEW_PBX || ARCH_DOVE + bool "Support ARM V6 processor" if ARCH_INTEGRATOR || MACH_REALVIEW_EB || MACH_REALVIEW_PBX select CPU_32v6 select CPU_ABRT_EV6 - select CPU_PABRT_V6 select CPU_CACHE_V6 select CPU_CACHE_VIPT + select CPU_COPY_V6 if MMU select CPU_CP15_MMU select CPU_HAS_ASID if MMU - select CPU_COPY_V6 if MMU + select CPU_PABRT_V6 select CPU_TLB_V6 if MMU # ARMv6k -config CPU_32v6K - bool "Support ARM V6K processor extensions" if !SMP - depends on CPU_V6 || CPU_V7 - default y if SMP && !(ARCH_MX3 || ARCH_OMAP2) - help - Say Y here if your ARMv6 processor supports the 'K' extension. - This enables the kernel to use some instructions not present - on previous processors, and as such a kernel build with this - enabled will not boot on processors with do not support these - instructions. +config CPU_V6K + bool "Support ARM V6K processor" if ARCH_INTEGRATOR || MACH_REALVIEW_EB || MACH_REALVIEW_PBX + select CPU_32v6 + select CPU_32v6K + select CPU_ABRT_EV6 + select CPU_CACHE_V6 + select CPU_CACHE_VIPT + select CPU_COPY_V6 if MMU + select CPU_CP15_MMU + select CPU_HAS_ASID if MMU + select CPU_PABRT_V6 + select CPU_TLB_V6 if MMU # ARMv7 config CPU_V7 bool "Support ARM V7 processor" if ARCH_INTEGRATOR || MACH_REALVIEW_EB || MACH_REALVIEW_PBX - select CPU_32v6K if !ARCH_OMAP2 + select CPU_32v6K select CPU_32v7 select CPU_ABRT_EV7 - select CPU_PABRT_V7 select CPU_CACHE_V7 select CPU_CACHE_VIPT - select CPU_CP15_MMU - select CPU_HAS_ASID if MMU select CPU_COPY_V6 if MMU + select CPU_CP15_MMU if MMU + select CPU_CP15_MPU if !MMU + select CPU_HAS_ASID if MMU + select CPU_PABRT_V7 select CPU_TLB_V7 if MMU +# ARMv7M +config CPU_V7M + bool + select CPU_32v7M + select CPU_ABRT_NOMMU + select CPU_CACHE_NOP + select CPU_PABRT_LEGACY + select CPU_THUMBONLY + +config CPU_THUMBONLY + bool + # There are no CPUs available with MMU that don't implement an ARM ISA: + depends on !MMU + help + Select this if your CPU doesn't support the 32 bit ARM instructions. + # Figure out what processor architecture version we should be using. # This defines the compiler instruction set which depends on the machine type. 
config CPU_32v3 bool - select TLS_REG_EMUL if SMP || !MMU + select CPU_USE_DOMAINS if MMU select NEEDS_SYSCALL_FOR_CMPXCHG if SMP + select NEED_KUSER_HELPERS + select TLS_REG_EMUL if SMP || !MMU config CPU_32v4 bool - select TLS_REG_EMUL if SMP || !MMU + select CPU_USE_DOMAINS if MMU select NEEDS_SYSCALL_FOR_CMPXCHG if SMP + select NEED_KUSER_HELPERS + select TLS_REG_EMUL if SMP || !MMU config CPU_32v4T bool - select TLS_REG_EMUL if SMP || !MMU + select CPU_USE_DOMAINS if MMU select NEEDS_SYSCALL_FOR_CMPXCHG if SMP + select NEED_KUSER_HELPERS + select TLS_REG_EMUL if SMP || !MMU config CPU_32v5 bool - select TLS_REG_EMUL if SMP || !MMU + select CPU_USE_DOMAINS if MMU select NEEDS_SYSCALL_FOR_CMPXCHG if SMP + select NEED_KUSER_HELPERS + select TLS_REG_EMUL if SMP || !MMU config CPU_32v6 bool select TLS_REG_EMUL if !CPU_32v6K && !MMU +config CPU_32v6K + bool + config CPU_32v7 bool +config CPU_32v7M + bool + # The abort model config CPU_ABRT_NOMMU bool @@ -485,9 +492,6 @@ config CPU_PABRT_V7 bool # The cache model -config CPU_CACHE_V3 - bool - config CPU_CACHE_V4 bool @@ -503,6 +507,9 @@ config CPU_CACHE_V6 config CPU_CACHE_V7 bool +config CPU_CACHE_NOP + bool + config CPU_CACHE_VIVT bool @@ -514,9 +521,6 @@ config CPU_CACHE_FA if MMU # The copy-page model -config CPU_COPY_V3 - bool - config CPU_COPY_V4WT bool @@ -533,11 +537,6 @@ config CPU_COPY_V6 bool # This selects the TLB model -config CPU_TLB_V3 - bool - help - ARM Architecture Version 3 TLB. - config CPU_TLB_V4WT bool help @@ -599,6 +598,12 @@ config CPU_CP15_MPU help Processor has the CP15 register, which has MPU related registers. +config CPU_USE_DOMAINS + bool + help + This option enables or disables the use of domain switching + via the set_fs() function. + # # CPU supports 36-bit I/O # @@ -607,9 +612,31 @@ config IO_36 comment "Processor Features" +config ARM_LPAE + bool "Support for the Large Physical Address Extension" + depends on MMU && CPU_32v7 && !CPU_32v6 && !CPU_32v5 && \ + !CPU_32v4 && !CPU_32v3 + help + Say Y if you have an ARMv7 processor supporting the LPAE page + table format and you would like to access memory beyond the + 4GB limit. The resulting kernel image will not run on + processors without the LPA extension. + + If unsure, say N. + +config ARCH_PHYS_ADDR_T_64BIT + def_bool ARM_LPAE + +config ARCH_DMA_ADDR_T_64BIT + bool + config ARM_THUMB - bool "Support Thumb user binaries" - depends on CPU_ARM720T || CPU_ARM740T || CPU_ARM920T || CPU_ARM922T || CPU_ARM925T || CPU_ARM926T || CPU_ARM940T || CPU_ARM946E || CPU_ARM1020 || CPU_ARM1020E || CPU_ARM1022 || CPU_ARM1026 || CPU_XSCALE || CPU_XSC3 || CPU_MOHAWK || CPU_V6 || CPU_V7 || CPU_FEROCEON + bool "Support Thumb user binaries" if !CPU_THUMBONLY + depends on CPU_ARM720T || CPU_ARM740T || CPU_ARM920T || CPU_ARM922T || \ + CPU_ARM925T || CPU_ARM926T || CPU_ARM940T || CPU_ARM946E || \ + CPU_ARM1020 || CPU_ARM1020E || CPU_ARM1022 || CPU_ARM1026 || \ + CPU_XSCALE || CPU_XSC3 || CPU_MOHAWK || CPU_V6 || CPU_V6K || \ + CPU_V7 || CPU_FEROCEON || CPU_V7M default y help Say Y if you want to include kernel support for running user space @@ -628,6 +655,46 @@ config ARM_THUMBEE Say Y here if you have a CPU with the ThumbEE extension and code to make use of it. Say N for code that can run on CPUs without ThumbEE. +config ARM_VIRT_EXT + bool + depends on MMU + default y if CPU_V7 + help + Enable the kernel to make use of the ARM Virtualization + Extensions to install hypervisors without run-time firmware + assistance. 
+ + A compliant bootloader is required in order to make maximum + use of this feature. Refer to Documentation/arm/Booting for + details. + +config SWP_EMULATE + bool "Emulate SWP/SWPB instructions" + depends on CPU_V7 + default y if SMP + select HAVE_PROC_CPU if PROC_FS + help + ARMv6 architecture deprecates use of the SWP/SWPB instructions. + ARMv7 multiprocessing extensions introduce the ability to disable + these instructions, triggering an undefined instruction exception + when executed. Say Y here to enable software emulation of these + instructions for userspace (not kernel) using LDREX/STREX. + Also creates /proc/cpu/swp_emulation for statistics. + + In some older versions of glibc [<=2.8] SWP is used during futex + trylock() operations with the assumption that the code will not + be preempted. This invalid assumption may be more likely to fail + with SWP emulation enabled, leading to deadlock of the user + application. + + NOTE: when accessing uncached shared regions, LDREX/STREX rely + on an external transaction monitoring block called a global + monitor to maintain update atomicity. If your system does not + implement a global monitor, this option can cause programs that + perform SWP operations to uncached memory to deadlock. + + If unsure, say Y. + config CPU_BIG_ENDIAN bool "Build big-endian kernel" depends on ARCH_SUPPORTS_BIG_ENDIAN @@ -640,7 +707,7 @@ config CPU_BIG_ENDIAN config CPU_ENDIAN_BE8 bool depends on CPU_BIG_ENDIAN - default CPU_V6 || CPU_V7 + default CPU_V6 || CPU_V6K || CPU_V7 help Support for the BE-8 (big-endian) mode on ARMv6 and ARMv7 processors. @@ -656,7 +723,7 @@ config CPU_HIGH_VECTOR bool "Select the High exception vector" help Say Y here to select high exception vector(0xFFFF0000~). - The exception vector can be vary depending on the platform + The exception vector can vary depending on the platform design in nommu mode. If your platform needs to select high exception vector, say Y. Otherwise or if you are unsure, say N, and the low exception @@ -664,7 +731,7 @@ config CPU_HIGH_VECTOR config CPU_ICACHE_DISABLE bool "Disable I-Cache (I-bit)" - depends on CPU_CP15 && !(CPU_ARM610 || CPU_ARM710 || CPU_ARM720T || CPU_ARM740T || CPU_XSCALE || CPU_XSC3) + depends on CPU_CP15 && !(CPU_ARM720T || CPU_ARM740T || CPU_XSCALE || CPU_XSC3) help Say Y here to disable the processor instruction cache. Unless you have a reason not to or are unsure, say N. @@ -706,12 +773,13 @@ config CPU_CACHE_ROUND_ROBIN config CPU_BPREDICT_DISABLE bool "Disable branch prediction" - depends on CPU_ARM1020 || CPU_V6 || CPU_MOHAWK || CPU_XSC3 || CPU_V7 || CPU_FA526 + depends on CPU_ARM1020 || CPU_V6 || CPU_V6K || CPU_MOHAWK || CPU_XSC3 || CPU_V7 || CPU_FA526 help Say Y here to disable branch prediction. If unsure, say N. config TLS_REG_EMUL bool + select NEED_KUSER_HELPERS help An SMP system using a pre-ARMv6 processor (there are apparently a few prototypes like that in existence) and therefore access to @@ -719,14 +787,46 @@ config TLS_REG_EMUL config NEEDS_SYSCALL_FOR_CMPXCHG bool + select NEED_KUSER_HELPERS help SMP on a pre-ARMv6 processor? Well OK then. Forget about fast user space cmpxchg support. It is just not possible. +config NEED_KUSER_HELPERS + bool + +config KUSER_HELPERS + bool "Enable kuser helpers in vector page" if !NEED_KUSER_HELPERS + default y + help + Warning: disabling this option may break user programs. + + Provide kuser helpers in the vector page. 
The kernel provides + helper code to userspace in read only form at a fixed location + in the high vector page to allow userspace to be independent of + the CPU type fitted to the system. This permits binaries to be + run on ARMv4 through to ARMv7 without modification. + + See Documentation/arm/kernel_user_helpers.txt for details. + + However, the fixed address nature of these helpers can be used + by ROP (return orientated programming) authors when creating + exploits. + + If all of the binaries and libraries which run on your platform + are built specifically for your platform, and make no use of + these helpers, then you can turn this option off to hinder + such exploits. However, in that case, if a binary or library + relying on those helpers is run, it will receive a SIGILL signal, + which will terminate the program. + + Say N here only if you are absolutely certain that you do not + need these helpers; otherwise, the safe option is to say Y. + config DMA_CACHE_RWFO bool "Enable read/write for ownership DMA cache maintenance" - depends on CPU_V6 && SMP + depends on CPU_V6K && SMP default y help The Snoop Control Unit on ARM11MPCore does not detect the @@ -754,7 +854,7 @@ config OUTER_CACHE_SYNC config CACHE_FEROCEON_L2 bool "Enable the Feroceon L2 cache controller" - depends on ARCH_KIRKWOOD || ARCH_MV78XX0 + depends on ARCH_KIRKWOOD || ARCH_MV78XX0 || ARCH_MVEBU default y select OUTER_CACHE help @@ -767,29 +867,89 @@ config CACHE_FEROCEON_L2_WRITETHROUGH Say Y here to use the Feroceon L2 cache in writethrough mode. Unless you specifically require this, say N for writeback mode. +config MIGHT_HAVE_CACHE_L2X0 + bool + help + This option should be selected by machines which have a L2x0 + or PL310 cache controller, but where its use is optional. + + The only effect of this option is to make CACHE_L2X0 and + related options available to the user for configuration. + + Boards or SoCs which always require the cache controller + support to be present should select CACHE_L2X0 directly + instead of this option, thus preventing the user from + inadvertently configuring a broken kernel. + config CACHE_L2X0 - bool "Enable the L2x0 outer cache controller" - depends on REALVIEW_EB_ARM11MP || MACH_REALVIEW_PB11MP || MACH_REALVIEW_PB1176 || \ - REALVIEW_EB_A9MP || ARCH_MX35 || ARCH_MX31 || MACH_REALVIEW_PBX || \ - ARCH_NOMADIK || ARCH_OMAP4 || ARCH_S5PV310 || ARCH_TEGRA || \ - ARCH_U8500 || ARCH_VEXPRESS_CA9X4 - default y + bool "Enable the L2x0 outer cache controller" if MIGHT_HAVE_CACHE_L2X0 + default MIGHT_HAVE_CACHE_L2X0 select OUTER_CACHE select OUTER_CACHE_SYNC help This option enables the L2x0 PrimeCell. +if CACHE_L2X0 + config CACHE_PL310 bool - depends on CACHE_L2X0 - default y if CPU_V7 && !CPU_V6 + default y if CPU_V7 && !(CPU_V6 || CPU_V6K) help This option enables optimisations for the PL310 cache controller. +config PL310_ERRATA_588369 + bool "PL310 errata: Clean & Invalidate maintenance operations do not invalidate clean lines" + help + The PL310 L2 cache controller implements three types of Clean & + Invalidate maintenance operations: by Physical Address + (offset 0x7F0), by Index/Way (0x7F8) and by Way (0x7FC). + They are architecturally defined to behave as the execution of a + clean operation followed immediately by an invalidate operation, + both performing to the same memory location. This functionality + is not correctly implemented in PL310 as clean lines are not + invalidated as a result of these operations. 
+ +config PL310_ERRATA_727915 + bool "PL310 errata: Background Clean & Invalidate by Way operation can cause data corruption" + help + PL310 implements the Clean & Invalidate by Way L2 cache maintenance + operation (offset 0x7FC). This operation runs in background so that + PL310 can handle normal accesses while it is in progress. Under very + rare circumstances, due to this erratum, write data can be lost when + PL310 treats a cacheable write transaction during a Clean & + Invalidate by Way operation. + +config PL310_ERRATA_753970 + bool "PL310 errata: cache sync operation may be faulty" + help + This option enables the workaround for the 753970 PL310 (r3p0) erratum. + + Under some condition the effect of cache sync operation on + the store buffer still remains when the operation completes. + This means that the store buffer is always asked to drain and + this prevents it from merging any further writes. The workaround + is to replace the normal offset of cache sync operation (0x730) + by another offset targeting an unmapped PL310 register 0x740. + This has the same effect as the cache sync operation: store buffer + drain and waiting for all buffers empty. + +config PL310_ERRATA_769419 + bool "PL310 errata: no automatic Store Buffer drain" + help + On revisions of the PL310 prior to r3p2, the Store Buffer does + not automatically drain. This can cause normal, non-cacheable + writes to be retained when the memory system is idle, leading + to suboptimal I/O performance for drivers using coherent DMA. + This option adds a write barrier to the cpu_idle loop so that, + on systems with an outer cache, the store buffer is drained + explicitly. + +endif + config CACHE_TAUROS2 bool "Enable the Tauros2 L2 cache controller" - depends on (ARCH_DOVE || ARCH_MMP) + depends on (ARCH_DOVE || ARCH_MMP || CPU_PJ4) default y select OUTER_CACHE help @@ -804,16 +964,22 @@ config CACHE_XSC3L2 help This option enables the L2 cache on XScale3. +config ARM_L1_CACHE_SHIFT_6 + bool + default y if CPU_V7 + help + Setting ARM L1 cache line size to 64 Bytes. + config ARM_L1_CACHE_SHIFT int default 6 if ARM_L1_CACHE_SHIFT_6 default 5 config ARM_DMA_MEM_BUFFERABLE - bool "Use non-cacheable memory for DMA" if CPU_V6 && !CPU_V7 + bool "Use non-cacheable memory for DMA" if (CPU_V6 || CPU_V6K) && !CPU_V7 depends on !(MACH_REALVIEW_PB1176 || REALVIEW_EB_ARM11MP || \ MACH_REALVIEW_PB11MP) - default y if CPU_V6 || CPU_V7 + default y if CPU_V6 || CPU_V6K || CPU_V7 help Historically, the kernel has used strongly ordered mappings to provide DMA coherent memory. With the advent of ARMv7, mapping @@ -835,3 +1001,9 @@ config ARCH_HAS_BARRIERS help This option allows the use of custom mandatory barriers included via the mach/barriers.h file. + +config ARCH_SUPPORTS_BIG_ENDIAN + bool + help + This option specifies the architecture can support big endian + operation. 
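The SWP_EMULATE entry above describes trapping the deprecated SWP/SWPB instructions and emulating them for user space with LDREX/STREX. As a rough sketch only (not code from this patch; the function name is invented), an atomic swap expressed with exclusive load/store looks like this:

/* Illustrative only: SWP-style atomic swap built on LDREX/STREX. */
static inline unsigned long ldrex_strex_swap(volatile unsigned long *ptr,
                                             unsigned long newval)
{
        unsigned long oldval, tmp;

        __asm__ __volatile__(
        "1:     ldrex   %0, [%3]\n"        /* load-exclusive the old value */
        "       strex   %1, %2, [%3]\n"    /* try to store the new value   */
        "       teq     %1, #0\n"          /* strex writes 0 on success    */
        "       bne     1b"                /* lost the reservation: retry  */
        : "=&r" (oldval), "=&r" (tmp)
        : "r" (newval), "r" (ptr)
        : "cc", "memory");

        return oldval;
}

On ARMv7 MP systems the kernel can disable SWP/SWPB, take the resulting undefined-instruction exception, and perform an equivalent exclusive-access sequence on behalf of the faulting user process, which is what CONFIG_SWP_EMULATE enables.

The KUSER_HELPERS entry explains that the kernel exports small helper routines at fixed addresses in the high vector page. For illustration, the documented calling convention for the cmpxchg helper (taken from Documentation/arm/kernel_user_helpers.txt, not from this patch; the wrapper is hypothetical) lets user space call through a fixed pointer:

/* Documented user-space ABI: cmpxchg helper at a fixed vector-page address. */
typedef int (__kuser_cmpxchg_t)(int oldval, int newval, volatile int *ptr);
#define __kuser_cmpxchg (*(__kuser_cmpxchg_t *)0xffff0fc0)

/* Hypothetical wrapper: returns 0 if *ptr was updated from oldval to newval. */
static int set_if_equal(volatile int *ptr, int oldval, int newval)
{
        return __kuser_cmpxchg(oldval, newval, ptr);
}

The fixed, well-known address is what lets binaries run unchanged from ARMv4 through ARMv7, and it is also why the help text warns that the vector page is attractive to ROP exploit authors.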
diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile index d63b6c41375..91da64de440 100644 --- a/arch/arm/mm/Makefile +++ b/arch/arm/mm/Makefile @@ -5,17 +5,19 @@ obj-y := dma-mapping.o extable.o fault.o init.o \ iomap.o -obj-$(CONFIG_MMU) += fault-armv.o flush.o ioremap.o mmap.o \ - pgd.o mmu.o vmregion.o +obj-$(CONFIG_MMU) += fault-armv.o flush.o idmap.o ioremap.o \ + mmap.o pgd.o mmu.o ifneq ($(CONFIG_MMU),y) obj-y += nommu.o endif +obj-$(CONFIG_ARM_PTDUMP) += dump.o obj-$(CONFIG_MODULES) += proc-syms.o obj-$(CONFIG_ALIGNMENT_TRAP) += alignment.o obj-$(CONFIG_HIGHMEM) += highmem.o +obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_CPU_ABRT_NOMMU) += abort-nommu.o obj-$(CONFIG_CPU_ABRT_EV4) += abort-ev4.o @@ -33,18 +35,17 @@ obj-$(CONFIG_CPU_PABRT_LEGACY) += pabort-legacy.o obj-$(CONFIG_CPU_PABRT_V6) += pabort-v6.o obj-$(CONFIG_CPU_PABRT_V7) += pabort-v7.o -obj-$(CONFIG_CPU_CACHE_V3) += cache-v3.o obj-$(CONFIG_CPU_CACHE_V4) += cache-v4.o obj-$(CONFIG_CPU_CACHE_V4WT) += cache-v4wt.o obj-$(CONFIG_CPU_CACHE_V4WB) += cache-v4wb.o obj-$(CONFIG_CPU_CACHE_V6) += cache-v6.o obj-$(CONFIG_CPU_CACHE_V7) += cache-v7.o obj-$(CONFIG_CPU_CACHE_FA) += cache-fa.o +obj-$(CONFIG_CPU_CACHE_NOP) += cache-nop.o AFLAGS_cache-v6.o :=-Wa,-march=armv6 AFLAGS_cache-v7.o :=-Wa,-march=armv7-a -obj-$(CONFIG_CPU_COPY_V3) += copypage-v3.o obj-$(CONFIG_CPU_COPY_V4WT) += copypage-v4wt.o obj-$(CONFIG_CPU_COPY_V4WB) += copypage-v4wb.o obj-$(CONFIG_CPU_COPY_FEROCEON) += copypage-feroceon.o @@ -54,7 +55,6 @@ obj-$(CONFIG_CPU_XSCALE) += copypage-xscale.o obj-$(CONFIG_CPU_XSC3) += copypage-xsc3.o obj-$(CONFIG_CPU_COPY_FA) += copypage-fa.o -obj-$(CONFIG_CPU_TLB_V3) += tlb-v3.o obj-$(CONFIG_CPU_TLB_V4WT) += tlb-v4.o obj-$(CONFIG_CPU_TLB_V4WB) += tlb-v4wb.o obj-$(CONFIG_CPU_TLB_V4WBI) += tlb-v4wbi.o @@ -66,8 +66,6 @@ obj-$(CONFIG_CPU_TLB_FA) += tlb-fa.o AFLAGS_tlb-v6.o :=-Wa,-march=armv6 AFLAGS_tlb-v7.o :=-Wa,-march=armv7-a -obj-$(CONFIG_CPU_ARM610) += proc-arm6_7.o -obj-$(CONFIG_CPU_ARM710) += proc-arm6_7.o obj-$(CONFIG_CPU_ARM7TDMI) += proc-arm7tdmi.o obj-$(CONFIG_CPU_ARM720T) += proc-arm720.o obj-$(CONFIG_CPU_ARM740T) += proc-arm740.o @@ -90,12 +88,15 @@ obj-$(CONFIG_CPU_XSC3) += proc-xsc3.o obj-$(CONFIG_CPU_MOHAWK) += proc-mohawk.o obj-$(CONFIG_CPU_FEROCEON) += proc-feroceon.o obj-$(CONFIG_CPU_V6) += proc-v6.o +obj-$(CONFIG_CPU_V6K) += proc-v6.o obj-$(CONFIG_CPU_V7) += proc-v7.o +obj-$(CONFIG_CPU_V7M) += proc-v7m.o AFLAGS_proc-v6.o :=-Wa,-march=armv6 AFLAGS_proc-v7.o :=-Wa,-march=armv7-a +obj-$(CONFIG_OUTER_CACHE) += l2c-common.o obj-$(CONFIG_CACHE_FEROCEON_L2) += cache-feroceon-l2.o -obj-$(CONFIG_CACHE_L2X0) += cache-l2x0.o +obj-$(CONFIG_CACHE_L2X0) += cache-l2x0.o l2c-l2x0-resume.o obj-$(CONFIG_CACHE_XSC3L2) += cache-xsc3l2.o obj-$(CONFIG_CACHE_TAUROS2) += cache-tauros2.o diff --git a/arch/arm/mm/abort-ev4.S b/arch/arm/mm/abort-ev4.S index 4f18f9e87ba..54473cd4aba 100644 --- a/arch/arm/mm/abort-ev4.S +++ b/arch/arm/mm/abort-ev4.S @@ -3,14 +3,11 @@ /* * Function: v4_early_abort * - * Params : r2 = address of aborted instruction - * : r3 = saved SPSR + * Params : r2 = pt_regs + * : r4 = aborted context pc + * : r5 = aborted context psr * - * Returns : r0 = address of abort - * : r1 = FSR, bit 11 = write - * : r2-r8 = corrupted - * : r9 = preserved - * : sp = pointer to registers + * Returns : r4 - r11, r13 preserved * * Purpose : obtain information about current aborted instruction. * Note: we read user space. 
This means we might cause a data @@ -21,10 +18,8 @@ ENTRY(v4_early_abort) mrc p15, 0, r1, c5, c0, 0 @ get FSR mrc p15, 0, r0, c6, c0, 0 @ get FAR - ldr r3, [r2] @ read aborted ARM instruction + ldr r3, [r4] @ read aborted ARM instruction bic r1, r1, #1 << 11 | 1 << 10 @ clear bits 11 and 10 of FSR tst r3, #1 << 20 @ L = 1 -> write? orreq r1, r1, #1 << 11 @ yes. - mov pc, lr - - + b do_DataAbort diff --git a/arch/arm/mm/abort-ev4t.S b/arch/arm/mm/abort-ev4t.S index b6282548f92..9da704e7b86 100644 --- a/arch/arm/mm/abort-ev4t.S +++ b/arch/arm/mm/abort-ev4t.S @@ -4,14 +4,11 @@ /* * Function: v4t_early_abort * - * Params : r2 = address of aborted instruction - * : r3 = saved SPSR + * Params : r2 = pt_regs + * : r4 = aborted context pc + * : r5 = aborted context psr * - * Returns : r0 = address of abort - * : r1 = FSR, bit 11 = write - * : r2-r8 = corrupted - * : r9 = preserved - * : sp = pointer to registers + * Returns : r4 - r11, r13 preserved * * Purpose : obtain information about current aborted instruction. * Note: we read user space. This means we might cause a data @@ -22,9 +19,9 @@ ENTRY(v4t_early_abort) mrc p15, 0, r1, c5, c0, 0 @ get FSR mrc p15, 0, r0, c6, c0, 0 @ get FAR - do_thumb_abort - ldreq r3, [r2] @ read aborted ARM instruction + do_thumb_abort fsr=r1, pc=r4, psr=r5, tmp=r3 + ldreq r3, [r4] @ read aborted ARM instruction bic r1, r1, #1 << 11 | 1 << 10 @ clear bits 11 and 10 of FSR tst r3, #1 << 20 @ check write orreq r1, r1, #1 << 11 - mov pc, lr + b do_DataAbort diff --git a/arch/arm/mm/abort-ev5t.S b/arch/arm/mm/abort-ev5t.S index 02251b526c0..a0908d4653a 100644 --- a/arch/arm/mm/abort-ev5t.S +++ b/arch/arm/mm/abort-ev5t.S @@ -4,14 +4,11 @@ /* * Function: v5t_early_abort * - * Params : r2 = address of aborted instruction - * : r3 = saved SPSR + * Params : r2 = pt_regs + * : r4 = aborted context pc + * : r5 = aborted context psr * - * Returns : r0 = address of abort - * : r1 = FSR, bit 11 = write - * : r2-r8 = corrupted - * : r9 = preserved - * : sp = pointer to registers + * Returns : r4 - r11, r13 preserved * * Purpose : obtain information about current aborted instruction. * Note: we read user space. This means we might cause a data @@ -22,10 +19,10 @@ ENTRY(v5t_early_abort) mrc p15, 0, r1, c5, c0, 0 @ get FSR mrc p15, 0, r0, c6, c0, 0 @ get FAR - do_thumb_abort - ldreq r3, [r2] @ read aborted ARM instruction + do_thumb_abort fsr=r1, pc=r4, psr=r5, tmp=r3 + ldreq r3, [r4] @ read aborted ARM instruction bic r1, r1, #1 << 11 @ clear bits 11 of FSR - do_ldrd_abort + do_ldrd_abort tmp=ip, insn=r3 tst r3, #1 << 20 @ check write orreq r1, r1, #1 << 11 - mov pc, lr + b do_DataAbort diff --git a/arch/arm/mm/abort-ev5tj.S b/arch/arm/mm/abort-ev5tj.S index bce68d601c8..4006b7a6126 100644 --- a/arch/arm/mm/abort-ev5tj.S +++ b/arch/arm/mm/abort-ev5tj.S @@ -4,14 +4,11 @@ /* * Function: v5tj_early_abort * - * Params : r2 = address of aborted instruction - * : r3 = saved SPSR + * Params : r2 = pt_regs + * : r4 = aborted context pc + * : r5 = aborted context psr * - * Returns : r0 = address of abort - * : r1 = FSR, bit 11 = write - * : r2-r8 = corrupted - * : r9 = preserved - * : sp = pointer to registers + * Returns : r4 - r11, r13 preserved * * Purpose : obtain information about current aborted instruction. * Note: we read user space. This means we might cause a data @@ -23,13 +20,11 @@ ENTRY(v5tj_early_abort) mrc p15, 0, r1, c5, c0, 0 @ get FSR mrc p15, 0, r0, c6, c0, 0 @ get FAR bic r1, r1, #1 << 11 | 1 << 10 @ clear bits 11 and 10 of FSR - tst r3, #PSR_J_BIT @ Java? 
- movne pc, lr - do_thumb_abort - ldreq r3, [r2] @ read aborted ARM instruction - do_ldrd_abort + tst r5, #PSR_J_BIT @ Java? + bne do_DataAbort + do_thumb_abort fsr=r1, pc=r4, psr=r5, tmp=r3 + ldreq r3, [r4] @ read aborted ARM instruction + do_ldrd_abort tmp=ip, insn=r3 tst r3, #1 << 20 @ L = 0 -> write orreq r1, r1, #1 << 11 @ yes. - mov pc, lr - - + b do_DataAbort diff --git a/arch/arm/mm/abort-ev6.S b/arch/arm/mm/abort-ev6.S index f332df7f0d3..3815a8262af 100644 --- a/arch/arm/mm/abort-ev6.S +++ b/arch/arm/mm/abort-ev6.S @@ -4,14 +4,11 @@ /* * Function: v6_early_abort * - * Params : r2 = address of aborted instruction - * : r3 = saved SPSR + * Params : r2 = pt_regs + * : r4 = aborted context pc + * : r5 = aborted context psr * - * Returns : r0 = address of abort - * : r1 = FSR, bit 11 = write - * : r2-r8 = corrupted - * : r9 = preserved - * : sp = pointer to registers + * Returns : r4 - r11, r13 preserved * * Purpose : obtain information about current aborted instruction. * Note: we read user space. This means we might cause a data @@ -20,29 +17,31 @@ */ .align 5 ENTRY(v6_early_abort) -#ifdef CONFIG_CPU_32v6K - clrex -#else +#ifdef CONFIG_CPU_V6 sub r1, sp, #4 @ Get unused stack location strex r0, r1, [r1] @ Clear the exclusive monitor +#elif defined(CONFIG_CPU_32v6K) + clrex #endif mrc p15, 0, r1, c5, c0, 0 @ get FSR mrc p15, 0, r0, c6, c0, 0 @ get FAR /* - * Faulty SWP instruction on 1136 doesn't set bit 11 in DFSR (erratum 326103). - * The test below covers all the write situations, including Java bytecodes + * Faulty SWP instruction on 1136 doesn't set bit 11 in DFSR. */ +#ifdef CONFIG_ARM_ERRATA_326103 + ldr ip, =0x4107b36 + mrc p15, 0, r3, c0, c0, 0 @ get processor id + teq ip, r3, lsr #4 @ r0 ARM1136? + bne do_DataAbort + tst r5, #PSR_J_BIT @ Java? + tsteq r5, #PSR_T_BIT @ Thumb? + bne do_DataAbort bic r1, r1, #1 << 11 @ clear bit 11 of FSR - tst r3, #PSR_J_BIT @ Java? - movne pc, lr - do_thumb_abort - ldreq r3, [r2] @ read aborted ARM instruction -#ifdef CONFIG_CPU_ENDIAN_BE8 - reveq r3, r3 -#endif - do_ldrd_abort + ldr r3, [r4] @ read aborted ARM instruction + ARM_BE8(rev r3, r3) + + do_ldrd_abort tmp=ip, insn=r3 tst r3, #1 << 20 @ L = 0 -> write orreq r1, r1, #1 << 11 @ yes. - mov pc, lr - - +#endif + b do_DataAbort diff --git a/arch/arm/mm/abort-ev7.S b/arch/arm/mm/abort-ev7.S index ec88b157d3b..703375277ba 100644 --- a/arch/arm/mm/abort-ev7.S +++ b/arch/arm/mm/abort-ev7.S @@ -3,14 +3,11 @@ /* * Function: v7_early_abort * - * Params : r2 = address of aborted instruction - * : r3 = saved SPSR + * Params : r2 = pt_regs + * : r4 = aborted context pc + * : r5 = aborted context psr * - * Returns : r0 = address of abort - * : r1 = FSR, bit 11 = write - * : r2-r8 = corrupted - * : r9 = preserved - * : sp = pointer to registers + * Returns : r4 - r11, r13 preserved * * Purpose : obtain information about current aborted instruction. 
*/ @@ -37,18 +34,18 @@ ENTRY(v7_early_abort) ldr r3, =0x40d @ On permission fault and r3, r1, r3 cmp r3, #0x0d - movne pc, lr + bne do_DataAbort mcr p15, 0, r0, c7, c8, 0 @ Retranslate FAR isb - mrc p15, 0, r2, c7, c4, 0 @ Read the PAR - and r3, r2, #0x7b @ On translation fault + mrc p15, 0, ip, c7, c4, 0 @ Read the PAR + and r3, ip, #0x7b @ On translation fault cmp r3, #0x0b - movne pc, lr + bne do_DataAbort bic r1, r1, #0xf @ Fix up FSR FS[5:0] - and r2, r2, #0x7e - orr r1, r1, r2, LSR #1 + and ip, ip, #0x7e + orr r1, r1, ip, LSR #1 #endif - mov pc, lr + b do_DataAbort ENDPROC(v7_early_abort) diff --git a/arch/arm/mm/abort-lv4t.S b/arch/arm/mm/abort-lv4t.S index 9fb7b0e25ea..f3982580c27 100644 --- a/arch/arm/mm/abort-lv4t.S +++ b/arch/arm/mm/abort-lv4t.S @@ -3,14 +3,11 @@ /* * Function: v4t_late_abort * - * Params : r2 = address of aborted instruction - * : r3 = saved SPSR + * Params : r2 = pt_regs + * : r4 = aborted context pc + * : r5 = aborted context psr * - * Returns : r0 = address of abort - * : r1 = FSR, bit 11 = write - * : r2-r8 = corrupted - * : r9 = preserved - * : sp = pointer to registers + * Returns : r4-r5, r10-r11, r13 preserved * * Purpose : obtain information about current aborted instruction. * Note: we read user space. This means we might cause a data @@ -18,7 +15,7 @@ * picture. Unfortunately, this does happen. We live with it. */ ENTRY(v4t_late_abort) - tst r3, #PSR_T_BIT @ check for thumb mode + tst r5, #PSR_T_BIT @ check for thumb mode #ifdef CONFIG_CPU_CP15_MMU mrc p15, 0, r1, c5, c0, 0 @ get FSR mrc p15, 0, r0, c6, c0, 0 @ get FAR @@ -28,7 +25,7 @@ ENTRY(v4t_late_abort) mov r1, #0 #endif bne .data_thumb_abort - ldr r8, [r2] @ read arm instruction + ldr r8, [r4] @ read arm instruction tst r8, #1 << 20 @ L = 1 -> write? orreq r1, r1, #1 << 11 @ yes. and r7, r8, #15 << 24 @@ -47,86 +44,84 @@ ENTRY(v4t_late_abort) /* 9 */ b .data_arm_ldmstm @ ldm*b rn, <rlist> /* a */ b .data_unknown /* b */ b .data_unknown -/* c */ mov pc, lr @ ldc rd, [rn], #m @ Same as ldr rd, [rn], #m -/* d */ mov pc, lr @ ldc rd, [rn, #m] +/* c */ b do_DataAbort @ ldc rd, [rn], #m @ Same as ldr rd, [rn], #m +/* d */ b do_DataAbort @ ldc rd, [rn, #m] /* e */ b .data_unknown /* f */ .data_unknown: @ Part of jumptable - mov r0, r2 + mov r0, r4 mov r1, r8 - mov r2, sp - bl baddataabort - b ret_from_exception + b baddataabort .data_arm_ldmstm: tst r8, #1 << 21 @ check writeback bit - moveq pc, lr @ no writeback -> no fixup + beq do_DataAbort @ no writeback -> no fixup mov r7, #0x11 orr r7, r7, #0x1100 and r6, r8, r7 - and r2, r8, r7, lsl #1 - add r6, r6, r2, lsr #1 - and r2, r8, r7, lsl #2 - add r6, r6, r2, lsr #2 - and r2, r8, r7, lsl #3 - add r6, r6, r2, lsr #3 + and r9, r8, r7, lsl #1 + add r6, r6, r9, lsr #1 + and r9, r8, r7, lsl #2 + add r6, r6, r9, lsr #2 + and r9, r8, r7, lsl #3 + add r6, r6, r9, lsr #3 add r6, r6, r6, lsr #8 add r6, r6, r6, lsr #4 and r6, r6, #15 @ r6 = no. of registers to transfer. 
- and r5, r8, #15 << 16 @ Extract 'n' from instruction - ldr r7, [sp, r5, lsr #14] @ Get register 'Rn' + and r9, r8, #15 << 16 @ Extract 'n' from instruction + ldr r7, [r2, r9, lsr #14] @ Get register 'Rn' tst r8, #1 << 23 @ Check U bit subne r7, r7, r6, lsl #2 @ Undo increment addeq r7, r7, r6, lsl #2 @ Undo decrement - str r7, [sp, r5, lsr #14] @ Put register 'Rn' - mov pc, lr + str r7, [r2, r9, lsr #14] @ Put register 'Rn' + b do_DataAbort .data_arm_lateldrhpre: tst r8, #1 << 21 @ Check writeback bit - moveq pc, lr @ No writeback -> no fixup + beq do_DataAbort @ No writeback -> no fixup .data_arm_lateldrhpost: - and r5, r8, #0x00f @ get Rm / low nibble of immediate value + and r9, r8, #0x00f @ get Rm / low nibble of immediate value tst r8, #1 << 22 @ if (immediate offset) andne r6, r8, #0xf00 @ { immediate high nibble - orrne r6, r5, r6, lsr #4 @ combine nibbles } else - ldreq r6, [sp, r5, lsl #2] @ { load Rm value } + orrne r6, r9, r6, lsr #4 @ combine nibbles } else + ldreq r6, [r2, r9, lsl #2] @ { load Rm value } .data_arm_apply_r6_and_rn: - and r5, r8, #15 << 16 @ Extract 'n' from instruction - ldr r7, [sp, r5, lsr #14] @ Get register 'Rn' + and r9, r8, #15 << 16 @ Extract 'n' from instruction + ldr r7, [r2, r9, lsr #14] @ Get register 'Rn' tst r8, #1 << 23 @ Check U bit subne r7, r7, r6 @ Undo incrmenet addeq r7, r7, r6 @ Undo decrement - str r7, [sp, r5, lsr #14] @ Put register 'Rn' - mov pc, lr + str r7, [r2, r9, lsr #14] @ Put register 'Rn' + b do_DataAbort .data_arm_lateldrpreconst: tst r8, #1 << 21 @ check writeback bit - moveq pc, lr @ no writeback -> no fixup + beq do_DataAbort @ no writeback -> no fixup .data_arm_lateldrpostconst: - movs r2, r8, lsl #20 @ Get offset - moveq pc, lr @ zero -> no fixup - and r5, r8, #15 << 16 @ Extract 'n' from instruction - ldr r7, [sp, r5, lsr #14] @ Get register 'Rn' + movs r6, r8, lsl #20 @ Get offset + beq do_DataAbort @ zero -> no fixup + and r9, r8, #15 << 16 @ Extract 'n' from instruction + ldr r7, [r2, r9, lsr #14] @ Get register 'Rn' tst r8, #1 << 23 @ Check U bit - subne r7, r7, r2, lsr #20 @ Undo increment - addeq r7, r7, r2, lsr #20 @ Undo decrement - str r7, [sp, r5, lsr #14] @ Put register 'Rn' - mov pc, lr + subne r7, r7, r6, lsr #20 @ Undo increment + addeq r7, r7, r6, lsr #20 @ Undo decrement + str r7, [r2, r9, lsr #14] @ Put register 'Rn' + b do_DataAbort .data_arm_lateldrprereg: tst r8, #1 << 21 @ check writeback bit - moveq pc, lr @ no writeback -> no fixup + beq do_DataAbort @ no writeback -> no fixup .data_arm_lateldrpostreg: and r7, r8, #15 @ Extract 'm' from instruction - ldr r6, [sp, r7, lsl #2] @ Get register 'Rm' - mov r5, r8, lsr #7 @ get shift count - ands r5, r5, #31 + ldr r6, [r2, r7, lsl #2] @ Get register 'Rm' + mov r9, r8, lsr #7 @ get shift count + ands r9, r9, #31 and r7, r8, #0x70 @ get shift type orreq r7, r7, #8 @ shift count = 0 add pc, pc, r7 nop - mov r6, r6, lsl r5 @ 0: LSL #!0 + mov r6, r6, lsl r9 @ 0: LSL #!0 b .data_arm_apply_r6_and_rn b .data_arm_apply_r6_and_rn @ 1: LSL #0 nop @@ -134,7 +129,7 @@ ENTRY(v4t_late_abort) nop b .data_unknown @ 3: MUL? nop - mov r6, r6, lsr r5 @ 4: LSR #!0 + mov r6, r6, lsr r9 @ 4: LSR #!0 b .data_arm_apply_r6_and_rn mov r6, r6, lsr #32 @ 5: LSR #32 b .data_arm_apply_r6_and_rn @@ -142,7 +137,7 @@ ENTRY(v4t_late_abort) nop b .data_unknown @ 7: MUL? 
nop - mov r6, r6, asr r5 @ 8: ASR #!0 + mov r6, r6, asr r9 @ 8: ASR #!0 b .data_arm_apply_r6_and_rn mov r6, r6, asr #32 @ 9: ASR #32 b .data_arm_apply_r6_and_rn @@ -150,7 +145,7 @@ ENTRY(v4t_late_abort) nop b .data_unknown @ B: MUL? nop - mov r6, r6, ror r5 @ C: ROR #!0 + mov r6, r6, ror r9 @ C: ROR #!0 b .data_arm_apply_r6_and_rn mov r6, r6, rrx @ D: RRX b .data_arm_apply_r6_and_rn @@ -159,7 +154,7 @@ ENTRY(v4t_late_abort) b .data_unknown @ F: MUL? .data_thumb_abort: - ldrh r8, [r2] @ read instruction + ldrh r8, [r4] @ read instruction tst r8, #1 << 11 @ L = 1 -> write? orreq r1, r1, #1 << 8 @ yes and r7, r8, #15 << 12 @@ -172,10 +167,10 @@ ENTRY(v4t_late_abort) /* 3 */ b .data_unknown /* 4 */ b .data_unknown /* 5 */ b .data_thumb_reg -/* 6 */ mov pc, lr -/* 7 */ mov pc, lr -/* 8 */ mov pc, lr -/* 9 */ mov pc, lr +/* 6 */ b do_DataAbort +/* 7 */ b do_DataAbort +/* 8 */ b do_DataAbort +/* 9 */ b do_DataAbort /* A */ b .data_unknown /* B */ b .data_thumb_pushpop /* C */ b .data_thumb_ldmstm @@ -185,41 +180,41 @@ ENTRY(v4t_late_abort) .data_thumb_reg: tst r8, #1 << 9 - moveq pc, lr + beq do_DataAbort tst r8, #1 << 10 @ If 'S' (signed) bit is set movne r1, #0 @ it must be a load instr - mov pc, lr + b do_DataAbort .data_thumb_pushpop: tst r8, #1 << 10 beq .data_unknown and r6, r8, #0x55 @ hweight8(r8) + R bit - and r2, r8, #0xaa - add r6, r6, r2, lsr #1 - and r2, r6, #0xcc + and r9, r8, #0xaa + add r6, r6, r9, lsr #1 + and r9, r6, #0xcc and r6, r6, #0x33 - add r6, r6, r2, lsr #2 + add r6, r6, r9, lsr #2 movs r7, r8, lsr #9 @ C = r8 bit 8 (R bit) adc r6, r6, r6, lsr #4 @ high + low nibble + R bit and r6, r6, #15 @ number of regs to transfer - ldr r7, [sp, #13 << 2] + ldr r7, [r2, #13 << 2] tst r8, #1 << 11 addeq r7, r7, r6, lsl #2 @ increment SP if PUSH subne r7, r7, r6, lsl #2 @ decrement SP if POP - str r7, [sp, #13 << 2] - mov pc, lr + str r7, [r2, #13 << 2] + b do_DataAbort .data_thumb_ldmstm: and r6, r8, #0x55 @ hweight8(r8) - and r2, r8, #0xaa - add r6, r6, r2, lsr #1 - and r2, r6, #0xcc + and r9, r8, #0xaa + add r6, r6, r9, lsr #1 + and r9, r6, #0xcc and r6, r6, #0x33 - add r6, r6, r2, lsr #2 + add r6, r6, r9, lsr #2 add r6, r6, r6, lsr #4 - and r5, r8, #7 << 8 - ldr r7, [sp, r5, lsr #6] + and r9, r8, #7 << 8 + ldr r7, [r2, r9, lsr #6] and r6, r6, #15 @ number of regs to transfer sub r7, r7, r6, lsl #2 @ always decrement - str r7, [sp, r5, lsr #6] - mov pc, lr + str r7, [r2, r9, lsr #6] + b do_DataAbort diff --git a/arch/arm/mm/abort-macro.S b/arch/arm/mm/abort-macro.S index d7cb1bfa51a..2cbf68ef0e8 100644 --- a/arch/arm/mm/abort-macro.S +++ b/arch/arm/mm/abort-macro.S @@ -9,34 +9,32 @@ * */ - .macro do_thumb_abort - tst r3, #PSR_T_BIT + .macro do_thumb_abort, fsr, pc, psr, tmp + tst \psr, #PSR_T_BIT beq not_thumb - ldrh r3, [r2] @ Read aborted Thumb instruction - and r3, r3, # 0xfe00 @ Mask opcode field - cmp r3, # 0x5600 @ Is it ldrsb? - orreq r3, r3, #1 << 11 @ Set L-bit if yes - tst r3, #1 << 11 @ L = 0 -> write - orreq r1, r1, #1 << 11 @ yes. - mov pc, lr + ldrh \tmp, [\pc] @ Read aborted Thumb instruction + and \tmp, \tmp, # 0xfe00 @ Mask opcode field + cmp \tmp, # 0x5600 @ Is it ldrsb? + orreq \tmp, \tmp, #1 << 11 @ Set L-bit if yes + tst \tmp, #1 << 11 @ L = 0 -> write + orreq \fsr, \fsr, #1 << 11 @ yes. + b do_DataAbort not_thumb: .endm /* - * We check for the following insturction encoding for LDRD. + * We check for the following instruction encoding for LDRD. 
* - * [27:25] == 0 + * [27:25] == 000 * [7:4] == 1101 * [20] == 0 */ - .macro do_ldrd_abort - tst r3, #0x0e000000 @ [27:25] == 0 + .macro do_ldrd_abort, tmp, insn + tst \insn, #0x0e100000 @ [27:25,20] == 0 bne not_ldrd - and r2, r3, #0x000000f0 @ [7:4] == 1101 - cmp r2, #0x000000d0 - bne not_ldrd - tst r3, #1 << 20 @ [20] == 0 - moveq pc, lr + and \tmp, \insn, #0x000000f0 @ [7:4] == 1101 + cmp \tmp, #0x000000d0 + beq do_DataAbort not_ldrd: .endm diff --git a/arch/arm/mm/abort-nommu.S b/arch/arm/mm/abort-nommu.S index 625e580945b..119cb479c2a 100644 --- a/arch/arm/mm/abort-nommu.S +++ b/arch/arm/mm/abort-nommu.S @@ -3,11 +3,11 @@ /* * Function: nommu_early_abort * - * Params : r2 = address of aborted instruction - * : r3 = saved SPSR + * Params : r2 = pt_regs + * : r4 = aborted context pc + * : r5 = aborted context psr * - * Returns : r0 = 0 (abort address) - * : r1 = 0 (FSR) + * Returns : r4 - r11, r13 preserved * * Note: There is no FSR/FAR on !CPU_CP15_MMU cores. * Just fill zero into the registers. @@ -16,5 +16,5 @@ ENTRY(nommu_early_abort) mov r0, #0 @ clear r0, r1 (no FSR/FAR) mov r1, #0 - mov pc, lr + b do_DataAbort ENDPROC(nommu_early_abort) diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index 724ba3bce72..b8cb1a2688a 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -22,9 +22,13 @@ #include <linux/sched.h> #include <linux/uaccess.h> +#include <asm/cp15.h> +#include <asm/system_info.h> #include <asm/unaligned.h> +#include <asm/opcodes.h> #include "fault.h" +#include "mm.h" /* * 32-bit misaligned trap handler (c) 1998 San Mehat (CCC) -July 1998 @@ -78,6 +82,7 @@ static unsigned long ai_word; static unsigned long ai_dword; static unsigned long ai_multi; static int ai_usermode; +static unsigned long cr_no_alignment; core_param(alignment, ai_usermode, int, 0600); @@ -85,6 +90,33 @@ core_param(alignment, ai_usermode, int, 0600); #define UM_FIXUP (1 << 1) #define UM_SIGNAL (1 << 2) +/* Return true if and only if the ARMv6 unaligned access model is in use. */ +static bool cpu_is_v6_unaligned(void) +{ + return cpu_architecture() >= CPU_ARCH_ARMv6 && get_cr() & CR_U; +} + +static int safe_usermode(int new_usermode, bool warn) +{ + /* + * ARMv6 and later CPUs can perform unaligned accesses for + * most single load and store instructions up to word size. + * LDM, STM, LDRD and STRD still need to be handled. + * + * Ignoring the alignment fault is not an option on these + * CPUs since we spin re-faulting the instruction without + * making any progress. + */ + if (cpu_is_v6_unaligned() && !(new_usermode & (UM_FIXUP | UM_SIGNAL))) { + new_usermode |= UM_FIXUP; + + if (warn) + printk(KERN_WARNING "alignment: ignoring faults is unsafe on this CPU. 
Defaulting to fixup mode.\n"); + } + + return new_usermode; +} + #ifdef CONFIG_PROC_FS static const char *usermode_action[] = { "ignored", @@ -125,7 +157,7 @@ static ssize_t alignment_proc_write(struct file *file, const char __user *buffer if (get_user(mode, buffer)) return -EFAULT; if (mode >= '0' && mode <= '5') - ai_usermode = mode - '0'; + ai_usermode = safe_usermode(mode - '0', true); } return count; } @@ -670,7 +702,6 @@ do_alignment_t32_to_handler(unsigned long *pinstr, struct pt_regs *regs, unsigned long instr = *pinstr; u16 tinst1 = (instr >> 16) & 0xffff; u16 tinst2 = instr & 0xffff; - poffset->un = 0; switch (tinst1 & 0xffe0) { /* A6.3.5 Load/Store multiple */ @@ -717,38 +748,42 @@ do_alignment_t32_to_handler(unsigned long *pinstr, struct pt_regs *regs, static int do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { - union offset_union offset; + union offset_union uninitialized_var(offset); unsigned long instr = 0, instrptr; int (*handler)(unsigned long addr, unsigned long instr, struct pt_regs *regs); unsigned int type; - mm_segment_t fs; unsigned int fault; u16 tinstr = 0; int isize = 4; int thumb2_32b = 0; + if (interrupts_enabled(regs)) + local_irq_enable(); + instrptr = instruction_pointer(regs); - fs = get_fs(); - set_fs(KERNEL_DS); if (thumb_mode(regs)) { - fault = __get_user(tinstr, (u16 *)(instrptr & ~1)); + u16 *ptr = (u16 *)(instrptr & ~1); + fault = probe_kernel_address(ptr, tinstr); + tinstr = __mem_to_opcode_thumb16(tinstr); if (!fault) { if (cpu_architecture() >= CPU_ARCH_ARMv7 && IS_T32(tinstr)) { /* Thumb-2 32-bit */ u16 tinst2 = 0; - fault = __get_user(tinst2, (u16 *)(instrptr+2)); - instr = (tinstr << 16) | tinst2; + fault = probe_kernel_address(ptr + 1, tinst2); + tinst2 = __mem_to_opcode_thumb16(tinst2); + instr = __opcode_thumb32_compose(tinstr, tinst2); thumb2_32b = 1; } else { isize = 2; instr = thumb2arm(tinstr); } } - } else - fault = __get_user(instr, (u32 *)instrptr); - set_fs(fs); + } else { + fault = probe_kernel_address(instrptr, instr); + instr = __mem_to_opcode_arm(instr); + } if (fault) { type = TYPE_FAULT; @@ -822,10 +857,13 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) break; case 0x08000000: /* ldm or stm, or thumb-2 32bit instruction */ - if (thumb2_32b) + if (thumb2_32b) { + offset.un = 0; handler = do_alignment_t32_to_handler(&instr, regs, &offset); - else + } else { + offset.un = 0; handler = do_alignment_ldmstm; + } break; default: @@ -883,9 +921,16 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) if (ai_usermode & UM_FIXUP) goto fixup; - if (ai_usermode & UM_SIGNAL) - force_sig(SIGBUS, current); - else { + if (ai_usermode & UM_SIGNAL) { + siginfo_t si; + + si.si_signo = SIGBUS; + si.si_errno = 0; + si.si_code = BUS_ADRALN; + si.si_addr = (void __user *)addr; + + force_sig_info(si.si_signo, &si, current); + } else { /* * We're about to disable the alignment trap and return to * user space. But if an interrupt occurs before actually @@ -906,6 +951,13 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) return 0; } +static int __init noalign_setup(char *__unused) +{ + set_cr(__clear_cr(CR_A)); + return 1; +} +__setup("noalign", noalign_setup); + /* * This needs to be done after sysctl_init, otherwise sys/ will be * overwritten. 
Actually, this shouldn't be in sys/ at all since @@ -923,23 +975,14 @@ static int __init alignment_init(void) return -ENOMEM; #endif - /* - * ARMv6 and later CPUs can perform unaligned accesses for - * most single load and store instructions up to word size. - * LDM, STM, LDRD and STRD still need to be handled. - * - * Ignoring the alignment fault is not an option on these - * CPUs since we spin re-faulting the instruction without - * making any progress. - */ - if (cpu_architecture() >= CPU_ARCH_ARMv6 && (cr_alignment & CR_U)) { - cr_alignment &= ~CR_A; - cr_no_alignment &= ~CR_A; - set_cr(cr_alignment); - ai_usermode = UM_FIXUP; + if (cpu_is_v6_unaligned()) { + set_cr(__clear_cr(CR_A)); + ai_usermode = safe_usermode(ai_usermode, false); } - hook_fault_code(1, do_alignment, SIGBUS, BUS_ADRALN, + cr_no_alignment = get_cr() & ~CR_A; + + hook_fault_code(FAULT_CODE_ALIGNMENT, do_alignment, SIGBUS, BUS_ADRALN, "alignment exception"); /* diff --git a/arch/arm/mm/cache-aurora-l2.h b/arch/arm/mm/cache-aurora-l2.h new file mode 100644 index 00000000000..c8612476983 --- /dev/null +++ b/arch/arm/mm/cache-aurora-l2.h @@ -0,0 +1,55 @@ +/* + * AURORA shared L2 cache controller support + * + * Copyright (C) 2012 Marvell + * + * Yehuda Yitschak <yehuday@marvell.com> + * Gregory CLEMENT <gregory.clement@free-electrons.com> + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + */ + +#ifndef __ASM_ARM_HARDWARE_AURORA_L2_H +#define __ASM_ARM_HARDWARE_AURORA_L2_H + +#define AURORA_SYNC_REG 0x700 +#define AURORA_RANGE_BASE_ADDR_REG 0x720 +#define AURORA_FLUSH_PHY_ADDR_REG 0x7f0 +#define AURORA_INVAL_RANGE_REG 0x774 +#define AURORA_CLEAN_RANGE_REG 0x7b4 +#define AURORA_FLUSH_RANGE_REG 0x7f4 + +#define AURORA_ACR_REPLACEMENT_OFFSET 27 +#define AURORA_ACR_REPLACEMENT_MASK \ + (0x3 << AURORA_ACR_REPLACEMENT_OFFSET) +#define AURORA_ACR_REPLACEMENT_TYPE_WAYRR \ + (0 << AURORA_ACR_REPLACEMENT_OFFSET) +#define AURORA_ACR_REPLACEMENT_TYPE_LFSR \ + (1 << AURORA_ACR_REPLACEMENT_OFFSET) +#define AURORA_ACR_REPLACEMENT_TYPE_SEMIPLRU \ + (3 << AURORA_ACR_REPLACEMENT_OFFSET) + +#define AURORA_ACR_FORCE_WRITE_POLICY_OFFSET 0 +#define AURORA_ACR_FORCE_WRITE_POLICY_MASK \ + (0x3 << AURORA_ACR_FORCE_WRITE_POLICY_OFFSET) +#define AURORA_ACR_FORCE_WRITE_POLICY_DIS \ + (0 << AURORA_ACR_FORCE_WRITE_POLICY_OFFSET) +#define AURORA_ACR_FORCE_WRITE_BACK_POLICY \ + (1 << AURORA_ACR_FORCE_WRITE_POLICY_OFFSET) +#define AURORA_ACR_FORCE_WRITE_THRO_POLICY \ + (2 << AURORA_ACR_FORCE_WRITE_POLICY_OFFSET) + +#define MAX_RANGE_SIZE 1024 + +#define AURORA_WAY_SIZE_SHIFT 2 + +#define AURORA_CTRL_FW 0x100 + +/* chose a number outside L2X0_CACHE_ID_PART_MASK to be sure to make + * the distinction between a number coming from hardware and a number + * coming from the device tree */ +#define AURORA_CACHE_ID 0x100 + +#endif /* __ASM_ARM_HARDWARE_AURORA_L2_H */ diff --git a/arch/arm/mm/cache-fa.S b/arch/arm/mm/cache-fa.S index 1fa6f71470d..e505befe51b 100644 --- a/arch/arm/mm/cache-fa.S +++ b/arch/arm/mm/cache-fa.S @@ -240,18 +240,10 @@ ENTRY(fa_dma_unmap_area) mov pc, lr ENDPROC(fa_dma_unmap_area) + .globl fa_flush_kern_cache_louis + .equ fa_flush_kern_cache_louis, fa_flush_kern_cache_all + __INITDATA - .type fa_cache_fns, #object -ENTRY(fa_cache_fns) - .long fa_flush_icache_all - .long fa_flush_kern_cache_all - .long fa_flush_user_cache_all - .long fa_flush_user_cache_range - .long fa_coherent_kern_range - .long 
fa_coherent_user_range - .long fa_flush_kern_dcache_area - .long fa_dma_map_area - .long fa_dma_unmap_area - .long fa_dma_flush_range - .size fa_cache_fns, . - fa_cache_fns + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions fa diff --git a/arch/arm/mm/cache-feroceon-l2.c b/arch/arm/mm/cache-feroceon-l2.c index 6e77c042d8e..e028a7f2ebc 100644 --- a/arch/arm/mm/cache-feroceon-l2.c +++ b/arch/arm/mm/cache-feroceon-l2.c @@ -13,13 +13,15 @@ */ #include <linux/init.h> +#include <linux/of.h> +#include <linux/of_address.h> +#include <linux/highmem.h> +#include <linux/io.h> #include <asm/cacheflush.h> -#include <asm/kmap_types.h> -#include <asm/fixmap.h> -#include <asm/pgtable.h> -#include <asm/tlbflush.h> -#include <plat/cache-feroceon-l2.h> -#include "mm.h" +#include <asm/cp15.h> +#include <asm/hardware/cache-feroceon-l2.h> + +#define L2_WRITETHROUGH_KIRKWOOD BIT(4) /* * Low-level cache maintenance operations. @@ -39,27 +41,30 @@ * between which we don't want to be preempted. */ -static inline unsigned long l2_start_va(unsigned long paddr) +static inline unsigned long l2_get_va(unsigned long paddr) { #ifdef CONFIG_HIGHMEM /* - * Let's do our own fixmap stuff in a minimal way here. * Because range ops can't be done on physical addresses, * we simply install a virtual mapping for it only for the * TLB lookup to occur, hence no need to flush the untouched - * memory mapping. This is protected with the disabling of - * interrupts by the caller. + * memory mapping afterwards (note: a cache flush may happen + * in some circumstances depending on the path taken in kunmap_atomic). */ - unsigned long idx = KM_L2_CACHE + KM_TYPE_NR * smp_processor_id(); - unsigned long vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); - set_pte_ext(TOP_PTE(vaddr), pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL), 0); - local_flush_tlb_kernel_page(vaddr); - return vaddr + (paddr & ~PAGE_MASK); + void *vaddr = kmap_atomic_pfn(paddr >> PAGE_SHIFT); + return (unsigned long)vaddr + (paddr & ~PAGE_MASK); #else return __phys_to_virt(paddr); #endif } +static inline void l2_put_va(unsigned long vaddr) +{ +#ifdef CONFIG_HIGHMEM + kunmap_atomic((void *)vaddr); +#endif +} + static inline void l2_clean_pa(unsigned long addr) { __asm__("mcr p15, 1, %0, c15, c9, 3" : : "r" (addr)); @@ -76,13 +81,14 @@ static inline void l2_clean_pa_range(unsigned long start, unsigned long end) */ BUG_ON((start ^ end) >> PAGE_SHIFT); - raw_local_irq_save(flags); - va_start = l2_start_va(start); + va_start = l2_get_va(start); va_end = va_start + (end - start); + raw_local_irq_save(flags); __asm__("mcr p15, 1, %0, c15, c9, 4\n\t" "mcr p15, 1, %1, c15, c9, 5" : : "r" (va_start), "r" (va_end)); raw_local_irq_restore(flags); + l2_put_va(va_start); } static inline void l2_clean_inv_pa(unsigned long addr) @@ -106,13 +112,14 @@ static inline void l2_inv_pa_range(unsigned long start, unsigned long end) */ BUG_ON((start ^ end) >> PAGE_SHIFT); - raw_local_irq_save(flags); - va_start = l2_start_va(start); + va_start = l2_get_va(start); va_end = va_start + (end - start); + raw_local_irq_save(flags); __asm__("mcr p15, 1, %0, c15, c11, 4\n\t" "mcr p15, 1, %1, c15, c11, 5" : : "r" (va_start), "r" (va_end)); raw_local_irq_restore(flags); + l2_put_va(va_start); } static inline void l2_inv_all(void) @@ -329,7 +336,9 @@ static void __init enable_l2(void) enable_icache(); if (d) enable_dcache(); - } + } else + pr_err(FW_BUG + "Feroceon L2: bootloader left the L2 cache on!\n"); } void __init feroceon_l2_init(int __l2_wt_override) @@ -347,3 
+356,41 @@ void __init feroceon_l2_init(int __l2_wt_override) printk(KERN_INFO "Feroceon L2: Cache support initialised%s.\n", l2_wt_override ? ", in WT override mode" : ""); } +#ifdef CONFIG_OF +static const struct of_device_id feroceon_ids[] __initconst = { + { .compatible = "marvell,kirkwood-cache"}, + { .compatible = "marvell,feroceon-cache"}, + {} +}; + +int __init feroceon_of_init(void) +{ + struct device_node *node; + void __iomem *base; + bool l2_wt_override = false; + struct resource res; + +#if defined(CONFIG_CACHE_FEROCEON_L2_WRITETHROUGH) + l2_wt_override = true; +#endif + + node = of_find_matching_node(NULL, feroceon_ids); + if (node && of_device_is_compatible(node, "marvell,kirkwood-cache")) { + if (of_address_to_resource(node, 0, &res)) + return -ENODEV; + + base = ioremap(res.start, resource_size(&res)); + if (!base) + return -ENOMEM; + + if (l2_wt_override) + writel(readl(base) | L2_WRITETHROUGH_KIRKWOOD, base); + else + writel(readl(base) & ~L2_WRITETHROUGH_KIRKWOOD, base); + } + + feroceon_l2_init(l2_wt_override); + + return 0; +} +#endif diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c index 170c9bb9586..7c3fb41a462 100644 --- a/arch/arm/mm/cache-l2x0.c +++ b/arch/arm/mm/cache-l2x0.c @@ -16,91 +16,151 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include <linux/cpu.h> +#include <linux/err.h> #include <linux/init.h> +#include <linux/smp.h> #include <linux/spinlock.h> #include <linux/io.h> +#include <linux/of.h> +#include <linux/of_address.h> #include <asm/cacheflush.h> +#include <asm/cp15.h> +#include <asm/cputype.h> #include <asm/hardware/cache-l2x0.h> +#include "cache-tauros3.h" +#include "cache-aurora-l2.h" + +struct l2c_init_data { + const char *type; + unsigned way_size_0; + unsigned num_lock; + void (*of_parse)(const struct device_node *, u32 *, u32 *); + void (*enable)(void __iomem *, u32, unsigned); + void (*fixup)(void __iomem *, u32, struct outer_cache_fns *); + void (*save)(void __iomem *); + struct outer_cache_fns outer_cache; +}; #define CACHE_LINE_SIZE 32 static void __iomem *l2x0_base; -static DEFINE_SPINLOCK(l2x0_lock); -static uint32_t l2x0_way_mask; /* Bitmask of active ways */ -static uint32_t l2x0_size; +static DEFINE_RAW_SPINLOCK(l2x0_lock); +static u32 l2x0_way_mask; /* Bitmask of active ways */ +static u32 l2x0_size; +static unsigned long sync_reg_offset = L2X0_CACHE_SYNC; -static inline void cache_wait_way(void __iomem *reg, unsigned long mask) +struct l2x0_regs l2x0_saved_regs; + +/* + * Common code for all cache controllers. + */ +static inline void l2c_wait_mask(void __iomem *reg, unsigned long mask) { /* wait for cache operation by line or way to complete */ while (readl_relaxed(reg) & mask) - ; + cpu_relax(); } -#ifdef CONFIG_CACHE_PL310 -static inline void cache_wait(void __iomem *reg, unsigned long mask) +/* + * By default, we write directly to secure registers. Platforms must + * override this if they are running non-secure. 
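The write_sec hook referred to above is how a kernel running in the non-secure world reaches these registers. Below is a minimal sketch of how a platform might install such a hook before the L2 cache is brought up, assuming a hypothetical my_platform_smc() secure-monitor call (the real call is firmware specific and not shown in this patch). __l2c_init() later in this file copies outer_cache.write_sec into the outer_cache_fns it installs, so the hook has to be registered before l2x0_init()/l2x0_of_init() runs:

#include <linux/init.h>
#include <asm/outercache.h>

/* my_platform_smc() is a placeholder for whatever secure-monitor call the
 * platform firmware provides; it is not a real kernel API. */
extern void my_platform_smc(unsigned reg, unsigned long val);

static void example_l2c_write_sec(unsigned long val, unsigned reg)
{
	my_platform_smc(reg, val);
}

static void __init example_install_write_sec(void)
{
	/* must run before l2x0_init()/l2x0_of_init() so __l2c_init()
	 * carries the hook over into the installed outer_cache_fns */
	outer_cache.write_sec = example_l2c_write_sec;
}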
+ */ +static void l2c_write_sec(unsigned long val, void __iomem *base, unsigned reg) { - /* cache operations by line are atomic on PL310 */ + if (val == readl_relaxed(base + reg)) + return; + if (outer_cache.write_sec) + outer_cache.write_sec(val, reg); + else + writel_relaxed(val, base + reg); } -#else -#define cache_wait cache_wait_way -#endif -static inline void cache_sync(void) +/* + * This should only be called when we have a requirement that the + * register be written due to a work-around, as platforms running + * in non-secure mode may not be able to access this register. + */ +static inline void l2c_set_debug(void __iomem *base, unsigned long val) { - void __iomem *base = l2x0_base; - writel_relaxed(0, base + L2X0_CACHE_SYNC); - cache_wait(base + L2X0_CACHE_SYNC, 1); + l2c_write_sec(val, base, L2X0_DEBUG_CTRL); } -static inline void l2x0_clean_line(unsigned long addr) +static void __l2c_op_way(void __iomem *reg) { - void __iomem *base = l2x0_base; - cache_wait(base + L2X0_CLEAN_LINE_PA, 1); - writel_relaxed(addr, base + L2X0_CLEAN_LINE_PA); + writel_relaxed(l2x0_way_mask, reg); + l2c_wait_mask(reg, l2x0_way_mask); } -static inline void l2x0_inv_line(unsigned long addr) +static inline void l2c_unlock(void __iomem *base, unsigned num) { - void __iomem *base = l2x0_base; - cache_wait(base + L2X0_INV_LINE_PA, 1); - writel_relaxed(addr, base + L2X0_INV_LINE_PA); + unsigned i; + + for (i = 0; i < num; i++) { + writel_relaxed(0, base + L2X0_LOCKDOWN_WAY_D_BASE + + i * L2X0_LOCKDOWN_STRIDE); + writel_relaxed(0, base + L2X0_LOCKDOWN_WAY_I_BASE + + i * L2X0_LOCKDOWN_STRIDE); + } } -#ifdef CONFIG_PL310_ERRATA_588369 -static void debug_writel(unsigned long val) +/* + * Enable the L2 cache controller. This function must only be + * called when the cache controller is known to be disabled. + */ +static void l2c_enable(void __iomem *base, u32 aux, unsigned num_lock) { - extern void omap_smc1(u32 fn, u32 arg); + unsigned long flags; - /* - * Texas Instrument secure monitor api to modify the - * PL310 Debug Control Register. 
- */ - omap_smc1(0x100, val); + l2c_write_sec(aux, base, L2X0_AUX_CTRL); + + l2c_unlock(base, num_lock); + + local_irq_save(flags); + __l2c_op_way(base + L2X0_INV_WAY); + writel_relaxed(0, base + sync_reg_offset); + l2c_wait_mask(base + sync_reg_offset, 1); + local_irq_restore(flags); + + l2c_write_sec(L2X0_CTRL_EN, base, L2X0_CTRL); } -static inline void l2x0_flush_line(unsigned long addr) +static void l2c_disable(void) { void __iomem *base = l2x0_base; - /* Clean by PA followed by Invalidate by PA */ - cache_wait(base + L2X0_CLEAN_LINE_PA, 1); - writel_relaxed(addr, base + L2X0_CLEAN_LINE_PA); - cache_wait(base + L2X0_INV_LINE_PA, 1); - writel_relaxed(addr, base + L2X0_INV_LINE_PA); + outer_cache.flush_all(); + l2c_write_sec(0, base, L2X0_CTRL); + dsb(st); } -#else -/* Optimised out for non-errata case */ -static inline void debug_writel(unsigned long val) +#ifdef CONFIG_CACHE_PL310 +static inline void cache_wait(void __iomem *reg, unsigned long mask) { + /* cache operations by line are atomic on PL310 */ } +#else +#define cache_wait l2c_wait_mask +#endif -static inline void l2x0_flush_line(unsigned long addr) +static inline void cache_sync(void) { void __iomem *base = l2x0_base; - cache_wait(base + L2X0_CLEAN_INV_LINE_PA, 1); - writel_relaxed(addr, base + L2X0_CLEAN_INV_LINE_PA); + + writel_relaxed(0, base + sync_reg_offset); + cache_wait(base + L2X0_CACHE_SYNC, 1); +} + +#if defined(CONFIG_PL310_ERRATA_588369) || defined(CONFIG_PL310_ERRATA_727915) +static inline void debug_writel(unsigned long val) +{ + l2c_set_debug(l2x0_base, val); +} +#else +/* Optimised out for non-errata case */ +static inline void debug_writel(unsigned long val) +{ } #endif @@ -108,9 +168,17 @@ static void l2x0_cache_sync(void) { unsigned long flags; - spin_lock_irqsave(&l2x0_lock, flags); + raw_spin_lock_irqsave(&l2x0_lock, flags); cache_sync(); - spin_unlock_irqrestore(&l2x0_lock, flags); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); +} + +static void __l2x0_flush_all(void) +{ + debug_writel(0x03); + __l2c_op_way(l2x0_base + L2X0_CLEAN_INV_WAY); + cache_sync(); + debug_writel(0x00); } static void l2x0_flush_all(void) @@ -118,219 +186,1362 @@ static void l2x0_flush_all(void) unsigned long flags; /* clean all ways */ - spin_lock_irqsave(&l2x0_lock, flags); - writel_relaxed(l2x0_way_mask, l2x0_base + L2X0_CLEAN_INV_WAY); - cache_wait_way(l2x0_base + L2X0_CLEAN_INV_WAY, l2x0_way_mask); - cache_sync(); - spin_unlock_irqrestore(&l2x0_lock, flags); + raw_spin_lock_irqsave(&l2x0_lock, flags); + __l2x0_flush_all(); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); } -static void l2x0_clean_all(void) +static void l2x0_disable(void) { unsigned long flags; - /* clean all ways */ - spin_lock_irqsave(&l2x0_lock, flags); - writel_relaxed(l2x0_way_mask, l2x0_base + L2X0_CLEAN_WAY); - cache_wait_way(l2x0_base + L2X0_CLEAN_WAY, l2x0_way_mask); - cache_sync(); - spin_unlock_irqrestore(&l2x0_lock, flags); + raw_spin_lock_irqsave(&l2x0_lock, flags); + __l2x0_flush_all(); + l2c_write_sec(0, l2x0_base, L2X0_CTRL); + dsb(st); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); } -static void l2x0_inv_all(void) +static void l2c_save(void __iomem *base) { - unsigned long flags; + l2x0_saved_regs.aux_ctrl = readl_relaxed(l2x0_base + L2X0_AUX_CTRL); +} - /* invalidate all ways */ - spin_lock_irqsave(&l2x0_lock, flags); - /* Invalidating when L2 is enabled is a nono */ - BUG_ON(readl(l2x0_base + L2X0_CTRL) & 1); - writel_relaxed(l2x0_way_mask, l2x0_base + L2X0_INV_WAY); - cache_wait_way(l2x0_base + L2X0_INV_WAY, l2x0_way_mask); - 
cache_sync(); - spin_unlock_irqrestore(&l2x0_lock, flags); +/* + * L2C-210 specific code. + * + * The L2C-2x0 PA, set/way and sync operations are atomic, but we must + * ensure that no background operation is running. The way operations + * are all background tasks. + * + * While a background operation is in progress, any new operation is + * ignored (unspecified whether this causes an error.) Thankfully, not + * used on SMP. + * + * Never has a different sync register other than L2X0_CACHE_SYNC, but + * we use sync_reg_offset here so we can share some of this with L2C-310. + */ +static void __l2c210_cache_sync(void __iomem *base) +{ + writel_relaxed(0, base + sync_reg_offset); } -static void l2x0_inv_range(unsigned long start, unsigned long end) +static void __l2c210_op_pa_range(void __iomem *reg, unsigned long start, + unsigned long end) +{ + while (start < end) { + writel_relaxed(start, reg); + start += CACHE_LINE_SIZE; + } +} + +static void l2c210_inv_range(unsigned long start, unsigned long end) { void __iomem *base = l2x0_base; - unsigned long flags; - spin_lock_irqsave(&l2x0_lock, flags); if (start & (CACHE_LINE_SIZE - 1)) { start &= ~(CACHE_LINE_SIZE - 1); - debug_writel(0x03); - l2x0_flush_line(start); - debug_writel(0x00); + writel_relaxed(start, base + L2X0_CLEAN_INV_LINE_PA); start += CACHE_LINE_SIZE; } if (end & (CACHE_LINE_SIZE - 1)) { end &= ~(CACHE_LINE_SIZE - 1); - debug_writel(0x03); - l2x0_flush_line(end); - debug_writel(0x00); + writel_relaxed(end, base + L2X0_CLEAN_INV_LINE_PA); } + __l2c210_op_pa_range(base + L2X0_INV_LINE_PA, start, end); + __l2c210_cache_sync(base); +} + +static void l2c210_clean_range(unsigned long start, unsigned long end) +{ + void __iomem *base = l2x0_base; + + start &= ~(CACHE_LINE_SIZE - 1); + __l2c210_op_pa_range(base + L2X0_CLEAN_LINE_PA, start, end); + __l2c210_cache_sync(base); +} + +static void l2c210_flush_range(unsigned long start, unsigned long end) +{ + void __iomem *base = l2x0_base; + + start &= ~(CACHE_LINE_SIZE - 1); + __l2c210_op_pa_range(base + L2X0_CLEAN_INV_LINE_PA, start, end); + __l2c210_cache_sync(base); +} + +static void l2c210_flush_all(void) +{ + void __iomem *base = l2x0_base; + + BUG_ON(!irqs_disabled()); + + __l2c_op_way(base + L2X0_CLEAN_INV_WAY); + __l2c210_cache_sync(base); +} + +static void l2c210_sync(void) +{ + __l2c210_cache_sync(l2x0_base); +} + +static void l2c210_resume(void) +{ + void __iomem *base = l2x0_base; + + if (!(readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN)) + l2c_enable(base, l2x0_saved_regs.aux_ctrl, 1); +} + +static const struct l2c_init_data l2c210_data __initconst = { + .type = "L2C-210", + .way_size_0 = SZ_8K, + .num_lock = 1, + .enable = l2c_enable, + .save = l2c_save, + .outer_cache = { + .inv_range = l2c210_inv_range, + .clean_range = l2c210_clean_range, + .flush_range = l2c210_flush_range, + .flush_all = l2c210_flush_all, + .disable = l2c_disable, + .sync = l2c210_sync, + .resume = l2c210_resume, + }, +}; + +/* + * L2C-220 specific code. + * + * All operations are background operations: they have to be waited for. + * Conflicting requests generate a slave error (which will cause an + * imprecise abort.) Never uses sync_reg_offset, so we hard-code the + * sync register here. + * + * However, we can re-use the l2c210_resume call. 
+ */ +static inline void __l2c220_cache_sync(void __iomem *base) +{ + writel_relaxed(0, base + L2X0_CACHE_SYNC); + l2c_wait_mask(base + L2X0_CACHE_SYNC, 1); +} + +static void l2c220_op_way(void __iomem *base, unsigned reg) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&l2x0_lock, flags); + __l2c_op_way(base + reg); + __l2c220_cache_sync(base); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); +} + +static unsigned long l2c220_op_pa_range(void __iomem *reg, unsigned long start, + unsigned long end, unsigned long flags) +{ + raw_spinlock_t *lock = &l2x0_lock; + while (start < end) { unsigned long blk_end = start + min(end - start, 4096UL); while (start < blk_end) { - l2x0_inv_line(start); + l2c_wait_mask(reg, 1); + writel_relaxed(start, reg); start += CACHE_LINE_SIZE; } if (blk_end < end) { - spin_unlock_irqrestore(&l2x0_lock, flags); - spin_lock_irqsave(&l2x0_lock, flags); + raw_spin_unlock_irqrestore(lock, flags); + raw_spin_lock_irqsave(lock, flags); } } - cache_wait(base + L2X0_INV_LINE_PA, 1); - cache_sync(); - spin_unlock_irqrestore(&l2x0_lock, flags); + + return flags; } -static void l2x0_clean_range(unsigned long start, unsigned long end) +static void l2c220_inv_range(unsigned long start, unsigned long end) { void __iomem *base = l2x0_base; unsigned long flags; + raw_spin_lock_irqsave(&l2x0_lock, flags); + if ((start | end) & (CACHE_LINE_SIZE - 1)) { + if (start & (CACHE_LINE_SIZE - 1)) { + start &= ~(CACHE_LINE_SIZE - 1); + writel_relaxed(start, base + L2X0_CLEAN_INV_LINE_PA); + start += CACHE_LINE_SIZE; + } + + if (end & (CACHE_LINE_SIZE - 1)) { + end &= ~(CACHE_LINE_SIZE - 1); + l2c_wait_mask(base + L2X0_CLEAN_INV_LINE_PA, 1); + writel_relaxed(end, base + L2X0_CLEAN_INV_LINE_PA); + } + } + + flags = l2c220_op_pa_range(base + L2X0_INV_LINE_PA, + start, end, flags); + l2c_wait_mask(base + L2X0_INV_LINE_PA, 1); + __l2c220_cache_sync(base); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); +} + +static void l2c220_clean_range(unsigned long start, unsigned long end) +{ + void __iomem *base = l2x0_base; + unsigned long flags; + + start &= ~(CACHE_LINE_SIZE - 1); if ((end - start) >= l2x0_size) { - l2x0_clean_all(); + l2c220_op_way(base, L2X0_CLEAN_WAY); return; } - spin_lock_irqsave(&l2x0_lock, flags); + raw_spin_lock_irqsave(&l2x0_lock, flags); + flags = l2c220_op_pa_range(base + L2X0_CLEAN_LINE_PA, + start, end, flags); + l2c_wait_mask(base + L2X0_CLEAN_INV_LINE_PA, 1); + __l2c220_cache_sync(base); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); +} + +static void l2c220_flush_range(unsigned long start, unsigned long end) +{ + void __iomem *base = l2x0_base; + unsigned long flags; + start &= ~(CACHE_LINE_SIZE - 1); + if ((end - start) >= l2x0_size) { + l2c220_op_way(base, L2X0_CLEAN_INV_WAY); + return; + } + + raw_spin_lock_irqsave(&l2x0_lock, flags); + flags = l2c220_op_pa_range(base + L2X0_CLEAN_INV_LINE_PA, + start, end, flags); + l2c_wait_mask(base + L2X0_CLEAN_INV_LINE_PA, 1); + __l2c220_cache_sync(base); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); +} + +static void l2c220_flush_all(void) +{ + l2c220_op_way(l2x0_base, L2X0_CLEAN_INV_WAY); +} + +static void l2c220_sync(void) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&l2x0_lock, flags); + __l2c220_cache_sync(l2x0_base); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); +} + +static void l2c220_enable(void __iomem *base, u32 aux, unsigned num_lock) +{ + /* + * Always enable non-secure access to the lockdown registers - + * we write to them as part of the L2C enable sequence so they + * need to be accessible. 
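The 4096-byte block loop in l2c220_op_pa_range() above bounds how long l2x0_lock is held, and therefore how long interrupts stay masked, on large ranges. A stand-alone sketch of the same chunking arithmetic in plain user-space C (illustration only, not kernel code):

#include <stdio.h>

#define BLOCK_SIZE 4096UL	/* same bound as the min(end - start, 4096UL) above */

static void walk_in_blocks(unsigned long start, unsigned long end)
{
	while (start < end) {
		unsigned long len = end - start;
		unsigned long blk_end = start + (len < BLOCK_SIZE ? len : BLOCK_SIZE);

		printf("cache op on [0x%lx, 0x%lx)\n", start, blk_end);
		/* the driver drops and re-takes l2x0_lock here between blocks */
		start = blk_end;
	}
}

int main(void)
{
	walk_in_blocks(0x80000000UL, 0x80002100UL);	/* 8 KiB + 256 bytes -> three blocks */
	return 0;
}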
+ */ + aux |= L220_AUX_CTRL_NS_LOCKDOWN; + + l2c_enable(base, aux, num_lock); +} + +static const struct l2c_init_data l2c220_data = { + .type = "L2C-220", + .way_size_0 = SZ_8K, + .num_lock = 1, + .enable = l2c220_enable, + .save = l2c_save, + .outer_cache = { + .inv_range = l2c220_inv_range, + .clean_range = l2c220_clean_range, + .flush_range = l2c220_flush_range, + .flush_all = l2c220_flush_all, + .disable = l2c_disable, + .sync = l2c220_sync, + .resume = l2c210_resume, + }, +}; + +/* + * L2C-310 specific code. + * + * Very similar to L2C-210, the PA, set/way and sync operations are atomic, + * and the way operations are all background tasks. However, issuing an + * operation while a background operation is in progress results in a + * SLVERR response. We can reuse: + * + * __l2c210_cache_sync (using sync_reg_offset) + * l2c210_sync + * l2c210_inv_range (if 588369 is not applicable) + * l2c210_clean_range + * l2c210_flush_range (if 588369 is not applicable) + * l2c210_flush_all (if 727915 is not applicable) + * + * Errata: + * 588369: PL310 R0P0->R1P0, fixed R2P0. + * Affects: all clean+invalidate operations + * clean and invalidate skips the invalidate step, so we need to issue + * separate operations. We also require the above debug workaround + * enclosing this code fragment on affected parts. On unaffected parts, + * we must not use this workaround without the debug register writes + * to avoid exposing a problem similar to 727915. + * + * 727915: PL310 R2P0->R3P0, fixed R3P1. + * Affects: clean+invalidate by way + * clean and invalidate by way runs in the background, and a store can + * hit the line between the clean operation and invalidate operation, + * resulting in the store being lost. + * + * 752271: PL310 R3P0->R3P1-50REL0, fixed R3P2. + * Affects: 8x64-bit (double fill) line fetches + * double fill line fetches can fail to cause dirty data to be evicted + * from the cache before the new data overwrites the second line. + * + * 753970: PL310 R3P0, fixed R3P1. + * Affects: sync + * prevents merging writes after the sync operation, until another L2C + * operation is performed (or a number of other conditions.) + * + * 769419: PL310 R0P0->R3P1, fixed R3P2. + * Affects: store buffer + * store buffer is not automatically drained. 
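The revision windows listed above are what l2c310_fixup() further down in this hunk tests at run time. A stand-alone sketch of that dispatch, using hypothetical revision codes purely for illustration (the kernel instead reads the RTL field of L2X0_CACHE_ID and compares it against the L310_CACHE_ID_RTL_* constants):

#include <stdio.h>

/* illustrative ordering only; not the real register encoding */
enum rtl_rev { R0P0, R1P0, R2P0, R3P0, R3P1, R3P1_50REL0, R3P2 };

static void list_errata(enum rtl_rev rev)
{
	if (rev < R2P0)
		printf("588369 applies (fixed in r2p0)\n");
	if (rev >= R2P0 && rev < R3P1)
		printf("727915 applies (fixed in r3p1)\n");
	if (rev >= R3P0 && rev < R3P2)
		printf("752271 applies (fixed in r3p2)\n");
	if (rev == R3P0)
		printf("753970 applies (fixed in r3p1)\n");
	if (rev <= R3P1)
		printf("769419 applies (fixed in r3p2)\n");
}

int main(void)
{
	list_errata(R3P0);	/* r3p0 is hit by 727915, 752271, 753970 and 769419 */
	return 0;
}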
+ */ +static void l2c310_inv_range_erratum(unsigned long start, unsigned long end) +{ + void __iomem *base = l2x0_base; + + if ((start | end) & (CACHE_LINE_SIZE - 1)) { + unsigned long flags; + + /* Erratum 588369 for both clean+invalidate operations */ + raw_spin_lock_irqsave(&l2x0_lock, flags); + l2c_set_debug(base, 0x03); + + if (start & (CACHE_LINE_SIZE - 1)) { + start &= ~(CACHE_LINE_SIZE - 1); + writel_relaxed(start, base + L2X0_CLEAN_LINE_PA); + writel_relaxed(start, base + L2X0_INV_LINE_PA); + start += CACHE_LINE_SIZE; + } + + if (end & (CACHE_LINE_SIZE - 1)) { + end &= ~(CACHE_LINE_SIZE - 1); + writel_relaxed(end, base + L2X0_CLEAN_LINE_PA); + writel_relaxed(end, base + L2X0_INV_LINE_PA); + } + + l2c_set_debug(base, 0x00); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); + } + + __l2c210_op_pa_range(base + L2X0_INV_LINE_PA, start, end); + __l2c210_cache_sync(base); +} + +static void l2c310_flush_range_erratum(unsigned long start, unsigned long end) +{ + raw_spinlock_t *lock = &l2x0_lock; + unsigned long flags; + void __iomem *base = l2x0_base; + + raw_spin_lock_irqsave(lock, flags); while (start < end) { unsigned long blk_end = start + min(end - start, 4096UL); + l2c_set_debug(base, 0x03); while (start < blk_end) { - l2x0_clean_line(start); + writel_relaxed(start, base + L2X0_CLEAN_LINE_PA); + writel_relaxed(start, base + L2X0_INV_LINE_PA); start += CACHE_LINE_SIZE; } + l2c_set_debug(base, 0x00); if (blk_end < end) { - spin_unlock_irqrestore(&l2x0_lock, flags); - spin_lock_irqsave(&l2x0_lock, flags); + raw_spin_unlock_irqrestore(lock, flags); + raw_spin_lock_irqsave(lock, flags); } } - cache_wait(base + L2X0_CLEAN_LINE_PA, 1); - cache_sync(); - spin_unlock_irqrestore(&l2x0_lock, flags); + raw_spin_unlock_irqrestore(lock, flags); + __l2c210_cache_sync(base); } -static void l2x0_flush_range(unsigned long start, unsigned long end) +static void l2c310_flush_all_erratum(void) { void __iomem *base = l2x0_base; unsigned long flags; - if ((end - start) >= l2x0_size) { - l2x0_flush_all(); - return; + raw_spin_lock_irqsave(&l2x0_lock, flags); + l2c_set_debug(base, 0x03); + __l2c_op_way(base + L2X0_CLEAN_INV_WAY); + l2c_set_debug(base, 0x00); + __l2c210_cache_sync(base); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); +} + +static void __init l2c310_save(void __iomem *base) +{ + unsigned revision; + + l2c_save(base); + + l2x0_saved_regs.tag_latency = readl_relaxed(base + + L310_TAG_LATENCY_CTRL); + l2x0_saved_regs.data_latency = readl_relaxed(base + + L310_DATA_LATENCY_CTRL); + l2x0_saved_regs.filter_end = readl_relaxed(base + + L310_ADDR_FILTER_END); + l2x0_saved_regs.filter_start = readl_relaxed(base + + L310_ADDR_FILTER_START); + + revision = readl_relaxed(base + L2X0_CACHE_ID) & + L2X0_CACHE_ID_RTL_MASK; + + /* From r2p0, there is Prefetch offset/control register */ + if (revision >= L310_CACHE_ID_RTL_R2P0) + l2x0_saved_regs.prefetch_ctrl = readl_relaxed(base + + L310_PREFETCH_CTRL); + + /* From r3p0, there is Power control register */ + if (revision >= L310_CACHE_ID_RTL_R3P0) + l2x0_saved_regs.pwr_ctrl = readl_relaxed(base + + L310_POWER_CTRL); +} + +static void l2c310_resume(void) +{ + void __iomem *base = l2x0_base; + + if (!(readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN)) { + unsigned revision; + + /* restore pl310 setup */ + writel_relaxed(l2x0_saved_regs.tag_latency, + base + L310_TAG_LATENCY_CTRL); + writel_relaxed(l2x0_saved_regs.data_latency, + base + L310_DATA_LATENCY_CTRL); + writel_relaxed(l2x0_saved_regs.filter_end, + base + L310_ADDR_FILTER_END); + 
writel_relaxed(l2x0_saved_regs.filter_start, + base + L310_ADDR_FILTER_START); + + revision = readl_relaxed(base + L2X0_CACHE_ID) & + L2X0_CACHE_ID_RTL_MASK; + + if (revision >= L310_CACHE_ID_RTL_R2P0) + l2c_write_sec(l2x0_saved_regs.prefetch_ctrl, base, + L310_PREFETCH_CTRL); + if (revision >= L310_CACHE_ID_RTL_R3P0) + l2c_write_sec(l2x0_saved_regs.pwr_ctrl, base, + L310_POWER_CTRL); + + l2c_enable(base, l2x0_saved_regs.aux_ctrl, 8); + + /* Re-enable full-line-of-zeros for Cortex-A9 */ + if (l2x0_saved_regs.aux_ctrl & L310_AUX_CTRL_FULL_LINE_ZERO) + set_auxcr(get_auxcr() | BIT(3) | BIT(2) | BIT(1)); + } +} + +static int l2c310_cpu_enable_flz(struct notifier_block *nb, unsigned long act, void *data) +{ + switch (act & ~CPU_TASKS_FROZEN) { + case CPU_STARTING: + set_auxcr(get_auxcr() | BIT(3) | BIT(2) | BIT(1)); + break; + case CPU_DYING: + set_auxcr(get_auxcr() & ~(BIT(3) | BIT(2) | BIT(1))); + break; } + return NOTIFY_OK; +} - spin_lock_irqsave(&l2x0_lock, flags); - start &= ~(CACHE_LINE_SIZE - 1); - while (start < end) { - unsigned long blk_end = start + min(end - start, 4096UL); +static void __init l2c310_enable(void __iomem *base, u32 aux, unsigned num_lock) +{ + unsigned rev = readl_relaxed(base + L2X0_CACHE_ID) & L2X0_CACHE_ID_RTL_MASK; + bool cortex_a9 = read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9; - debug_writel(0x03); - while (start < blk_end) { - l2x0_flush_line(start); - start += CACHE_LINE_SIZE; + if (rev >= L310_CACHE_ID_RTL_R2P0) { + if (cortex_a9) { + aux |= L310_AUX_CTRL_EARLY_BRESP; + pr_info("L2C-310 enabling early BRESP for Cortex-A9\n"); + } else if (aux & L310_AUX_CTRL_EARLY_BRESP) { + pr_warn("L2C-310 early BRESP only supported with Cortex-A9\n"); + aux &= ~L310_AUX_CTRL_EARLY_BRESP; } - debug_writel(0x00); + } - if (blk_end < end) { - spin_unlock_irqrestore(&l2x0_lock, flags); - spin_lock_irqsave(&l2x0_lock, flags); + if (cortex_a9) { + u32 aux_cur = readl_relaxed(base + L2X0_AUX_CTRL); + u32 acr = get_auxcr(); + + pr_debug("Cortex-A9 ACR=0x%08x\n", acr); + + if (acr & BIT(3) && !(aux_cur & L310_AUX_CTRL_FULL_LINE_ZERO)) + pr_err("L2C-310: full line of zeros enabled in Cortex-A9 but not L2C-310 - invalid\n"); + + if (aux & L310_AUX_CTRL_FULL_LINE_ZERO && !(acr & BIT(3))) + pr_err("L2C-310: enabling full line of zeros but not enabled in Cortex-A9\n"); + + if (!(aux & L310_AUX_CTRL_FULL_LINE_ZERO) && !outer_cache.write_sec) { + aux |= L310_AUX_CTRL_FULL_LINE_ZERO; + pr_info("L2C-310 full line of zeros enabled for Cortex-A9\n"); } + } else if (aux & (L310_AUX_CTRL_FULL_LINE_ZERO | L310_AUX_CTRL_EARLY_BRESP)) { + pr_err("L2C-310: disabling Cortex-A9 specific feature bits\n"); + aux &= ~(L310_AUX_CTRL_FULL_LINE_ZERO | L310_AUX_CTRL_EARLY_BRESP); + } + + if (aux & (L310_AUX_CTRL_DATA_PREFETCH | L310_AUX_CTRL_INSTR_PREFETCH)) { + u32 prefetch = readl_relaxed(base + L310_PREFETCH_CTRL); + + pr_info("L2C-310 %s%s prefetch enabled, offset %u lines\n", + aux & L310_AUX_CTRL_INSTR_PREFETCH ? "I" : "", + aux & L310_AUX_CTRL_DATA_PREFETCH ? "D" : "", + 1 + (prefetch & L310_PREFETCH_CTRL_OFFSET_MASK)); + } + + /* r3p0 or later has power control register */ + if (rev >= L310_CACHE_ID_RTL_R3P0) { + u32 power_ctrl; + + l2c_write_sec(L310_DYNAMIC_CLK_GATING_EN | L310_STNDBY_MODE_EN, + base, L310_POWER_CTRL); + power_ctrl = readl_relaxed(base + L310_POWER_CTRL); + pr_info("L2C-310 dynamic clock gating %sabled, standby mode %sabled\n", + power_ctrl & L310_DYNAMIC_CLK_GATING_EN ? "en" : "dis", + power_ctrl & L310_STNDBY_MODE_EN ? 
"en" : "dis"); + } + + /* + * Always enable non-secure access to the lockdown registers - + * we write to them as part of the L2C enable sequence so they + * need to be accessible. + */ + aux |= L310_AUX_CTRL_NS_LOCKDOWN; + + l2c_enable(base, aux, num_lock); + + if (aux & L310_AUX_CTRL_FULL_LINE_ZERO) { + set_auxcr(get_auxcr() | BIT(3) | BIT(2) | BIT(1)); + cpu_notifier(l2c310_cpu_enable_flz, 0); } - cache_wait(base + L2X0_CLEAN_INV_LINE_PA, 1); - cache_sync(); - spin_unlock_irqrestore(&l2x0_lock, flags); } -static void l2x0_disable(void) +static void __init l2c310_fixup(void __iomem *base, u32 cache_id, + struct outer_cache_fns *fns) { - unsigned long flags; + unsigned revision = cache_id & L2X0_CACHE_ID_RTL_MASK; + const char *errata[8]; + unsigned n = 0; + + if (IS_ENABLED(CONFIG_PL310_ERRATA_588369) && + revision < L310_CACHE_ID_RTL_R2P0 && + /* For bcm compatibility */ + fns->inv_range == l2c210_inv_range) { + fns->inv_range = l2c310_inv_range_erratum; + fns->flush_range = l2c310_flush_range_erratum; + errata[n++] = "588369"; + } + + if (IS_ENABLED(CONFIG_PL310_ERRATA_727915) && + revision >= L310_CACHE_ID_RTL_R2P0 && + revision < L310_CACHE_ID_RTL_R3P1) { + fns->flush_all = l2c310_flush_all_erratum; + errata[n++] = "727915"; + } - spin_lock_irqsave(&l2x0_lock, flags); - writel(0, l2x0_base + L2X0_CTRL); - spin_unlock_irqrestore(&l2x0_lock, flags); + if (revision >= L310_CACHE_ID_RTL_R3P0 && + revision < L310_CACHE_ID_RTL_R3P2) { + u32 val = readl_relaxed(base + L310_PREFETCH_CTRL); + /* I don't think bit23 is required here... but iMX6 does so */ + if (val & (BIT(30) | BIT(23))) { + val &= ~(BIT(30) | BIT(23)); + l2c_write_sec(val, base, L310_PREFETCH_CTRL); + errata[n++] = "752271"; + } + } + + if (IS_ENABLED(CONFIG_PL310_ERRATA_753970) && + revision == L310_CACHE_ID_RTL_R3P0) { + sync_reg_offset = L2X0_DUMMY_REG; + errata[n++] = "753970"; + } + + if (IS_ENABLED(CONFIG_PL310_ERRATA_769419)) + errata[n++] = "769419"; + + if (n) { + unsigned i; + + pr_info("L2C-310 errat%s", n > 1 ? "a" : "um"); + for (i = 0; i < n; i++) + pr_cont(" %s", errata[i]); + pr_cont(" enabled\n"); + } } -void __init l2x0_init(void __iomem *base, __u32 aux_val, __u32 aux_mask) +static void l2c310_disable(void) { - __u32 aux; - __u32 cache_id; - __u32 way_size = 0; - int ways; - const char *type; + /* + * If full-line-of-zeros is enabled, we must first disable it in the + * Cortex-A9 auxiliary control register before disabling the L2 cache. + */ + if (l2x0_saved_regs.aux_ctrl & L310_AUX_CTRL_FULL_LINE_ZERO) + set_auxcr(get_auxcr() & ~(BIT(3) | BIT(2) | BIT(1))); - l2x0_base = base; + l2c_disable(); +} - cache_id = readl_relaxed(l2x0_base + L2X0_CACHE_ID); - aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL); +static const struct l2c_init_data l2c310_init_fns __initconst = { + .type = "L2C-310", + .way_size_0 = SZ_8K, + .num_lock = 8, + .enable = l2c310_enable, + .fixup = l2c310_fixup, + .save = l2c310_save, + .outer_cache = { + .inv_range = l2c210_inv_range, + .clean_range = l2c210_clean_range, + .flush_range = l2c210_flush_range, + .flush_all = l2c210_flush_all, + .disable = l2c310_disable, + .sync = l2c210_sync, + .resume = l2c310_resume, + }, +}; + +static void __init __l2c_init(const struct l2c_init_data *data, + u32 aux_val, u32 aux_mask, u32 cache_id) +{ + struct outer_cache_fns fns; + unsigned way_size_bits, ways; + u32 aux, old_aux; + /* + * Sanity check the aux values. aux_mask is the bits we preserve + * from reading the hardware register, and aux_val is the bits we + * set. 
+ */ + if (aux_val & aux_mask) + pr_alert("L2C: platform provided aux values permit register corruption.\n"); + + old_aux = aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL); aux &= aux_mask; aux |= aux_val; + if (old_aux != aux) + pr_warn("L2C: DT/platform modifies aux control register: 0x%08x -> 0x%08x\n", + old_aux, aux); + /* Determine the number of ways */ switch (cache_id & L2X0_CACHE_ID_PART_MASK) { case L2X0_CACHE_ID_PART_L310: + if ((aux_val | ~aux_mask) & (L2C_AUX_CTRL_WAY_SIZE_MASK | L310_AUX_CTRL_ASSOCIATIVITY_16)) + pr_warn("L2C: DT/platform tries to modify or specify cache size\n"); if (aux & (1 << 16)) ways = 16; else ways = 8; - type = "L310"; break; + case L2X0_CACHE_ID_PART_L210: + case L2X0_CACHE_ID_PART_L220: + ways = (aux >> 13) & 0xf; + break; + + case AURORA_CACHE_ID: ways = (aux >> 13) & 0xf; - type = "L210"; + ways = 2 << ((ways + 1) >> 2); break; + default: /* Assume unknown chips have 8 ways */ ways = 8; - type = "L2x0 series"; break; } l2x0_way_mask = (1 << ways) - 1; /* - * L2 cache Size = Way size * Number of ways + * way_size_0 is the size that a way_size value of zero would be + * given the calculation: way_size = way_size_0 << way_size_bits. + * So, if way_size_bits=0 is reserved, but way_size_bits=1 is 16k, + * then way_size_0 would be 8k. + * + * L2 cache size = number of ways * way size. + */ + way_size_bits = (aux & L2C_AUX_CTRL_WAY_SIZE_MASK) >> + L2C_AUX_CTRL_WAY_SIZE_SHIFT; + l2x0_size = ways * (data->way_size_0 << way_size_bits); + + fns = data->outer_cache; + fns.write_sec = outer_cache.write_sec; + if (data->fixup) + data->fixup(l2x0_base, cache_id, &fns); + + /* + * Check if l2x0 controller is already enabled. If we are booting + * in non-secure mode accessing the below registers will fault. */ - way_size = (aux & L2X0_AUX_CTRL_WAY_SIZE_MASK) >> 17; - way_size = 1 << (way_size + 3); - l2x0_size = ways * way_size * SZ_1K; + if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) + data->enable(l2x0_base, aux, data->num_lock); + + outer_cache = fns; /* - * Check if l2x0 controller is already enabled. - * If you are booting from non-secure mode - * accessing the below registers will fault. + * It is strange to save the register state before initialisation, + * but hey, this is what the DT implementations decided to do. */ - if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & 1)) { + if (data->save) + data->save(l2x0_base); + + /* Re-read it in case some bits are reserved. 
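A rough, self-contained illustration of the arithmetic above, in plain user-space C rather than kernel code. The bit positions are only those already visible in this hunk (the removed ">> 17" way-size extraction and the "aux & (1 << 16)" associativity test for the L2C-310), repeated here purely for the example:

#include <stdio.h>

#define SZ_8K		0x2000U			/* way_size_0 for the L2C-310 */
#define WAY_SIZE_SHIFT	17			/* field the removed ">> 17" extracted */
#define WAY_SIZE_MASK	(0x7U << WAY_SIZE_SHIFT)
#define ASSOC_16_BIT	(1U << 16)		/* 16-way associativity bit */

int main(void)
{
	unsigned int hw_aux   = ASSOC_16_BIT | (3U << WAY_SIZE_SHIFT);	/* as read from L2X0_AUX_CTRL */
	unsigned int aux_mask = ~0U;		/* platform keeps every hardware bit... */
	unsigned int aux_val  = 0;		/* ...and forces none of its own */

	/* same combination as __l2c_init(): keep the masked bits, OR in the rest */
	unsigned int aux = (hw_aux & aux_mask) | aux_val;

	unsigned int ways = (aux & ASSOC_16_BIT) ? 16 : 8;
	unsigned int way_size_bits = (aux & WAY_SIZE_MASK) >> WAY_SIZE_SHIFT;
	unsigned int l2_size = ways * (SZ_8K << way_size_bits);

	/* 8 KiB << 3 = 64 KiB per way, 16 ways -> 1024 kB in total */
	printf("%u ways, %u kB\n", ways, l2_size >> 10);
	return 0;
}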
*/ + aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL); + + pr_info("%s cache controller enabled, %d ways, %d kB\n", + data->type, ways, l2x0_size >> 10); + pr_info("%s: CACHE_ID 0x%08x, AUX_CTRL 0x%08x\n", + data->type, cache_id, aux); +} + +void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask) +{ + const struct l2c_init_data *data; + u32 cache_id; + + l2x0_base = base; + + cache_id = readl_relaxed(base + L2X0_CACHE_ID); + + switch (cache_id & L2X0_CACHE_ID_PART_MASK) { + default: + case L2X0_CACHE_ID_PART_L210: + data = &l2c210_data; + break; + + case L2X0_CACHE_ID_PART_L220: + data = &l2c220_data; + break; + + case L2X0_CACHE_ID_PART_L310: + data = &l2c310_init_fns; + break; + } + + __l2c_init(data, aux_val, aux_mask, cache_id); +} + +#ifdef CONFIG_OF +static int l2_wt_override; - /* l2x0 controller is disabled */ - writel_relaxed(aux, l2x0_base + L2X0_AUX_CTRL); +/* Aurora don't have the cache ID register available, so we have to + * pass it though the device tree */ +static u32 cache_id_part_number_from_dt; - l2x0_inv_all(); +static void __init l2x0_of_parse(const struct device_node *np, + u32 *aux_val, u32 *aux_mask) +{ + u32 data[2] = { 0, 0 }; + u32 tag = 0; + u32 dirty = 0; + u32 val = 0, mask = 0; + + of_property_read_u32(np, "arm,tag-latency", &tag); + if (tag) { + mask |= L2X0_AUX_CTRL_TAG_LATENCY_MASK; + val |= (tag - 1) << L2X0_AUX_CTRL_TAG_LATENCY_SHIFT; + } + + of_property_read_u32_array(np, "arm,data-latency", + data, ARRAY_SIZE(data)); + if (data[0] && data[1]) { + mask |= L2X0_AUX_CTRL_DATA_RD_LATENCY_MASK | + L2X0_AUX_CTRL_DATA_WR_LATENCY_MASK; + val |= ((data[0] - 1) << L2X0_AUX_CTRL_DATA_RD_LATENCY_SHIFT) | + ((data[1] - 1) << L2X0_AUX_CTRL_DATA_WR_LATENCY_SHIFT); + } + + of_property_read_u32(np, "arm,dirty-latency", &dirty); + if (dirty) { + mask |= L2X0_AUX_CTRL_DIRTY_LATENCY_MASK; + val |= (dirty - 1) << L2X0_AUX_CTRL_DIRTY_LATENCY_SHIFT; + } + + *aux_val &= ~mask; + *aux_val |= val; + *aux_mask &= ~mask; +} + +static const struct l2c_init_data of_l2c210_data __initconst = { + .type = "L2C-210", + .way_size_0 = SZ_8K, + .num_lock = 1, + .of_parse = l2x0_of_parse, + .enable = l2c_enable, + .save = l2c_save, + .outer_cache = { + .inv_range = l2c210_inv_range, + .clean_range = l2c210_clean_range, + .flush_range = l2c210_flush_range, + .flush_all = l2c210_flush_all, + .disable = l2c_disable, + .sync = l2c210_sync, + .resume = l2c210_resume, + }, +}; + +static const struct l2c_init_data of_l2c220_data __initconst = { + .type = "L2C-220", + .way_size_0 = SZ_8K, + .num_lock = 1, + .of_parse = l2x0_of_parse, + .enable = l2c220_enable, + .save = l2c_save, + .outer_cache = { + .inv_range = l2c220_inv_range, + .clean_range = l2c220_clean_range, + .flush_range = l2c220_flush_range, + .flush_all = l2c220_flush_all, + .disable = l2c_disable, + .sync = l2c220_sync, + .resume = l2c210_resume, + }, +}; + +static void __init l2c310_of_parse(const struct device_node *np, + u32 *aux_val, u32 *aux_mask) +{ + u32 data[3] = { 0, 0, 0 }; + u32 tag[3] = { 0, 0, 0 }; + u32 filter[2] = { 0, 0 }; + + of_property_read_u32_array(np, "arm,tag-latency", tag, ARRAY_SIZE(tag)); + if (tag[0] && tag[1] && tag[2]) + writel_relaxed( + L310_LATENCY_CTRL_RD(tag[0] - 1) | + L310_LATENCY_CTRL_WR(tag[1] - 1) | + L310_LATENCY_CTRL_SETUP(tag[2] - 1), + l2x0_base + L310_TAG_LATENCY_CTRL); + + of_property_read_u32_array(np, "arm,data-latency", + data, ARRAY_SIZE(data)); + if (data[0] && data[1] && data[2]) + writel_relaxed( + L310_LATENCY_CTRL_RD(data[0] - 1) | + L310_LATENCY_CTRL_WR(data[1] - 1) | 
+ L310_LATENCY_CTRL_SETUP(data[2] - 1), + l2x0_base + L310_DATA_LATENCY_CTRL); - /* enable L2X0 */ - writel_relaxed(1, l2x0_base + L2X0_CTRL); + of_property_read_u32_array(np, "arm,filter-ranges", + filter, ARRAY_SIZE(filter)); + if (filter[1]) { + writel_relaxed(ALIGN(filter[0] + filter[1], SZ_1M), + l2x0_base + L310_ADDR_FILTER_END); + writel_relaxed((filter[0] & ~(SZ_1M - 1)) | L310_ADDR_FILTER_EN, + l2x0_base + L310_ADDR_FILTER_START); } +} + +static const struct l2c_init_data of_l2c310_data __initconst = { + .type = "L2C-310", + .way_size_0 = SZ_8K, + .num_lock = 8, + .of_parse = l2c310_of_parse, + .enable = l2c310_enable, + .fixup = l2c310_fixup, + .save = l2c310_save, + .outer_cache = { + .inv_range = l2c210_inv_range, + .clean_range = l2c210_clean_range, + .flush_range = l2c210_flush_range, + .flush_all = l2c210_flush_all, + .disable = l2c310_disable, + .sync = l2c210_sync, + .resume = l2c310_resume, + }, +}; + +/* + * This is a variant of the of_l2c310_data with .sync set to + * NULL. Outer sync operations are not needed when the system is I/O + * coherent, and potentially harmful in certain situations (PCIe/PL310 + * deadlock on Armada 375/38x due to hardware I/O coherency). The + * other operations are kept because they are infrequent (therefore do + * not cause the deadlock in practice) and needed for secondary CPU + * boot and other power management activities. + */ +static const struct l2c_init_data of_l2c310_coherent_data __initconst = { + .type = "L2C-310 Coherent", + .way_size_0 = SZ_8K, + .num_lock = 8, + .of_parse = l2c310_of_parse, + .enable = l2c310_enable, + .fixup = l2c310_fixup, + .save = l2c310_save, + .outer_cache = { + .inv_range = l2c210_inv_range, + .clean_range = l2c210_clean_range, + .flush_range = l2c210_flush_range, + .flush_all = l2c210_flush_all, + .disable = l2c310_disable, + .resume = l2c310_resume, + }, +}; + +/* + * Note that the end addresses passed to Linux primitives are + * noninclusive, while the hardware cache range operations use + * inclusive start and end addresses. + */ +static unsigned long calc_range_end(unsigned long start, unsigned long end) +{ + /* + * Limit the number of cache lines processed at once, + * since cache range operations stall the CPU pipeline + * until completion. + */ + if (end > start + MAX_RANGE_SIZE) + end = start + MAX_RANGE_SIZE; + + /* + * Cache range operations can't straddle a page boundary. + */ + if (end > PAGE_ALIGN(start+1)) + end = PAGE_ALIGN(start+1); + + return end; +} + +/* + * Make sure 'start' and 'end' reference the same page, as L2 is PIPT + * and range operations only do a TLB lookup on the start address. 
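A stand-alone sketch of the clamping calc_range_end() above performs, assuming a 4 KiB page; MAX_RANGE is a stand-in for MAX_RANGE_SIZE, whose real value comes from cache-aurora-l2.h and is not repeated here:

#include <stdio.h>

#define PAGE_SIZE	4096UL
#define PAGE_MASK	(~(PAGE_SIZE - 1))
#define PAGE_ALIGN(x)	(((x) + PAGE_SIZE - 1) & PAGE_MASK)
#define MAX_RANGE	1024UL		/* stand-in for MAX_RANGE_SIZE */

static unsigned long calc_range_end(unsigned long start, unsigned long end)
{
	if (end > start + MAX_RANGE)		/* bound the CPU pipeline stall */
		end = start + MAX_RANGE;
	if (end > PAGE_ALIGN(start + 1))	/* never cross a page boundary */
		end = PAGE_ALIGN(start + 1);
	return end;
}

int main(void)
{
	/* 0x10000fc0 is 64 bytes below a page boundary, so the range is cut
	 * at 0x10001000 even though MAX_RANGE alone would allow more */
	printf("0x%lx\n", calc_range_end(0x10000fc0UL, 0x10002000UL));
	return 0;
}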
+ */ +static void aurora_pa_range(unsigned long start, unsigned long end, + unsigned long offset) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&l2x0_lock, flags); + writel_relaxed(start, l2x0_base + AURORA_RANGE_BASE_ADDR_REG); + writel_relaxed(end, l2x0_base + offset); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); + + cache_sync(); +} + +static void aurora_inv_range(unsigned long start, unsigned long end) +{ + /* + * round start and end adresses up to cache line size + */ + start &= ~(CACHE_LINE_SIZE - 1); + end = ALIGN(end, CACHE_LINE_SIZE); - outer_cache.inv_range = l2x0_inv_range; - outer_cache.clean_range = l2x0_clean_range; - outer_cache.flush_range = l2x0_flush_range; - outer_cache.sync = l2x0_cache_sync; - outer_cache.flush_all = l2x0_flush_all; - outer_cache.inv_all = l2x0_inv_all; - outer_cache.disable = l2x0_disable; + /* + * Invalidate all full cache lines between 'start' and 'end'. + */ + while (start < end) { + unsigned long range_end = calc_range_end(start, end); + aurora_pa_range(start, range_end - CACHE_LINE_SIZE, + AURORA_INVAL_RANGE_REG); + start = range_end; + } +} - printk(KERN_INFO "%s cache controller enabled\n", type); - printk(KERN_INFO "l2x0: %d ways, CACHE_ID 0x%08x, AUX_CTRL 0x%08x, Cache size: %d B\n", - ways, cache_id, aux, l2x0_size); +static void aurora_clean_range(unsigned long start, unsigned long end) +{ + /* + * If L2 is forced to WT, the L2 will always be clean and we + * don't need to do anything here. + */ + if (!l2_wt_override) { + start &= ~(CACHE_LINE_SIZE - 1); + end = ALIGN(end, CACHE_LINE_SIZE); + while (start != end) { + unsigned long range_end = calc_range_end(start, end); + aurora_pa_range(start, range_end - CACHE_LINE_SIZE, + AURORA_CLEAN_RANGE_REG); + start = range_end; + } + } } + +static void aurora_flush_range(unsigned long start, unsigned long end) +{ + start &= ~(CACHE_LINE_SIZE - 1); + end = ALIGN(end, CACHE_LINE_SIZE); + while (start != end) { + unsigned long range_end = calc_range_end(start, end); + /* + * If L2 is forced to WT, the L2 will always be clean and we + * just need to invalidate. + */ + if (l2_wt_override) + aurora_pa_range(start, range_end - CACHE_LINE_SIZE, + AURORA_INVAL_RANGE_REG); + else + aurora_pa_range(start, range_end - CACHE_LINE_SIZE, + AURORA_FLUSH_RANGE_REG); + start = range_end; + } +} + +static void aurora_save(void __iomem *base) +{ + l2x0_saved_regs.ctrl = readl_relaxed(base + L2X0_CTRL); + l2x0_saved_regs.aux_ctrl = readl_relaxed(base + L2X0_AUX_CTRL); +} + +static void aurora_resume(void) +{ + void __iomem *base = l2x0_base; + + if (!(readl(base + L2X0_CTRL) & L2X0_CTRL_EN)) { + writel_relaxed(l2x0_saved_regs.aux_ctrl, base + L2X0_AUX_CTRL); + writel_relaxed(l2x0_saved_regs.ctrl, base + L2X0_CTRL); + } +} + +/* + * For Aurora cache in no outer mode, enable via the CP15 coprocessor + * broadcasting of cache commands to L2. 
+ */ +static void __init aurora_enable_no_outer(void __iomem *base, u32 aux, + unsigned num_lock) +{ + u32 u; + + asm volatile("mrc p15, 1, %0, c15, c2, 0" : "=r" (u)); + u |= AURORA_CTRL_FW; /* Set the FW bit */ + asm volatile("mcr p15, 1, %0, c15, c2, 0" : : "r" (u)); + + isb(); + + l2c_enable(base, aux, num_lock); +} + +static void __init aurora_fixup(void __iomem *base, u32 cache_id, + struct outer_cache_fns *fns) +{ + sync_reg_offset = AURORA_SYNC_REG; +} + +static void __init aurora_of_parse(const struct device_node *np, + u32 *aux_val, u32 *aux_mask) +{ + u32 val = AURORA_ACR_REPLACEMENT_TYPE_SEMIPLRU; + u32 mask = AURORA_ACR_REPLACEMENT_MASK; + + of_property_read_u32(np, "cache-id-part", + &cache_id_part_number_from_dt); + + /* Determine and save the write policy */ + l2_wt_override = of_property_read_bool(np, "wt-override"); + + if (l2_wt_override) { + val |= AURORA_ACR_FORCE_WRITE_THRO_POLICY; + mask |= AURORA_ACR_FORCE_WRITE_POLICY_MASK; + } + + *aux_val &= ~mask; + *aux_val |= val; + *aux_mask &= ~mask; +} + +static const struct l2c_init_data of_aurora_with_outer_data __initconst = { + .type = "Aurora", + .way_size_0 = SZ_4K, + .num_lock = 4, + .of_parse = aurora_of_parse, + .enable = l2c_enable, + .fixup = aurora_fixup, + .save = aurora_save, + .outer_cache = { + .inv_range = aurora_inv_range, + .clean_range = aurora_clean_range, + .flush_range = aurora_flush_range, + .flush_all = l2x0_flush_all, + .disable = l2x0_disable, + .sync = l2x0_cache_sync, + .resume = aurora_resume, + }, +}; + +static const struct l2c_init_data of_aurora_no_outer_data __initconst = { + .type = "Aurora", + .way_size_0 = SZ_4K, + .num_lock = 4, + .of_parse = aurora_of_parse, + .enable = aurora_enable_no_outer, + .fixup = aurora_fixup, + .save = aurora_save, + .outer_cache = { + .resume = aurora_resume, + }, +}; + +/* + * For certain Broadcom SoCs, depending on the address range, different offsets + * need to be added to the address before passing it to L2 for + * invalidation/clean/flush + * + * Section Address Range Offset EMI + * 1 0x00000000 - 0x3FFFFFFF 0x80000000 VC + * 2 0x40000000 - 0xBFFFFFFF 0x40000000 SYS + * 3 0xC0000000 - 0xFFFFFFFF 0x80000000 VC + * + * When the start and end addresses have crossed two different sections, we + * need to break the L2 operation into two, each within its own section. + * For example, if we need to invalidate addresses starts at 0xBFFF0000 and + * ends at 0xC0001000, we need do invalidate 1) 0xBFFF0000 - 0xBFFFFFFF and 2) + * 0xC0000000 - 0xC0001000 + * + * Note 1: + * By breaking a single L2 operation into two, we may potentially suffer some + * performance hit, but keep in mind the cross section case is very rare + * + * Note 2: + * We do not need to handle the case when the start address is in + * Section 1 and the end address is in Section 3, since it is not a valid use + * case + * + * Note 3: + * Section 1 in practical terms can no longer be used on rev A2. Because of + * that the code does not need to handle section 1 at all. 
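A stand-alone sketch of the section test and range split described above, reproducing the worked example from the comment (a range from 0xBFFF0000 to 0xC0001000 crossing from section 2 into section 3). Illustration only; the real driver then adds the per-section offset with bcm_l2_phys_addr() before issuing the L2 operations:

#include <stdio.h>

#define SYS_EMI_START	0x40000000U	/* section 2 start */
#define SEC3_START	0xC0000000U	/* section 3 start */

static int in_sys_emi(unsigned int addr)
{
	return addr >= SYS_EMI_START && addr < SEC3_START;
}

int main(void)
{
	unsigned int start = 0xBFFF0000U, end = 0xC0001000U;

	if (in_sys_emi(end) || !in_sys_emi(start)) {
		/* both ends fall in the same section: a single L2 operation */
		printf("single op: 0x%x-0x%x\n", start, end);
	} else {
		/* crosses from section 2 into section 3: split into two ops */
		printf("op 1: 0x%x-0x%x\n", start, SEC3_START - 1);
		printf("op 2: 0x%x-0x%x\n", SEC3_START, end);
	}
	return 0;
}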
+ * + */ +#define BCM_SYS_EMI_START_ADDR 0x40000000UL +#define BCM_VC_EMI_SEC3_START_ADDR 0xC0000000UL + +#define BCM_SYS_EMI_OFFSET 0x40000000UL +#define BCM_VC_EMI_OFFSET 0x80000000UL + +static inline int bcm_addr_is_sys_emi(unsigned long addr) +{ + return (addr >= BCM_SYS_EMI_START_ADDR) && + (addr < BCM_VC_EMI_SEC3_START_ADDR); +} + +static inline unsigned long bcm_l2_phys_addr(unsigned long addr) +{ + if (bcm_addr_is_sys_emi(addr)) + return addr + BCM_SYS_EMI_OFFSET; + else + return addr + BCM_VC_EMI_OFFSET; +} + +static void bcm_inv_range(unsigned long start, unsigned long end) +{ + unsigned long new_start, new_end; + + BUG_ON(start < BCM_SYS_EMI_START_ADDR); + + if (unlikely(end <= start)) + return; + + new_start = bcm_l2_phys_addr(start); + new_end = bcm_l2_phys_addr(end); + + /* normal case, no cross section between start and end */ + if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) { + l2c210_inv_range(new_start, new_end); + return; + } + + /* They cross sections, so it can only be a cross from section + * 2 to section 3 + */ + l2c210_inv_range(new_start, + bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1)); + l2c210_inv_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR), + new_end); +} + +static void bcm_clean_range(unsigned long start, unsigned long end) +{ + unsigned long new_start, new_end; + + BUG_ON(start < BCM_SYS_EMI_START_ADDR); + + if (unlikely(end <= start)) + return; + + new_start = bcm_l2_phys_addr(start); + new_end = bcm_l2_phys_addr(end); + + /* normal case, no cross section between start and end */ + if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) { + l2c210_clean_range(new_start, new_end); + return; + } + + /* They cross sections, so it can only be a cross from section + * 2 to section 3 + */ + l2c210_clean_range(new_start, + bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1)); + l2c210_clean_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR), + new_end); +} + +static void bcm_flush_range(unsigned long start, unsigned long end) +{ + unsigned long new_start, new_end; + + BUG_ON(start < BCM_SYS_EMI_START_ADDR); + + if (unlikely(end <= start)) + return; + + if ((end - start) >= l2x0_size) { + outer_cache.flush_all(); + return; + } + + new_start = bcm_l2_phys_addr(start); + new_end = bcm_l2_phys_addr(end); + + /* normal case, no cross section between start and end */ + if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) { + l2c210_flush_range(new_start, new_end); + return; + } + + /* They cross sections, so it can only be a cross from section + * 2 to section 3 + */ + l2c210_flush_range(new_start, + bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1)); + l2c210_flush_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR), + new_end); +} + +/* Broadcom L2C-310 start from ARMs R3P2 or later, and require no fixups */ +static const struct l2c_init_data of_bcm_l2x0_data __initconst = { + .type = "BCM-L2C-310", + .way_size_0 = SZ_8K, + .num_lock = 8, + .of_parse = l2c310_of_parse, + .enable = l2c310_enable, + .save = l2c310_save, + .outer_cache = { + .inv_range = bcm_inv_range, + .clean_range = bcm_clean_range, + .flush_range = bcm_flush_range, + .flush_all = l2c210_flush_all, + .disable = l2c310_disable, + .sync = l2c210_sync, + .resume = l2c310_resume, + }, +}; + +static void __init tauros3_save(void __iomem *base) +{ + l2c_save(base); + + l2x0_saved_regs.aux2_ctrl = + readl_relaxed(base + TAUROS3_AUX2_CTRL); + l2x0_saved_regs.prefetch_ctrl = + readl_relaxed(base + L310_PREFETCH_CTRL); +} + +static void tauros3_resume(void) +{ + 
void __iomem *base = l2x0_base; + + if (!(readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN)) { + writel_relaxed(l2x0_saved_regs.aux2_ctrl, + base + TAUROS3_AUX2_CTRL); + writel_relaxed(l2x0_saved_regs.prefetch_ctrl, + base + L310_PREFETCH_CTRL); + + l2c_enable(base, l2x0_saved_regs.aux_ctrl, 8); + } +} + +static const struct l2c_init_data of_tauros3_data __initconst = { + .type = "Tauros3", + .way_size_0 = SZ_8K, + .num_lock = 8, + .enable = l2c_enable, + .save = tauros3_save, + /* Tauros3 broadcasts L1 cache operations to L2 */ + .outer_cache = { + .resume = tauros3_resume, + }, +}; + +#define L2C_ID(name, fns) { .compatible = name, .data = (void *)&fns } +static const struct of_device_id l2x0_ids[] __initconst = { + L2C_ID("arm,l210-cache", of_l2c210_data), + L2C_ID("arm,l220-cache", of_l2c220_data), + L2C_ID("arm,pl310-cache", of_l2c310_data), + L2C_ID("brcm,bcm11351-a2-pl310-cache", of_bcm_l2x0_data), + L2C_ID("marvell,aurora-outer-cache", of_aurora_with_outer_data), + L2C_ID("marvell,aurora-system-cache", of_aurora_no_outer_data), + L2C_ID("marvell,tauros3-cache", of_tauros3_data), + /* Deprecated IDs */ + L2C_ID("bcm,bcm11351-a2-pl310-cache", of_bcm_l2x0_data), + {} +}; + +int __init l2x0_of_init(u32 aux_val, u32 aux_mask) +{ + const struct l2c_init_data *data; + struct device_node *np; + struct resource res; + u32 cache_id, old_aux; + + np = of_find_matching_node(NULL, l2x0_ids); + if (!np) + return -ENODEV; + + if (of_address_to_resource(np, 0, &res)) + return -ENODEV; + + l2x0_base = ioremap(res.start, resource_size(&res)); + if (!l2x0_base) + return -ENOMEM; + + l2x0_saved_regs.phy_base = res.start; + + data = of_match_node(l2x0_ids, np)->data; + + if (of_device_is_compatible(np, "arm,pl310-cache") && + of_property_read_bool(np, "arm,io-coherent")) + data = &of_l2c310_coherent_data; + + old_aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL); + if (old_aux != ((old_aux & aux_mask) | aux_val)) { + pr_warn("L2C: platform modifies aux control register: 0x%08x -> 0x%08x\n", + old_aux, (old_aux & aux_mask) | aux_val); + } else if (aux_mask != ~0U && aux_val != 0) { + pr_alert("L2C: platform provided aux values match the hardware, so have no effect. Please remove them.\n"); + } + + /* All L2 caches are unified, so this property should be specified */ + if (!of_property_read_bool(np, "cache-unified")) + pr_err("L2C: device tree omits to specify unified cache\n"); + + /* L2 configuration can only be changed if the cache is disabled */ + if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) + if (data->of_parse) + data->of_parse(np, &aux_val, &aux_mask); + + if (cache_id_part_number_from_dt) + cache_id = cache_id_part_number_from_dt; + else + cache_id = readl_relaxed(l2x0_base + L2X0_CACHE_ID); + + __l2c_init(data, aux_val, aux_mask, cache_id); + + return 0; +} +#endif diff --git a/arch/arm/mm/cache-nop.S b/arch/arm/mm/cache-nop.S new file mode 100644 index 00000000000..8e12ddca003 --- /dev/null +++ b/arch/arm/mm/cache-nop.S @@ -0,0 +1,50 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#include <linux/linkage.h> +#include <linux/init.h> + +#include "proc-macros.S" + +ENTRY(nop_flush_icache_all) + mov pc, lr +ENDPROC(nop_flush_icache_all) + + .globl nop_flush_kern_cache_all + .equ nop_flush_kern_cache_all, nop_flush_icache_all + + .globl nop_flush_kern_cache_louis + .equ nop_flush_kern_cache_louis, nop_flush_icache_all + + .globl nop_flush_user_cache_all + .equ nop_flush_user_cache_all, nop_flush_icache_all + + .globl nop_flush_user_cache_range + .equ nop_flush_user_cache_range, nop_flush_icache_all + + .globl nop_coherent_kern_range + .equ nop_coherent_kern_range, nop_flush_icache_all + +ENTRY(nop_coherent_user_range) + mov r0, 0 + mov pc, lr +ENDPROC(nop_coherent_user_range) + + .globl nop_flush_kern_dcache_area + .equ nop_flush_kern_dcache_area, nop_flush_icache_all + + .globl nop_dma_flush_range + .equ nop_dma_flush_range, nop_flush_icache_all + + .globl nop_dma_map_area + .equ nop_dma_map_area, nop_flush_icache_all + + .globl nop_dma_unmap_area + .equ nop_dma_unmap_area, nop_flush_icache_all + + __INITDATA + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions nop diff --git a/arch/arm/mm/cache-tauros2.c b/arch/arm/mm/cache-tauros2.c index 50868651890..b273739e635 100644 --- a/arch/arm/mm/cache-tauros2.c +++ b/arch/arm/mm/cache-tauros2.c @@ -15,7 +15,11 @@ */ #include <linux/init.h> +#include <linux/of.h> +#include <linux/of_address.h> #include <asm/cacheflush.h> +#include <asm/cp15.h> +#include <asm/cputype.h> #include <asm/hardware/cache-tauros2.h> @@ -29,7 +33,7 @@ * outer cache operations into the kernel image if the kernel has been * configured to support a pre-v7 CPU. */ -#if __LINUX_ARM_ARCH__ < 7 +#ifdef CONFIG_CPU_32v5 /* * Low-level cache maintenance operations. */ @@ -107,6 +111,26 @@ static void tauros2_flush_range(unsigned long start, unsigned long end) dsb(); } + +static void tauros2_disable(void) +{ + __asm__ __volatile__ ( + "mcr p15, 1, %0, c7, c11, 0 @L2 Cache Clean All\n\t" + "mrc p15, 0, %0, c1, c0, 0\n\t" + "bic %0, %0, #(1 << 26)\n\t" + "mcr p15, 0, %0, c1, c0, 0 @Disable L2 Cache\n\t" + : : "r" (0x0)); +} + +static void tauros2_resume(void) +{ + __asm__ __volatile__ ( + "mcr p15, 1, %0, c7, c7, 0 @L2 Cache Invalidate All\n\t" + "mrc p15, 0, %0, c1, c0, 0\n\t" + "orr %0, %0, #(1 << 26)\n\t" + "mcr p15, 0, %0, c1, c0, 0 @Enable L2 Cache\n\t" + : : "r" (0x0)); +} #endif static inline u32 __init read_extra_features(void) @@ -123,25 +147,8 @@ static inline void __init write_extra_features(u32 u) __asm__("mcr p15, 1, %0, c15, c1, 0" : : "r" (u)); } -static void __init disable_l2_prefetch(void) -{ - u32 u; - - /* - * Read the CPU Extra Features register and verify that the - * Disable L2 Prefetch bit is set. - */ - u = read_extra_features(); - if (!(u & 0x01000000)) { - printk(KERN_INFO "Tauros2: Disabling L2 prefetch.\n"); - write_extra_features(u | 0x01000000); - } -} - static inline int __init cpuid_scheme(void) { - extern int processor_id; - return !!((processor_id & 0x000f0000) == 0x000f0000); } @@ -168,12 +175,36 @@ static inline void __init write_actlr(u32 actlr) __asm__("mcr p15, 0, %0, c1, c0, 1\n" : : "r" (actlr)); } -void __init tauros2_init(void) +static void enable_extra_feature(unsigned int features) +{ + u32 u; + + u = read_extra_features(); + + if (features & CACHE_TAUROS2_PREFETCH_ON) + u &= ~0x01000000; + else + u |= 0x01000000; + printk(KERN_INFO "Tauros2: %s L2 prefetch.\n", + (features & CACHE_TAUROS2_PREFETCH_ON) + ? 
"Enabling" : "Disabling"); + + if (features & CACHE_TAUROS2_LINEFILL_BURST8) + u |= 0x00100000; + else + u &= ~0x00100000; + printk(KERN_INFO "Tauros2: %s line fill burt8.\n", + (features & CACHE_TAUROS2_LINEFILL_BURST8) + ? "Enabling" : "Disabling"); + + write_extra_features(u); +} + +static void __init tauros2_internal_init(unsigned int features) { - extern int processor_id; - char *mode; + char *mode = NULL; - disable_l2_prefetch(); + enable_extra_feature(features); #ifdef CONFIG_CPU_32v5 if ((processor_id & 0xff0f0000) == 0x56050000) { @@ -193,31 +224,8 @@ void __init tauros2_init(void) outer_cache.inv_range = tauros2_inv_range; outer_cache.clean_range = tauros2_clean_range; outer_cache.flush_range = tauros2_flush_range; - } -#endif - -#ifdef CONFIG_CPU_32v6 - /* - * Check whether this CPU lacks support for the v7 hierarchical - * cache ops. (PJ4 is in its v6 personality mode if the MMFR3 - * register indicates no support for the v7 hierarchical cache - * ops.) - */ - if (cpuid_scheme() && (read_mmfr3() & 0xf) == 0) { - /* - * When Tauros2 is used in an ARMv6 system, the L2 - * enable bit is in the ARMv6 ARM-mandated position - * (bit [26] of the System Control Register). - */ - if (!(get_cr() & 0x04000000)) { - printk(KERN_INFO "Tauros2: Enabling L2 cache.\n"); - adjust_cr(0x04000000, 0x04000000); - } - - mode = "ARMv6"; - outer_cache.inv_range = tauros2_inv_range; - outer_cache.clean_range = tauros2_clean_range; - outer_cache.flush_range = tauros2_flush_range; + outer_cache.disable = tauros2_disable; + outer_cache.resume = tauros2_resume; } #endif @@ -261,3 +269,34 @@ void __init tauros2_init(void) printk(KERN_INFO "Tauros2: L2 cache support initialised " "in %s mode.\n", mode); } + +#ifdef CONFIG_OF +static const struct of_device_id tauros2_ids[] __initconst = { + { .compatible = "marvell,tauros2-cache"}, + {} +}; +#endif + +void __init tauros2_init(unsigned int features) +{ +#ifdef CONFIG_OF + struct device_node *node; + int ret; + unsigned int f; + + node = of_find_matching_node(NULL, tauros2_ids); + if (!node) { + pr_info("Not found marvell,tauros2-cache, disable it\n"); + return; + } + + ret = of_property_read_u32(node, "marvell,tauros2-cache-features", &f); + if (ret) { + pr_info("Not found marvell,tauros-cache-features property, " + "disable extra features\n"); + features = 0; + } else + features = f; +#endif + tauros2_internal_init(features); +} diff --git a/arch/arm/mm/cache-tauros3.h b/arch/arm/mm/cache-tauros3.h new file mode 100644 index 00000000000..02c0a97cbc0 --- /dev/null +++ b/arch/arm/mm/cache-tauros3.h @@ -0,0 +1,41 @@ +/* + * Marvell Tauros3 cache controller includes + * + * Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com> + * + * based on GPL'ed 2.6 kernel sources + * (c) Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef __ASM_ARM_HARDWARE_TAUROS3_H +#define __ASM_ARM_HARDWARE_TAUROS3_H + +/* + * Marvell Tauros3 L2CC is compatible with PL310 r0p0 + * but with PREFETCH_CTRL (r2p0) and an additional event counter. + * Also, there is AUX2_CTRL for some Marvell specific control. + */ + +#define TAUROS3_EVENT_CNT2_CFG 0x224 +#define TAUROS3_EVENT_CNT2_VAL 0x228 +#define TAUROS3_INV_ALL 0x780 +#define TAUROS3_CLEAN_ALL 0x784 +#define TAUROS3_AUX2_CTRL 0x820 + +/* Registers shifts and masks */ +#define TAUROS3_AUX2_CTRL_LINEFILL_BURST8_EN (1 << 2) + +#endif diff --git a/arch/arm/mm/cache-v3.S b/arch/arm/mm/cache-v3.S deleted file mode 100644 index 2e2bc406a18..00000000000 --- a/arch/arm/mm/cache-v3.S +++ /dev/null @@ -1,144 +0,0 @@ -/* - * linux/arch/arm/mm/cache-v3.S - * - * Copyright (C) 1997-2002 Russell king - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#include <linux/linkage.h> -#include <linux/init.h> -#include <asm/page.h> -#include "proc-macros.S" - -/* - * flush_icache_all() - * - * Unconditionally clean and invalidate the entire icache. - */ -ENTRY(v3_flush_icache_all) - mov pc, lr -ENDPROC(v3_flush_icache_all) - -/* - * flush_user_cache_all() - * - * Invalidate all cache entries in a particular address - * space. - * - * - mm - mm_struct describing address space - */ -ENTRY(v3_flush_user_cache_all) - /* FALLTHROUGH */ -/* - * flush_kern_cache_all() - * - * Clean and invalidate the entire cache. - */ -ENTRY(v3_flush_kern_cache_all) - /* FALLTHROUGH */ - -/* - * flush_user_cache_range(start, end, flags) - * - * Invalidate a range of cache entries in the specified - * address space. - * - * - start - start address (may not be aligned) - * - end - end address (exclusive, may not be aligned) - * - flags - vma_area_struct flags describing address space - */ -ENTRY(v3_flush_user_cache_range) - mov ip, #0 - mcreq p15, 0, ip, c7, c0, 0 @ flush ID cache - mov pc, lr - -/* - * coherent_kern_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start. If you have non-snooping - * Harvard caches, you need to implement this function. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(v3_coherent_kern_range) - /* FALLTHROUGH */ - -/* - * coherent_user_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start. If you have non-snooping - * Harvard caches, you need to implement this function. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(v3_coherent_user_range) - mov pc, lr - -/* - * flush_kern_dcache_area(void *page, size_t size) - * - * Ensure no D cache aliasing occurs, either with itself or - * the I cache - * - * - addr - kernel address - * - size - region size - */ -ENTRY(v3_flush_kern_dcache_area) - /* FALLTHROUGH */ - -/* - * dma_flush_range(start, end) - * - * Clean and invalidate the specified virtual address range. 
- * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(v3_dma_flush_range) - mov r0, #0 - mcr p15, 0, r0, c7, c0, 0 @ flush ID cache - mov pc, lr - -/* - * dma_unmap_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(v3_dma_unmap_area) - teq r2, #DMA_TO_DEVICE - bne v3_dma_flush_range - /* FALLTHROUGH */ - -/* - * dma_map_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(v3_dma_map_area) - mov pc, lr -ENDPROC(v3_dma_unmap_area) -ENDPROC(v3_dma_map_area) - - __INITDATA - - .type v3_cache_fns, #object -ENTRY(v3_cache_fns) - .long v3_flush_icache_all - .long v3_flush_kern_cache_all - .long v3_flush_user_cache_all - .long v3_flush_user_cache_range - .long v3_coherent_kern_range - .long v3_coherent_user_range - .long v3_flush_kern_dcache_area - .long v3_dma_map_area - .long v3_dma_unmap_area - .long v3_dma_flush_range - .size v3_cache_fns, . - v3_cache_fns diff --git a/arch/arm/mm/cache-v4.S b/arch/arm/mm/cache-v4.S index a8fefb523f1..a7ba68f59f0 100644 --- a/arch/arm/mm/cache-v4.S +++ b/arch/arm/mm/cache-v4.S @@ -58,7 +58,7 @@ ENTRY(v4_flush_kern_cache_all) ENTRY(v4_flush_user_cache_range) #ifdef CONFIG_CPU_CP15 mov ip, #0 - mcreq p15, 0, ip, c7, c7, 0 @ flush ID cache + mcr p15, 0, ip, c7, c7, 0 @ flush ID cache mov pc, lr #else /* FALLTHROUGH */ @@ -88,6 +88,7 @@ ENTRY(v4_coherent_kern_range) * - end - virtual end address */ ENTRY(v4_coherent_user_range) + mov r0, #0 mov pc, lr /* @@ -139,18 +140,10 @@ ENTRY(v4_dma_map_area) ENDPROC(v4_dma_unmap_area) ENDPROC(v4_dma_map_area) + .globl v4_flush_kern_cache_louis + .equ v4_flush_kern_cache_louis, v4_flush_kern_cache_all + __INITDATA - .type v4_cache_fns, #object -ENTRY(v4_cache_fns) - .long v4_flush_icache_all - .long v4_flush_kern_cache_all - .long v4_flush_user_cache_all - .long v4_flush_user_cache_range - .long v4_coherent_kern_range - .long v4_coherent_user_range - .long v4_flush_kern_dcache_area - .long v4_dma_map_area - .long v4_dma_unmap_area - .long v4_dma_flush_range - .size v4_cache_fns, . - v4_cache_fns + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions v4 diff --git a/arch/arm/mm/cache-v4wb.S b/arch/arm/mm/cache-v4wb.S index d3644db467b..cd494532140 100644 --- a/arch/arm/mm/cache-v4wb.S +++ b/arch/arm/mm/cache-v4wb.S @@ -32,7 +32,7 @@ /* * This is the size at which it becomes more efficient to * clean the whole cache, rather than using the individual - * cache line maintainence instructions. + * cache line maintenance instructions. 
* * Size Clean (ticks) Dirty (ticks) * 4096 21 20 21 53 55 54 @@ -167,9 +167,9 @@ ENTRY(v4wb_coherent_user_range) add r0, r0, #CACHE_DLINESIZE cmp r0, r1 blo 1b - mov ip, #0 - mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache - mcr p15, 0, ip, c7, c10, 4 @ drain WB + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mcr p15, 0, r0, c7, c10, 4 @ drain WB mov pc, lr @@ -251,18 +251,10 @@ ENTRY(v4wb_dma_unmap_area) mov pc, lr ENDPROC(v4wb_dma_unmap_area) + .globl v4wb_flush_kern_cache_louis + .equ v4wb_flush_kern_cache_louis, v4wb_flush_kern_cache_all + __INITDATA - .type v4wb_cache_fns, #object -ENTRY(v4wb_cache_fns) - .long v4wb_flush_icache_all - .long v4wb_flush_kern_cache_all - .long v4wb_flush_user_cache_all - .long v4wb_flush_user_cache_range - .long v4wb_coherent_kern_range - .long v4wb_coherent_user_range - .long v4wb_flush_kern_dcache_area - .long v4wb_dma_map_area - .long v4wb_dma_unmap_area - .long v4wb_dma_flush_range - .size v4wb_cache_fns, . - v4wb_cache_fns + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions v4wb diff --git a/arch/arm/mm/cache-v4wt.S b/arch/arm/mm/cache-v4wt.S index 49c2b66cf3d..11e5e5838bc 100644 --- a/arch/arm/mm/cache-v4wt.S +++ b/arch/arm/mm/cache-v4wt.S @@ -34,7 +34,7 @@ /* * This is the size at which it becomes more efficient to * clean the whole cache, rather than using the individual - * cache line maintainence instructions. + * cache line maintenance instructions. * * *** This needs benchmarking */ @@ -125,6 +125,7 @@ ENTRY(v4wt_coherent_user_range) add r0, r0, #CACHE_DLINESIZE cmp r0, r1 blo 1b + mov r0, #0 mov pc, lr /* @@ -195,18 +196,10 @@ ENTRY(v4wt_dma_map_area) ENDPROC(v4wt_dma_unmap_area) ENDPROC(v4wt_dma_map_area) + .globl v4wt_flush_kern_cache_louis + .equ v4wt_flush_kern_cache_louis, v4wt_flush_kern_cache_all + __INITDATA - .type v4wt_cache_fns, #object -ENTRY(v4wt_cache_fns) - .long v4wt_flush_icache_all - .long v4wt_flush_kern_cache_all - .long v4wt_flush_user_cache_all - .long v4wt_flush_user_cache_range - .long v4wt_coherent_kern_range - .long v4wt_coherent_user_range - .long v4wt_flush_kern_dcache_area - .long v4wt_dma_map_area - .long v4wt_dma_unmap_area - .long v4wt_dma_flush_range - .size v4wt_cache_fns, . - v4wt_cache_fns + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions v4wt diff --git a/arch/arm/mm/cache-v6.S b/arch/arm/mm/cache-v6.S index 99fa688dfad..d8fd4d4bd3d 100644 --- a/arch/arm/mm/cache-v6.S +++ b/arch/arm/mm/cache-v6.S @@ -12,6 +12,7 @@ #include <linux/linkage.h> #include <linux/init.h> #include <asm/assembler.h> +#include <asm/errno.h> #include <asm/unwind.h> #include "proc-macros.S" @@ -135,7 +136,6 @@ ENTRY(v6_coherent_user_range) 1: USER( mcr p15, 0, r0, c7, c10, 1 ) @ clean D line add r0, r0, #CACHE_LINE_SIZE -2: cmp r0, r1 blo 1b #endif @@ -154,13 +154,11 @@ ENTRY(v6_coherent_user_range) /* * Fault handling for the cache operation above. If the virtual address in r0 - * isn't mapped, just try the next page. + * isn't mapped, fail with -EFAULT. 
*/ 9001: - mov r0, r0, lsr #12 - mov r0, r0, lsl #12 - add r0, r0, #4096 - b 2b + mov r0, #-EFAULT + mov pc, lr UNWIND(.fnend ) ENDPROC(v6_coherent_user_range) ENDPROC(v6_coherent_kern_range) @@ -176,6 +174,7 @@ ENDPROC(v6_coherent_kern_range) */ ENTRY(v6_flush_kern_dcache_area) add r1, r0, r1 + bic r0, r0, #D_CACHE_LINE_SIZE - 1 1: #ifdef HARVARD_CACHE mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D line @@ -203,6 +202,10 @@ ENTRY(v6_flush_kern_dcache_area) * - end - virtual end address of region */ v6_dma_inv_range: +#ifdef CONFIG_DMA_CACHE_RWFO + ldrb r2, [r0] @ read for ownership + strb r2, [r0] @ write for ownership +#endif tst r0, #D_CACHE_LINE_SIZE - 1 bic r0, r0, #D_CACHE_LINE_SIZE - 1 #ifdef HARVARD_CACHE @@ -211,6 +214,10 @@ v6_dma_inv_range: mcrne p15, 0, r0, c7, c11, 1 @ clean unified line #endif tst r1, #D_CACHE_LINE_SIZE - 1 +#ifdef CONFIG_DMA_CACHE_RWFO + ldrneb r2, [r1, #-1] @ read for ownership + strneb r2, [r1, #-1] @ write for ownership +#endif bic r1, r1, #D_CACHE_LINE_SIZE - 1 #ifdef HARVARD_CACHE mcrne p15, 0, r1, c7, c14, 1 @ clean & invalidate D line @@ -218,10 +225,6 @@ v6_dma_inv_range: mcrne p15, 0, r1, c7, c15, 1 @ clean & invalidate unified line #endif 1: -#ifdef CONFIG_DMA_CACHE_RWFO - ldr r2, [r0] @ read for ownership - str r2, [r0] @ write for ownership -#endif #ifdef HARVARD_CACHE mcr p15, 0, r0, c7, c6, 1 @ invalidate D line #else @@ -229,6 +232,10 @@ v6_dma_inv_range: #endif add r0, r0, #D_CACHE_LINE_SIZE cmp r0, r1 +#ifdef CONFIG_DMA_CACHE_RWFO + ldrlo r2, [r0] @ read for ownership + strlo r2, [r0] @ write for ownership +#endif blo 1b mov r0, #0 mcr p15, 0, r0, c7, c10, 4 @ drain write buffer @@ -263,12 +270,12 @@ v6_dma_clean_range: * - end - virtual end address of region */ ENTRY(v6_dma_flush_range) - bic r0, r0, #D_CACHE_LINE_SIZE - 1 -1: #ifdef CONFIG_DMA_CACHE_RWFO - ldr r2, [r0] @ read for ownership - str r2, [r0] @ write for ownership + ldrb r2, [r0] @ read for ownership + strb r2, [r0] @ write for ownership #endif + bic r0, r0, #D_CACHE_LINE_SIZE - 1 +1: #ifdef HARVARD_CACHE mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D line #else @@ -276,6 +283,10 @@ ENTRY(v6_dma_flush_range) #endif add r0, r0, #D_CACHE_LINE_SIZE cmp r0, r1 +#ifdef CONFIG_DMA_CACHE_RWFO + ldrlob r2, [r0] @ read for ownership + strlob r2, [r0] @ write for ownership +#endif blo 1b mov r0, #0 mcr p15, 0, r0, c7, c10, 4 @ drain write buffer @@ -315,18 +326,10 @@ ENTRY(v6_dma_unmap_area) mov pc, lr ENDPROC(v6_dma_unmap_area) + .globl v6_flush_kern_cache_louis + .equ v6_flush_kern_cache_louis, v6_flush_kern_cache_all + __INITDATA - .type v6_cache_fns, #object -ENTRY(v6_cache_fns) - .long v6_flush_icache_all - .long v6_flush_kern_cache_all - .long v6_flush_user_cache_all - .long v6_flush_user_cache_range - .long v6_coherent_kern_range - .long v6_coherent_user_range - .long v6_flush_kern_dcache_area - .long v6_dma_map_area - .long v6_dma_unmap_area - .long v6_dma_flush_range - .size v6_cache_fns, . 
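With the change above (and the matching cache-v7 change later in this diff), the coherent_user_range fault handler reports an unmapped user page by returning -EFAULT instead of silently skipping to the next page, so callers can fail the whole request. A purely illustrative sketch of that calling contract; the function names and the fake "first page is unmapped" check are invented for the example:

#include <errno.h>
#include <stdio.h>

/* Models the assembly above: 0 on success, -EFAULT if the range faulted. */
static int coherent_user_range(unsigned long start, unsigned long end)
{
	(void)end;
	/* Pretend the first page of the address space is unmapped. */
	return (start < 0x1000) ? -EFAULT : 0;
}

/* A caller (e.g. a cacheflush(2)-style handler) can now propagate the error
 * instead of pretending the maintenance succeeded. */
static long do_cache_flush(unsigned long start, unsigned long end)
{
	return coherent_user_range(start, end);
}

int main(void)
{
	printf("%ld\n", do_cache_flush(0x0, 0x100));      /* -14 (-EFAULT) */
	printf("%ld\n", do_cache_flush(0x8000, 0x8100));  /* 0 */
	return 0;
}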
- v6_cache_fns + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions v6 diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S index a3ebf7a4f49..615c99e38ba 100644 --- a/arch/arm/mm/cache-v7.S +++ b/arch/arm/mm/cache-v7.S @@ -13,11 +13,58 @@ #include <linux/linkage.h> #include <linux/init.h> #include <asm/assembler.h> +#include <asm/errno.h> #include <asm/unwind.h> #include "proc-macros.S" /* + * The secondary kernel init calls v7_flush_dcache_all before it enables + * the L1; however, the L1 comes out of reset in an undefined state, so + * the clean + invalidate performed by v7_flush_dcache_all causes a bunch + * of cache lines with uninitialized data and uninitialized tags to get + * written out to memory, which does really unpleasant things to the main + * processor. We fix this by performing an invalidate, rather than a + * clean + invalidate, before jumping into the kernel. + * + * This function is cloned from arch/arm/mach-tegra/headsmp.S, and needs + * to be called for both secondary cores startup and primary core resume + * procedures. + */ +ENTRY(v7_invalidate_l1) + mov r0, #0 + mcr p15, 2, r0, c0, c0, 0 + mrc p15, 1, r0, c0, c0, 0 + + ldr r1, =0x7fff + and r2, r1, r0, lsr #13 + + ldr r1, =0x3ff + + and r3, r1, r0, lsr #3 @ NumWays - 1 + add r2, r2, #1 @ NumSets + + and r0, r0, #0x7 + add r0, r0, #4 @ SetShift + + clz r1, r3 @ WayShift + add r4, r3, #1 @ NumWays +1: sub r2, r2, #1 @ NumSets-- + mov r3, r4 @ Temp = NumWays +2: subs r3, r3, #1 @ Temp-- + mov r5, r3, lsl r1 + mov r6, r2, lsl r0 + orr r5, r5, r6 @ Reg = (Temp<<WayShift)|(NumSets<<SetShift) + mcr p15, 0, r5, c7, c6, 2 + bgt 2b + cmp r2, #0 + bgt 1b + dsb st + isb + mov pc, lr +ENDPROC(v7_invalidate_l1) + +/* * v7_flush_icache_all() * * Flush the whole I-cache. @@ -32,6 +79,34 @@ ENTRY(v7_flush_icache_all) mov pc, lr ENDPROC(v7_flush_icache_all) + /* + * v7_flush_dcache_louis() + * + * Flush the D-cache up to the Level of Unification Inner Shareable + * + * Corrupted registers: r0-r7, r9-r11 (r6 only in Thumb mode) + */ + +ENTRY(v7_flush_dcache_louis) + dmb @ ensure ordering with previous memory accesses + mrc p15, 1, r0, c0, c0, 1 @ read clidr, r0 = clidr + ALT_SMP(ands r3, r0, #(7 << 21)) @ extract LoUIS from clidr + ALT_UP(ands r3, r0, #(7 << 27)) @ extract LoUU from clidr +#ifdef CONFIG_ARM_ERRATA_643719 + ALT_SMP(mrceq p15, 0, r2, c0, c0, 0) @ read main ID register + ALT_UP(moveq pc, lr) @ LoUU is zero, so nothing to do + ldreq r1, =0x410fc090 @ ID of ARM Cortex A9 r0p? + biceq r2, r2, #0x0000000f @ clear minor revision number + teqeq r2, r1 @ test for errata affected core and if so... 
+ orreqs r3, #(1 << 21) @ fix LoUIS value (and set flags state to 'ne') +#endif + ALT_SMP(mov r3, r3, lsr #20) @ r3 = LoUIS * 2 + ALT_UP(mov r3, r3, lsr #26) @ r3 = LoUU * 2 + moveq pc, lr @ return if level == 0 + mov r10, #0 @ r10 (starting level) = 0 + b flush_levels @ start flushing cache levels +ENDPROC(v7_flush_dcache_louis) + /* * v7_flush_dcache_all() * @@ -48,15 +123,21 @@ ENTRY(v7_flush_dcache_all) mov r3, r3, lsr #23 @ left align loc bit field beq finished @ if loc is 0, then no need to clean mov r10, #0 @ start clean at cache level 0 -loop1: +flush_levels: add r2, r10, r10, lsr #1 @ work out 3x current cache level mov r1, r0, lsr r2 @ extract cache type bits from clidr and r1, r1, #7 @ mask of the bits for current cache only cmp r1, #2 @ see what cache we have at this level blt skip @ skip if no cache, or just i-cache +#ifdef CONFIG_PREEMPT + save_and_disable_irqs_notrace r9 @ make cssr&csidr read atomic +#endif mcr p15, 2, r10, c0, c0, 0 @ select current cache level in cssr isb @ isb to sych the new cssr&csidr mrc p15, 1, r1, c0, c0, 0 @ read the new csidr +#ifdef CONFIG_PREEMPT + restore_irqs_notrace r9 +#endif and r2, r1, #7 @ extract the length of the cache lines add r2, r2, #4 @ add 4 (line length offset) ldr r4, =0x3ff @@ -64,28 +145,28 @@ loop1: clz r5, r4 @ find bit position of way size increment ldr r7, =0x7fff ands r7, r7, r1, lsr #13 @ extract max number of the index size +loop1: + mov r9, r7 @ create working copy of max index loop2: - mov r9, r4 @ create working copy of max way size -loop3: - ARM( orr r11, r10, r9, lsl r5 ) @ factor way and cache number into r11 - THUMB( lsl r6, r9, r5 ) + ARM( orr r11, r10, r4, lsl r5 ) @ factor way and cache number into r11 + THUMB( lsl r6, r4, r5 ) THUMB( orr r11, r10, r6 ) @ factor way and cache number into r11 - ARM( orr r11, r11, r7, lsl r2 ) @ factor index number into r11 - THUMB( lsl r6, r7, r2 ) + ARM( orr r11, r11, r9, lsl r2 ) @ factor index number into r11 + THUMB( lsl r6, r9, r2 ) THUMB( orr r11, r11, r6 ) @ factor index number into r11 mcr p15, 0, r11, c7, c14, 2 @ clean & invalidate by set/way - subs r9, r9, #1 @ decrement the way - bge loop3 - subs r7, r7, #1 @ decrement the index + subs r9, r9, #1 @ decrement the index bge loop2 + subs r4, r4, #1 @ decrement the way + bge loop1 skip: add r10, r10, #2 @ increment cache number cmp r3, r10 - bgt loop1 + bgt flush_levels finished: mov r10, #0 @ swith back to cache level 0 mcr p15, 2, r10, c0, c0, 0 @ select current cache level in cssr - dsb + dsb st isb mov pc, lr ENDPROC(v7_flush_dcache_all) @@ -96,7 +177,7 @@ ENDPROC(v7_flush_dcache_all) * Flush the entire cache system. * The data cache flush is now achieved using atomic clean / invalidates * working outwards from L1 cache. This is done using Set/Way based cache - * maintainance instructions. + * maintenance instructions. * The instruction cache can still be invalidated back to the point of * unification in a single instruction. * @@ -113,6 +194,24 @@ ENTRY(v7_flush_kern_cache_all) mov pc, lr ENDPROC(v7_flush_kern_cache_all) + /* + * v7_flush_kern_cache_louis(void) + * + * Flush the data cache up to Level of Unification Inner Shareable. + * Invalidate the I-cache to the point of unification. 
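The reworked flush_levels loop above (like v7_invalidate_l1 earlier in this file) derives everything it needs from the CCSIDR of the currently selected level: the set-index shift is the line-size field plus 4, the way shift is CLZ(NumWays - 1), and each operand is (way << WayShift) | (set << SetShift) | (level << 1). A standalone C sketch of that decoding, using an illustrative CCSIDR value for a 256-set, 4-way, 32-byte-line cache (GCC's __builtin_clz stands in for the clz instruction):

#include <stdio.h>

int main(void)
{
	unsigned ccsidr = 0x701fe019u;  /* example: NumSets-1=255, Assoc-1=3, LineSize=1 */
	unsigned level  = 0;            /* L1, as selected through CSSELR */

	unsigned line_bytes = 1u << ((ccsidr & 0x7) + 4);   /* 32 */
	unsigned ways       = ((ccsidr >> 3) & 0x3ff) + 1;  /* 4 */
	unsigned sets       = ((ccsidr >> 13) & 0x7fff) + 1;/* 256 */

	unsigned set_shift = (ccsidr & 0x7) + 4;            /* log2(line_bytes) */
	unsigned way_shift = __builtin_clz(ways - 1);       /* the clz in the asm */

	printf("%u sets, %u ways, %u-byte lines\n", sets, ways, line_bytes);

	/* One clean+invalidate-by-set/way operand: way 3, set 255 at this level. */
	unsigned op = (3u << way_shift) | (255u << set_shift) | (level << 1);
	printf("DCCISW operand: %#x\n", op);
	return 0;
}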
+ */ +ENTRY(v7_flush_kern_cache_louis) + ARM( stmfd sp!, {r4-r5, r7, r9-r11, lr} ) + THUMB( stmfd sp!, {r4-r7, r9-r11, lr} ) + bl v7_flush_dcache_louis + mov r0, #0 + ALT_SMP(mcr p15, 0, r0, c7, c1, 0) @ invalidate I-cache inner shareable + ALT_UP(mcr p15, 0, r0, c7, c5, 0) @ I+BTB cache invalidate + ARM( ldmfd sp!, {r4-r5, r7, r9-r11, lr} ) + THUMB( ldmfd sp!, {r4-r7, r9-r11, lr} ) + mov pc, lr +ENDPROC(v7_flush_kern_cache_louis) + /* * v7_flush_cache_all() * @@ -173,31 +272,42 @@ ENTRY(v7_coherent_user_range) UNWIND(.fnstart ) dcache_line_size r2, r3 sub r3, r2, #1 - bic r0, r0, r3 + bic r12, r0, r3 +#ifdef CONFIG_ARM_ERRATA_764369 + ALT_SMP(W(dsb)) + ALT_UP(W(nop)) +#endif 1: - USER( mcr p15, 0, r0, c7, c11, 1 ) @ clean D line to the point of unification - dsb - USER( mcr p15, 0, r0, c7, c5, 1 ) @ invalidate I line - add r0, r0, r2 -2: - cmp r0, r1 + USER( mcr p15, 0, r12, c7, c11, 1 ) @ clean D line to the point of unification + add r12, r12, r2 + cmp r12, r1 blo 1b + dsb ishst + icache_line_size r2, r3 + sub r3, r2, #1 + bic r12, r0, r3 +2: + USER( mcr p15, 0, r12, c7, c5, 1 ) @ invalidate I line + add r12, r12, r2 + cmp r12, r1 + blo 2b mov r0, #0 ALT_SMP(mcr p15, 0, r0, c7, c1, 6) @ invalidate BTB Inner Shareable ALT_UP(mcr p15, 0, r0, c7, c5, 6) @ invalidate BTB - dsb + dsb ishst isb mov pc, lr /* * Fault handling for the cache operation above. If the virtual address in r0 - * isn't mapped, just try the next page. + * isn't mapped, fail with -EFAULT. */ 9001: - mov r0, r0, lsr #12 - mov r0, r0, lsl #12 - add r0, r0, #4096 - b 2b +#ifdef CONFIG_ARM_ERRATA_775420 + dsb +#endif + mov r0, #-EFAULT + mov pc, lr UNWIND(.fnend ) ENDPROC(v7_coherent_kern_range) ENDPROC(v7_coherent_user_range) @@ -214,12 +324,18 @@ ENDPROC(v7_coherent_user_range) ENTRY(v7_flush_kern_dcache_area) dcache_line_size r2, r3 add r1, r0, r1 + sub r3, r2, #1 + bic r0, r0, r3 +#ifdef CONFIG_ARM_ERRATA_764369 + ALT_SMP(W(dsb)) + ALT_UP(W(nop)) +#endif 1: mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D line / unified line add r0, r0, r2 cmp r0, r1 blo 1b - dsb + dsb st mov pc, lr ENDPROC(v7_flush_kern_dcache_area) @@ -238,6 +354,10 @@ v7_dma_inv_range: sub r3, r2, #1 tst r0, r3 bic r0, r0, r3 +#ifdef CONFIG_ARM_ERRATA_764369 + ALT_SMP(W(dsb)) + ALT_UP(W(nop)) +#endif mcrne p15, 0, r0, c7, c14, 1 @ clean & invalidate D / U line tst r1, r3 @@ -248,7 +368,7 @@ v7_dma_inv_range: add r0, r0, r2 cmp r0, r1 blo 1b - dsb + dsb st mov pc, lr ENDPROC(v7_dma_inv_range) @@ -261,12 +381,16 @@ v7_dma_clean_range: dcache_line_size r2, r3 sub r3, r2, #1 bic r0, r0, r3 +#ifdef CONFIG_ARM_ERRATA_764369 + ALT_SMP(W(dsb)) + ALT_UP(W(nop)) +#endif 1: mcr p15, 0, r0, c7, c10, 1 @ clean D / U line add r0, r0, r2 cmp r0, r1 blo 1b - dsb + dsb st mov pc, lr ENDPROC(v7_dma_clean_range) @@ -279,12 +403,16 @@ ENTRY(v7_dma_flush_range) dcache_line_size r2, r3 sub r3, r2, #1 bic r0, r0, r3 +#ifdef CONFIG_ARM_ERRATA_764369 + ALT_SMP(W(dsb)) + ALT_UP(W(nop)) +#endif 1: mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D / U line add r0, r0, r2 cmp r0, r1 blo 1b - dsb + dsb st mov pc, lr ENDPROC(v7_dma_flush_range) @@ -316,16 +444,5 @@ ENDPROC(v7_dma_unmap_area) __INITDATA - .type v7_cache_fns, #object -ENTRY(v7_cache_fns) - .long v7_flush_icache_all - .long v7_flush_kern_cache_all - .long v7_flush_user_cache_all - .long v7_flush_user_cache_range - .long v7_coherent_kern_range - .long v7_coherent_user_range - .long v7_flush_kern_dcache_area - .long v7_dma_map_area - .long v7_dma_unmap_area - .long v7_dma_flush_range - .size v7_cache_fns, . 
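The rewritten v7_coherent_user_range above first cleans the whole range with the D-cache stride, issues a dsb, and only then invalidates the I-cache with its own stride; both strides come from the dcache_line_size/icache_line_size macros, which decode CTR (DminLine in bits [19:16], IminLine in bits [3:0], each a log2 word count). A small standalone sketch of that decoding with an illustrative CTR value:

#include <stdio.h>

int main(void)
{
	/* Illustrative CTR value: DminLine = 3 and IminLine = 3, i.e. 32-byte lines. */
	unsigned ctr = 0x83338003u;

	unsigned dline = 4u << ((ctr >> 16) & 0xf);   /* what dcache_line_size yields */
	unsigned iline = 4u << (ctr & 0xf);           /* what icache_line_size yields */

	printf("D line %u bytes, I line %u bytes\n", dline, iline);
	return 0;
}

Splitting the routine into a clean pass and an invalidate pass is what lets the two loops use these different strides safely, with the dsb between them ordering the D-side cleans before the I-side invalidates.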
- v7_cache_fns + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions v7 diff --git a/arch/arm/mm/cache-xsc3l2.c b/arch/arm/mm/cache-xsc3l2.c index c3154928bcc..6c3edeb66e7 100644 --- a/arch/arm/mm/cache-xsc3l2.c +++ b/arch/arm/mm/cache-xsc3l2.c @@ -17,14 +17,10 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <linux/init.h> -#include <asm/system.h> +#include <linux/highmem.h> +#include <asm/cp15.h> #include <asm/cputype.h> #include <asm/cacheflush.h> -#include <asm/kmap_types.h> -#include <asm/fixmap.h> -#include <asm/pgtable.h> -#include <asm/tlbflush.h> -#include "mm.h" #define CR_L2 (1 << 26) @@ -71,16 +67,15 @@ static inline void xsc3_l2_inv_all(void) dsb(); } +static inline void l2_unmap_va(unsigned long va) +{ #ifdef CONFIG_HIGHMEM -#define l2_map_save_flags(x) raw_local_save_flags(x) -#define l2_map_restore_flags(x) raw_local_irq_restore(x) -#else -#define l2_map_save_flags(x) ((x) = 0) -#define l2_map_restore_flags(x) ((void)(x)) + if (va != -1) + kunmap_atomic((void *)va); #endif +} -static inline unsigned long l2_map_va(unsigned long pa, unsigned long prev_va, - unsigned long flags) +static inline unsigned long l2_map_va(unsigned long pa, unsigned long prev_va) { #ifdef CONFIG_HIGHMEM unsigned long va = prev_va & PAGE_MASK; @@ -89,17 +84,10 @@ static inline unsigned long l2_map_va(unsigned long pa, unsigned long prev_va, /* * Switching to a new page. Because cache ops are * using virtual addresses only, we must put a mapping - * in place for it. We also enable interrupts for a - * short while and disable them again to protect this - * mapping. + * in place for it. */ - unsigned long idx; - raw_local_irq_restore(flags); - idx = KM_L2_CACHE + KM_TYPE_NR * smp_processor_id(); - va = __fix_to_virt(FIX_KMAP_BEGIN + idx); - raw_local_irq_restore(flags | PSR_I_BIT); - set_pte_ext(TOP_PTE(va), pfn_pte(pa >> PAGE_SHIFT, PAGE_KERNEL), 0); - local_flush_tlb_kernel_page(va); + l2_unmap_va(prev_va); + va = (unsigned long)kmap_atomic_pfn(pa >> PAGE_SHIFT); } return va + (pa_offset >> (32 - PAGE_SHIFT)); #else @@ -109,7 +97,7 @@ static inline unsigned long l2_map_va(unsigned long pa, unsigned long prev_va, static void xsc3_l2_inv_range(unsigned long start, unsigned long end) { - unsigned long vaddr, flags; + unsigned long vaddr; if (start == 0 && end == -1ul) { xsc3_l2_inv_all(); @@ -117,13 +105,12 @@ static void xsc3_l2_inv_range(unsigned long start, unsigned long end) } vaddr = -1; /* to force the first mapping */ - l2_map_save_flags(flags); /* * Clean and invalidate partial first cache line. */ if (start & (CACHE_LINE_SIZE - 1)) { - vaddr = l2_map_va(start & ~(CACHE_LINE_SIZE - 1), vaddr, flags); + vaddr = l2_map_va(start & ~(CACHE_LINE_SIZE - 1), vaddr); xsc3_l2_clean_mva(vaddr); xsc3_l2_inv_mva(vaddr); start = (start | (CACHE_LINE_SIZE - 1)) + 1; @@ -133,7 +120,7 @@ static void xsc3_l2_inv_range(unsigned long start, unsigned long end) * Invalidate all full cache lines between 'start' and 'end'. */ while (start < (end & ~(CACHE_LINE_SIZE - 1))) { - vaddr = l2_map_va(start, vaddr, flags); + vaddr = l2_map_va(start, vaddr); xsc3_l2_inv_mva(vaddr); start += CACHE_LINE_SIZE; } @@ -142,31 +129,30 @@ static void xsc3_l2_inv_range(unsigned long start, unsigned long end) * Clean and invalidate partial last cache line. 
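xsc3_l2_inv_range above (its handling of the partial last line continues just below) splits an arbitrary range into three pieces: a partial first line that is cleaned before being invalidated, fully covered lines that are only invalidated, and a partial last line that is again cleaned first so unrelated data sharing those lines is not lost. A standalone sketch of that splitting, with the highmem mapping via l2_map_va() omitted and the maintenance operations reduced to printfs:

#include <stdio.h>

#define CACHE_LINE_SIZE 32UL

static void clean_inv_line(unsigned long addr) { printf("clean+inv %#lx\n", addr); }
static void inv_line(unsigned long addr)       { printf("inv       %#lx\n", addr); }

static void l2_inv_range(unsigned long start, unsigned long end)
{
	/* Partial first cache line: clean + invalidate, then round start up. */
	if (start & (CACHE_LINE_SIZE - 1)) {
		clean_inv_line(start & ~(CACHE_LINE_SIZE - 1));
		start = (start | (CACHE_LINE_SIZE - 1)) + 1;
	}

	/* Fully covered lines: invalidate only. */
	while (start < (end & ~(CACHE_LINE_SIZE - 1))) {
		inv_line(start);
		start += CACHE_LINE_SIZE;
	}

	/* Partial last cache line: clean + invalidate. */
	if (start < end)
		clean_inv_line(start);
}

int main(void)
{
	l2_inv_range(0x1010, 0x10f0);   /* unaligned start and end */
	return 0;
}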
*/ if (start < end) { - vaddr = l2_map_va(start, vaddr, flags); + vaddr = l2_map_va(start, vaddr); xsc3_l2_clean_mva(vaddr); xsc3_l2_inv_mva(vaddr); } - l2_map_restore_flags(flags); + l2_unmap_va(vaddr); dsb(); } static void xsc3_l2_clean_range(unsigned long start, unsigned long end) { - unsigned long vaddr, flags; + unsigned long vaddr; vaddr = -1; /* to force the first mapping */ - l2_map_save_flags(flags); start &= ~(CACHE_LINE_SIZE - 1); while (start < end) { - vaddr = l2_map_va(start, vaddr, flags); + vaddr = l2_map_va(start, vaddr); xsc3_l2_clean_mva(vaddr); start += CACHE_LINE_SIZE; } - l2_map_restore_flags(flags); + l2_unmap_va(vaddr); dsb(); } @@ -193,7 +179,7 @@ static inline void xsc3_l2_flush_all(void) static void xsc3_l2_flush_range(unsigned long start, unsigned long end) { - unsigned long vaddr, flags; + unsigned long vaddr; if (start == 0 && end == -1ul) { xsc3_l2_flush_all(); @@ -201,17 +187,16 @@ static void xsc3_l2_flush_range(unsigned long start, unsigned long end) } vaddr = -1; /* to force the first mapping */ - l2_map_save_flags(flags); start &= ~(CACHE_LINE_SIZE - 1); while (start < end) { - vaddr = l2_map_va(start, vaddr, flags); + vaddr = l2_map_va(start, vaddr); xsc3_l2_clean_mva(vaddr); xsc3_l2_inv_mva(vaddr); start += CACHE_LINE_SIZE; } - l2_map_restore_flags(flags); + l2_unmap_va(vaddr); dsb(); } diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c index b0ee9ba3cfa..6eb97b3a748 100644 --- a/arch/arm/mm/context.c +++ b/arch/arm/mm/context.c @@ -2,6 +2,9 @@ * linux/arch/arm/mm/context.c * * Copyright (C) 2002-2003 Deep Blue Solutions Ltd, all rights reserved. + * Copyright (C) 2012 ARM Limited + * + * Author: Will Deacon <will.deacon@arm.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -14,144 +17,244 @@ #include <linux/percpu.h> #include <asm/mmu_context.h> +#include <asm/smp_plat.h> +#include <asm/thread_notify.h> #include <asm/tlbflush.h> - -static DEFINE_SPINLOCK(cpu_asid_lock); -unsigned int cpu_last_asid = ASID_FIRST_VERSION; -#ifdef CONFIG_SMP -DEFINE_PER_CPU(struct mm_struct *, current_mm); -#endif +#include <asm/proc-fns.h> /* - * We fork()ed a process, and we need a new context for the child - * to run in. We reserve version 0 for initial tasks so we will - * always allocate an ASID. The ASID 0 is reserved for the TTBR - * register changing sequence. + * On ARMv6, we have the following structure in the Context ID: + * + * 31 7 0 + * +-------------------------+-----------+ + * | process ID | ASID | + * +-------------------------+-----------+ + * | context ID | + * +-------------------------------------+ + * + * The ASID is used to tag entries in the CPU caches and TLBs. + * The context ID is used by debuggers and trace logic, and + * should be unique within all running processes. + * + * In big endian operation, the two 32 bit words are swapped if accessed + * by non-64-bit operations. 
*/ -void __init_new_context(struct task_struct *tsk, struct mm_struct *mm) -{ - mm->context.id = 0; - spin_lock_init(&mm->context.id_lock); -} +#define ASID_FIRST_VERSION (1ULL << ASID_BITS) +#define NUM_USER_ASIDS ASID_FIRST_VERSION -static void flush_context(void) -{ - /* set the reserved ASID before flushing the TLB */ - asm("mcr p15, 0, %0, c13, c0, 1\n" : : "r" (0)); - isb(); - local_flush_tlb_all(); - if (icache_is_vivt_asid_tagged()) { - __flush_icache_all(); - dsb(); - } -} +static DEFINE_RAW_SPINLOCK(cpu_asid_lock); +static atomic64_t asid_generation = ATOMIC64_INIT(ASID_FIRST_VERSION); +static DECLARE_BITMAP(asid_map, NUM_USER_ASIDS); -#ifdef CONFIG_SMP +static DEFINE_PER_CPU(atomic64_t, active_asids); +static DEFINE_PER_CPU(u64, reserved_asids); +static cpumask_t tlb_flush_pending; -static void set_mm_context(struct mm_struct *mm, unsigned int asid) +#ifdef CONFIG_ARM_ERRATA_798181 +void a15_erratum_get_cpumask(int this_cpu, struct mm_struct *mm, + cpumask_t *mask) { + int cpu; unsigned long flags; + u64 context_id, asid; - /* - * Locking needed for multi-threaded applications where the - * same mm->context.id could be set from different CPUs during - * the broadcast. This function is also called via IPI so the - * mm->context.id_lock has to be IRQ-safe. - */ - spin_lock_irqsave(&mm->context.id_lock, flags); - if (likely((mm->context.id ^ cpu_last_asid) >> ASID_BITS)) { + raw_spin_lock_irqsave(&cpu_asid_lock, flags); + context_id = mm->context.id.counter; + for_each_online_cpu(cpu) { + if (cpu == this_cpu) + continue; /* - * Old version of ASID found. Set the new one and - * reset mm_cpumask(mm). + * We only need to send an IPI if the other CPUs are + * running the same ASID as the one being invalidated. */ - mm->context.id = asid; - cpumask_clear(mm_cpumask(mm)); + asid = per_cpu(active_asids, cpu).counter; + if (asid == 0) + asid = per_cpu(reserved_asids, cpu); + if (context_id == asid) + cpumask_set_cpu(cpu, mask); } - spin_unlock_irqrestore(&mm->context.id_lock, flags); - - /* - * Set the mm_cpumask(mm) bit for the current CPU. - */ - cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); + raw_spin_unlock_irqrestore(&cpu_asid_lock, flags); } +#endif +#ifdef CONFIG_ARM_LPAE /* - * Reset the ASID on the current CPU. This function call is broadcast - * from the CPU handling the ASID rollover and holding cpu_asid_lock. + * With LPAE, the ASID and page tables are updated atomicly, so there is + * no need for a reserved set of tables (the active ASID tracking prevents + * any issues across a rollover). */ -static void reset_context(void *info) +#define cpu_set_reserved_ttbr0() +#else +static void cpu_set_reserved_ttbr0(void) { - unsigned int asid; - unsigned int cpu = smp_processor_id(); - struct mm_struct *mm = per_cpu(current_mm, cpu); - + u32 ttb; /* - * Check if a current_mm was set on this CPU as it might still - * be in the early booting stages and using the reserved ASID. + * Copy TTBR1 into TTBR0. + * This points at swapper_pg_dir, which contains only global + * entries so any speculative walks are perfectly safe. 
*/ - if (!mm) - return; + asm volatile( + " mrc p15, 0, %0, c2, c0, 1 @ read TTBR1\n" + " mcr p15, 0, %0, c2, c0, 0 @ set TTBR0\n" + : "=r" (ttb)); + isb(); +} +#endif - smp_rmb(); - asid = cpu_last_asid + cpu + 1; +#ifdef CONFIG_PID_IN_CONTEXTIDR +static int contextidr_notifier(struct notifier_block *unused, unsigned long cmd, + void *t) +{ + u32 contextidr; + pid_t pid; + struct thread_info *thread = t; - flush_context(); - set_mm_context(mm, asid); + if (cmd != THREAD_NOTIFY_SWITCH) + return NOTIFY_DONE; - /* set the new ASID */ - asm("mcr p15, 0, %0, c13, c0, 1\n" : : "r" (mm->context.id)); + pid = task_pid_nr(thread->task) << ASID_BITS; + asm volatile( + " mrc p15, 0, %0, c13, c0, 1\n" + " and %0, %0, %2\n" + " orr %0, %0, %1\n" + " mcr p15, 0, %0, c13, c0, 1\n" + : "=r" (contextidr), "+r" (pid) + : "I" (~ASID_MASK)); isb(); + + return NOTIFY_OK; } -#else +static struct notifier_block contextidr_notifier_block = { + .notifier_call = contextidr_notifier, +}; -static inline void set_mm_context(struct mm_struct *mm, unsigned int asid) +static int __init contextidr_notifier_init(void) { - mm->context.id = asid; - cpumask_copy(mm_cpumask(mm), cpumask_of(smp_processor_id())); + return thread_register_notifier(&contextidr_notifier_block); } - +arch_initcall(contextidr_notifier_init); #endif -void __new_context(struct mm_struct *mm) +static void flush_context(unsigned int cpu) { - unsigned int asid; + int i; + u64 asid; - spin_lock(&cpu_asid_lock); -#ifdef CONFIG_SMP - /* - * Check the ASID again, in case the change was broadcast from - * another CPU before we acquired the lock. - */ - if (unlikely(((mm->context.id ^ cpu_last_asid) >> ASID_BITS) == 0)) { - cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); - spin_unlock(&cpu_asid_lock); - return; + /* Update the list of reserved ASIDs and the ASID bitmap. */ + bitmap_clear(asid_map, 0, NUM_USER_ASIDS); + for_each_possible_cpu(i) { + if (i == cpu) { + asid = 0; + } else { + asid = atomic64_xchg(&per_cpu(active_asids, i), 0); + /* + * If this CPU has already been through a + * rollover, but hasn't run another task in + * the meantime, we must preserve its reserved + * ASID, as this is the only trace we have of + * the process it is still running. + */ + if (asid == 0) + asid = per_cpu(reserved_asids, i); + __set_bit(asid & ~ASID_MASK, asid_map); + } + per_cpu(reserved_asids, i) = asid; } -#endif - /* - * At this point, it is guaranteed that the current mm (with - * an old ASID) isn't active on any other CPU since the ASIDs - * are changed simultaneously via IPI. - */ - asid = ++cpu_last_asid; - if (asid == 0) - asid = cpu_last_asid = ASID_FIRST_VERSION; + + /* Queue a TLB invalidate and flush the I-cache if necessary. */ + cpumask_setall(&tlb_flush_pending); + + if (icache_is_vivt_asid_tagged()) + __flush_icache_all(); +} + +static int is_reserved_asid(u64 asid) +{ + int cpu; + for_each_possible_cpu(cpu) + if (per_cpu(reserved_asids, cpu) == asid) + return 1; + return 0; +} + +static u64 new_context(struct mm_struct *mm, unsigned int cpu) +{ + static u32 cur_idx = 1; + u64 asid = atomic64_read(&mm->context.id); + u64 generation = atomic64_read(&asid_generation); + + if (asid != 0 && is_reserved_asid(asid)) { + /* + * Our current ASID was active during a rollover, we can + * continue to use it and this was just a false alarm. + */ + asid = generation | (asid & ~ASID_MASK); + } else { + /* + * Allocate a free ASID. If we can't find one, take a + * note of the currently active ASIDs and mark the TLBs + * as requiring flushes. 
We always count from ASID #1, + * as we reserve ASID #0 to switch via TTBR0 and to + * avoid speculative page table walks from hitting in + * any partial walk caches, which could be populated + * from overlapping level-1 descriptors used to map both + * the module area and the userspace stack. + */ + asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, cur_idx); + if (asid == NUM_USER_ASIDS) { + generation = atomic64_add_return(ASID_FIRST_VERSION, + &asid_generation); + flush_context(cpu); + asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1); + } + __set_bit(asid, asid_map); + cur_idx = asid; + asid |= generation; + cpumask_clear(mm_cpumask(mm)); + } + + return asid; +} + +void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk) +{ + unsigned long flags; + unsigned int cpu = smp_processor_id(); + u64 asid; + + if (unlikely(mm->context.vmalloc_seq != init_mm.context.vmalloc_seq)) + __check_vmalloc_seq(mm); /* - * If we've used up all our ASIDs, we need - * to start a new version and flush the TLB. + * We cannot update the pgd and the ASID atomicly with classic + * MMU, so switch exclusively to global mappings to avoid + * speculative page table walking with the wrong TTBR. */ - if (unlikely((asid & ~ASID_MASK) == 0)) { - asid = cpu_last_asid + smp_processor_id() + 1; - flush_context(); -#ifdef CONFIG_SMP - smp_wmb(); - smp_call_function(reset_context, NULL, 1); -#endif - cpu_last_asid += NR_CPUS; + cpu_set_reserved_ttbr0(); + + asid = atomic64_read(&mm->context.id); + if (!((asid ^ atomic64_read(&asid_generation)) >> ASID_BITS) + && atomic64_xchg(&per_cpu(active_asids, cpu), asid)) + goto switch_mm_fastpath; + + raw_spin_lock_irqsave(&cpu_asid_lock, flags); + /* Check that our ASID belongs to the current generation. */ + asid = atomic64_read(&mm->context.id); + if ((asid ^ atomic64_read(&asid_generation)) >> ASID_BITS) { + asid = new_context(mm, cpu); + atomic64_set(&mm->context.id, asid); + } + + if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending)) { + local_flush_bp_all(); + local_flush_tlb_all(); } - set_mm_context(mm, asid); - spin_unlock(&cpu_asid_lock); + atomic64_set(&per_cpu(active_asids, cpu), asid); + cpumask_set_cpu(cpu, mm_cpumask(mm)); + raw_spin_unlock_irqrestore(&cpu_asid_lock, flags); + +switch_mm_fastpath: + cpu_switch_mm(mm->pgd, mm); } diff --git a/arch/arm/mm/copypage-fa.c b/arch/arm/mm/copypage-fa.c index d2852e1635b..d130a5ece5d 100644 --- a/arch/arm/mm/copypage-fa.c +++ b/arch/arm/mm/copypage-fa.c @@ -44,11 +44,11 @@ void fa_copy_user_highpage(struct page *to, struct page *from, { void *kto, *kfrom; - kto = kmap_atomic(to, KM_USER0); - kfrom = kmap_atomic(from, KM_USER1); + kto = kmap_atomic(to); + kfrom = kmap_atomic(from); fa_copy_user_page(kto, kfrom); - kunmap_atomic(kfrom, KM_USER1); - kunmap_atomic(kto, KM_USER0); + kunmap_atomic(kfrom); + kunmap_atomic(kto); } /* @@ -58,7 +58,7 @@ void fa_copy_user_highpage(struct page *to, struct page *from, */ void fa_clear_user_highpage(struct page *page, unsigned long vaddr) { - void *ptr, *kaddr = kmap_atomic(page, KM_USER0); + void *ptr, *kaddr = kmap_atomic(page); asm volatile("\ mov r1, %2 @ 1\n\ mov r2, #0 @ 1\n\ @@ -77,7 +77,7 @@ void fa_clear_user_highpage(struct page *page, unsigned long vaddr) : "=r" (ptr) : "0" (kaddr), "I" (PAGE_SIZE / 32) : "r1", "r2", "r3", "ip", "lr"); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); } struct cpu_user_fns fa_user_fns __initdata = { diff --git a/arch/arm/mm/copypage-feroceon.c b/arch/arm/mm/copypage-feroceon.c index ac163de7dc0..49ee0c1a720 
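check_and_switch_context() above takes its fast path when the mm's 64-bit context id was allocated in the current generation, i.e. when (asid ^ asid_generation) >> ASID_BITS is zero; a rollover bumps the generation by ASID_FIRST_VERSION so every existing context id misses and gets a fresh hardware ASID from the bitmap. A standalone arithmetic sketch, assuming 8-bit hardware ASIDs:

#include <stdint.h>
#include <stdio.h>

#define ASID_BITS          8
#define ASID_MASK          (~0ULL << ASID_BITS)
#define ASID_FIRST_VERSION (1ULL << ASID_BITS)

/* Does this mm's context id belong to the current generation? */
static int same_generation(uint64_t ctx_id, uint64_t generation)
{
	return ((ctx_id ^ generation) >> ASID_BITS) == 0;
}

int main(void)
{
	uint64_t generation = ASID_FIRST_VERSION;   /* generation 1 */
	uint64_t ctx_id = generation | 42;          /* ASID 42, generation 1 */

	printf("%d\n", same_generation(ctx_id, generation));   /* 1: fast path */

	/* A rollover bumps the generation; every existing context id now misses. */
	generation += ASID_FIRST_VERSION;
	printf("%d\n", same_generation(ctx_id, generation));   /* 0: allocate anew */

	/* The hardware ASID that would be programmed into the Context ID register. */
	printf("hw asid %llu\n", (unsigned long long)(ctx_id & ~ASID_MASK));  /* 42 */
	return 0;
}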
100644 --- a/arch/arm/mm/copypage-feroceon.c +++ b/arch/arm/mm/copypage-feroceon.c @@ -72,17 +72,17 @@ void feroceon_copy_user_highpage(struct page *to, struct page *from, { void *kto, *kfrom; - kto = kmap_atomic(to, KM_USER0); - kfrom = kmap_atomic(from, KM_USER1); + kto = kmap_atomic(to); + kfrom = kmap_atomic(from); flush_cache_page(vma, vaddr, page_to_pfn(from)); feroceon_copy_user_page(kto, kfrom); - kunmap_atomic(kfrom, KM_USER1); - kunmap_atomic(kto, KM_USER0); + kunmap_atomic(kfrom); + kunmap_atomic(kto); } void feroceon_clear_user_highpage(struct page *page, unsigned long vaddr) { - void *ptr, *kaddr = kmap_atomic(page, KM_USER0); + void *ptr, *kaddr = kmap_atomic(page); asm volatile ("\ mov r1, %2 \n\ mov r2, #0 \n\ @@ -102,7 +102,7 @@ void feroceon_clear_user_highpage(struct page *page, unsigned long vaddr) : "=r" (ptr) : "0" (kaddr), "I" (PAGE_SIZE / 32) : "r1", "r2", "r3", "r4", "r5", "r6", "r7", "ip", "lr"); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); } struct cpu_user_fns feroceon_user_fns __initdata = { diff --git a/arch/arm/mm/copypage-v3.c b/arch/arm/mm/copypage-v3.c deleted file mode 100644 index f72303e1d80..00000000000 --- a/arch/arm/mm/copypage-v3.c +++ /dev/null @@ -1,81 +0,0 @@ -/* - * linux/arch/arm/mm/copypage-v3.c - * - * Copyright (C) 1995-1999 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#include <linux/init.h> -#include <linux/highmem.h> - -/* - * ARMv3 optimised copy_user_highpage - * - * FIXME: do we need to handle cache stuff... - */ -static void __naked -v3_copy_user_page(void *kto, const void *kfrom) -{ - asm("\n\ - stmfd sp!, {r4, lr} @ 2\n\ - mov r2, %2 @ 1\n\ - ldmia %0!, {r3, r4, ip, lr} @ 4+1\n\ -1: stmia %1!, {r3, r4, ip, lr} @ 4\n\ - ldmia %0!, {r3, r4, ip, lr} @ 4+1\n\ - stmia %1!, {r3, r4, ip, lr} @ 4\n\ - ldmia %0!, {r3, r4, ip, lr} @ 4+1\n\ - stmia %1!, {r3, r4, ip, lr} @ 4\n\ - ldmia %0!, {r3, r4, ip, lr} @ 4\n\ - subs r2, r2, #1 @ 1\n\ - stmia %1!, {r3, r4, ip, lr} @ 4\n\ - ldmneia %0!, {r3, r4, ip, lr} @ 4\n\ - bne 1b @ 1\n\ - ldmfd sp!, {r4, pc} @ 3" - : - : "r" (kfrom), "r" (kto), "I" (PAGE_SIZE / 64)); -} - -void v3_copy_user_highpage(struct page *to, struct page *from, - unsigned long vaddr, struct vm_area_struct *vma) -{ - void *kto, *kfrom; - - kto = kmap_atomic(to, KM_USER0); - kfrom = kmap_atomic(from, KM_USER1); - v3_copy_user_page(kto, kfrom); - kunmap_atomic(kfrom, KM_USER1); - kunmap_atomic(kto, KM_USER0); -} - -/* - * ARMv3 optimised clear_user_page - * - * FIXME: do we need to handle cache stuff... 
- */ -void v3_clear_user_highpage(struct page *page, unsigned long vaddr) -{ - void *ptr, *kaddr = kmap_atomic(page, KM_USER0); - asm volatile("\n\ - mov r1, %2 @ 1\n\ - mov r2, #0 @ 1\n\ - mov r3, #0 @ 1\n\ - mov ip, #0 @ 1\n\ - mov lr, #0 @ 1\n\ -1: stmia %0!, {r2, r3, ip, lr} @ 4\n\ - stmia %0!, {r2, r3, ip, lr} @ 4\n\ - stmia %0!, {r2, r3, ip, lr} @ 4\n\ - stmia %0!, {r2, r3, ip, lr} @ 4\n\ - subs r1, r1, #1 @ 1\n\ - bne 1b @ 1" - : "=r" (ptr) - : "0" (kaddr), "I" (PAGE_SIZE / 64) - : "r1", "r2", "r3", "ip", "lr"); - kunmap_atomic(kaddr, KM_USER0); -} - -struct cpu_user_fns v3_user_fns __initdata = { - .cpu_clear_user_highpage = v3_clear_user_highpage, - .cpu_copy_user_highpage = v3_copy_user_highpage, -}; diff --git a/arch/arm/mm/copypage-v4mc.c b/arch/arm/mm/copypage-v4mc.c index b8061519ce7..1267e64133b 100644 --- a/arch/arm/mm/copypage-v4mc.c +++ b/arch/arm/mm/copypage-v4mc.c @@ -23,14 +23,10 @@ #include "mm.h" -/* - * 0xffff8000 to 0xffffffff is reserved for any ARM architecture - * specific hacks for copying pages efficiently. - */ #define minicache_pgprot __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | \ L_PTE_MT_MINICACHE) -static DEFINE_SPINLOCK(minicache_lock); +static DEFINE_RAW_SPINLOCK(minicache_lock); /* * ARMv4 mini-dcache optimised copy_user_highpage @@ -71,21 +67,20 @@ mc_copy_user_page(void *from, void *to) void v4_mc_copy_user_highpage(struct page *to, struct page *from, unsigned long vaddr, struct vm_area_struct *vma) { - void *kto = kmap_atomic(to, KM_USER1); + void *kto = kmap_atomic(to); if (!test_and_set_bit(PG_dcache_clean, &from->flags)) __flush_dcache_page(page_mapping(from), from); - spin_lock(&minicache_lock); + raw_spin_lock(&minicache_lock); - set_pte_ext(TOP_PTE(0xffff8000), pfn_pte(page_to_pfn(from), minicache_pgprot), 0); - flush_tlb_kernel_page(0xffff8000); + set_top_pte(COPYPAGE_MINICACHE, mk_pte(from, minicache_pgprot)); - mc_copy_user_page((void *)0xffff8000, kto); + mc_copy_user_page((void *)COPYPAGE_MINICACHE, kto); - spin_unlock(&minicache_lock); + raw_spin_unlock(&minicache_lock); - kunmap_atomic(kto, KM_USER1); + kunmap_atomic(kto); } /* @@ -93,7 +88,7 @@ void v4_mc_copy_user_highpage(struct page *to, struct page *from, */ void v4_mc_clear_user_highpage(struct page *page, unsigned long vaddr) { - void *ptr, *kaddr = kmap_atomic(page, KM_USER0); + void *ptr, *kaddr = kmap_atomic(page); asm volatile("\ mov r1, %2 @ 1\n\ mov r2, #0 @ 1\n\ @@ -111,7 +106,7 @@ void v4_mc_clear_user_highpage(struct page *page, unsigned long vaddr) : "=r" (ptr) : "0" (kaddr), "I" (PAGE_SIZE / 64) : "r1", "r2", "r3", "ip", "lr"); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); } struct cpu_user_fns v4_mc_user_fns __initdata = { diff --git a/arch/arm/mm/copypage-v4wb.c b/arch/arm/mm/copypage-v4wb.c index cb589cbb2b6..067d0fdd630 100644 --- a/arch/arm/mm/copypage-v4wb.c +++ b/arch/arm/mm/copypage-v4wb.c @@ -52,12 +52,12 @@ void v4wb_copy_user_highpage(struct page *to, struct page *from, { void *kto, *kfrom; - kto = kmap_atomic(to, KM_USER0); - kfrom = kmap_atomic(from, KM_USER1); + kto = kmap_atomic(to); + kfrom = kmap_atomic(from); flush_cache_page(vma, vaddr, page_to_pfn(from)); v4wb_copy_user_page(kto, kfrom); - kunmap_atomic(kfrom, KM_USER1); - kunmap_atomic(kto, KM_USER0); + kunmap_atomic(kfrom); + kunmap_atomic(kto); } /* @@ -67,7 +67,7 @@ void v4wb_copy_user_highpage(struct page *to, struct page *from, */ void v4wb_clear_user_highpage(struct page *page, unsigned long vaddr) { - void *ptr, *kaddr = kmap_atomic(page, KM_USER0); + void *ptr, *kaddr = 
kmap_atomic(page); asm volatile("\ mov r1, %2 @ 1\n\ mov r2, #0 @ 1\n\ @@ -86,7 +86,7 @@ void v4wb_clear_user_highpage(struct page *page, unsigned long vaddr) : "=r" (ptr) : "0" (kaddr), "I" (PAGE_SIZE / 64) : "r1", "r2", "r3", "ip", "lr"); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); } struct cpu_user_fns v4wb_user_fns __initdata = { diff --git a/arch/arm/mm/copypage-v4wt.c b/arch/arm/mm/copypage-v4wt.c index 30c7d048a32..b85c5da2e51 100644 --- a/arch/arm/mm/copypage-v4wt.c +++ b/arch/arm/mm/copypage-v4wt.c @@ -48,11 +48,11 @@ void v4wt_copy_user_highpage(struct page *to, struct page *from, { void *kto, *kfrom; - kto = kmap_atomic(to, KM_USER0); - kfrom = kmap_atomic(from, KM_USER1); + kto = kmap_atomic(to); + kfrom = kmap_atomic(from); v4wt_copy_user_page(kto, kfrom); - kunmap_atomic(kfrom, KM_USER1); - kunmap_atomic(kto, KM_USER0); + kunmap_atomic(kfrom); + kunmap_atomic(kto); } /* @@ -62,7 +62,7 @@ void v4wt_copy_user_highpage(struct page *to, struct page *from, */ void v4wt_clear_user_highpage(struct page *page, unsigned long vaddr) { - void *ptr, *kaddr = kmap_atomic(page, KM_USER0); + void *ptr, *kaddr = kmap_atomic(page); asm volatile("\ mov r1, %2 @ 1\n\ mov r2, #0 @ 1\n\ @@ -79,7 +79,7 @@ void v4wt_clear_user_highpage(struct page *page, unsigned long vaddr) : "=r" (ptr) : "0" (kaddr), "I" (PAGE_SIZE / 64) : "r1", "r2", "r3", "ip", "lr"); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); } struct cpu_user_fns v4wt_user_fns __initdata = { diff --git a/arch/arm/mm/copypage-v6.c b/arch/arm/mm/copypage-v6.c index bdba6c65c90..b9bcc9d7917 100644 --- a/arch/arm/mm/copypage-v6.c +++ b/arch/arm/mm/copypage-v6.c @@ -24,10 +24,7 @@ #error FIX ME #endif -#define from_address (0xffff8000) -#define to_address (0xffffc000) - -static DEFINE_SPINLOCK(v6_lock); +static DEFINE_RAW_SPINLOCK(v6_lock); /* * Copy the user page. No aliasing to deal with so we can just @@ -38,12 +35,11 @@ static void v6_copy_user_highpage_nonaliasing(struct page *to, { void *kto, *kfrom; - kfrom = kmap_atomic(from, KM_USER0); - kto = kmap_atomic(to, KM_USER1); + kfrom = kmap_atomic(from); + kto = kmap_atomic(to); copy_page(kto, kfrom); - __cpuc_flush_dcache_area(kto, PAGE_SIZE); - kunmap_atomic(kto, KM_USER1); - kunmap_atomic(kfrom, KM_USER0); + kunmap_atomic(kto); + kunmap_atomic(kfrom); } /* @@ -52,9 +48,9 @@ static void v6_copy_user_highpage_nonaliasing(struct page *to, */ static void v6_clear_user_highpage_nonaliasing(struct page *page, unsigned long vaddr) { - void *kaddr = kmap_atomic(page, KM_USER0); + void *kaddr = kmap_atomic(page); clear_page(kaddr); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); } /* @@ -89,20 +85,17 @@ static void v6_copy_user_highpage_aliasing(struct page *to, * Now copy the page using the same cache colour as the * pages ultimate destination. 
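The aliasing v6 copy/clear routines above pick a kernel alias with the same cache colour as the page's user-space address, so the VIPT D-cache never holds two differently-coloured aliases of the page. A sketch of what CACHE_COLOUR() amounts to, assuming the usual ARM 4-page aliasing window (SHMLBA = 4 * PAGE_SIZE); both macros are re-declared locally for the example:

#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define SHMLBA     (4 * PAGE_SIZE)   /* assumed: 4-page aliasing window */
#define CACHE_COLOUR(vaddr) (((vaddr) & (SHMLBA - 1)) >> PAGE_SHIFT)

int main(void)
{
	unsigned long user_vaddr = 0x4007b000UL;   /* example user address */
	unsigned long colour = CACHE_COLOUR(user_vaddr);

	/* The copy code adds colour << PAGE_SHIFT to its fixed kernel window
	 * (COPYPAGE_V6_TO in the hunk above) so the kernel and user mappings
	 * of the page land on the same cache colour. */
	printf("colour %lu, window offset %#lx\n", colour, colour << PAGE_SHIFT);
	return 0;
}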
*/ - spin_lock(&v6_lock); - - set_pte_ext(TOP_PTE(from_address) + offset, pfn_pte(page_to_pfn(from), PAGE_KERNEL), 0); - set_pte_ext(TOP_PTE(to_address) + offset, pfn_pte(page_to_pfn(to), PAGE_KERNEL), 0); + raw_spin_lock(&v6_lock); - kfrom = from_address + (offset << PAGE_SHIFT); - kto = to_address + (offset << PAGE_SHIFT); + kfrom = COPYPAGE_V6_FROM + (offset << PAGE_SHIFT); + kto = COPYPAGE_V6_TO + (offset << PAGE_SHIFT); - flush_tlb_kernel_page(kfrom); - flush_tlb_kernel_page(kto); + set_top_pte(kfrom, mk_pte(from, PAGE_KERNEL)); + set_top_pte(kto, mk_pte(to, PAGE_KERNEL)); copy_page((void *)kto, (void *)kfrom); - spin_unlock(&v6_lock); + raw_spin_unlock(&v6_lock); } /* @@ -112,8 +105,7 @@ static void v6_copy_user_highpage_aliasing(struct page *to, */ static void v6_clear_user_highpage_aliasing(struct page *page, unsigned long vaddr) { - unsigned int offset = CACHE_COLOUR(vaddr); - unsigned long to = to_address + (offset << PAGE_SHIFT); + unsigned long to = COPYPAGE_V6_TO + (CACHE_COLOUR(vaddr) << PAGE_SHIFT); /* FIXME: not highmem safe */ discard_old_kernel_data(page_address(page)); @@ -122,13 +114,12 @@ static void v6_clear_user_highpage_aliasing(struct page *page, unsigned long vad * Now clear the page using the same cache colour as * the pages ultimate destination. */ - spin_lock(&v6_lock); + raw_spin_lock(&v6_lock); - set_pte_ext(TOP_PTE(to_address) + offset, pfn_pte(page_to_pfn(page), PAGE_KERNEL), 0); - flush_tlb_kernel_page(to); + set_top_pte(to, mk_pte(page, PAGE_KERNEL)); clear_page((void *)to); - spin_unlock(&v6_lock); + raw_spin_unlock(&v6_lock); } struct cpu_user_fns v6_user_fns __initdata = { diff --git a/arch/arm/mm/copypage-xsc3.c b/arch/arm/mm/copypage-xsc3.c index f9cde0702f1..03a2042aced 100644 --- a/arch/arm/mm/copypage-xsc3.c +++ b/arch/arm/mm/copypage-xsc3.c @@ -75,12 +75,12 @@ void xsc3_mc_copy_user_highpage(struct page *to, struct page *from, { void *kto, *kfrom; - kto = kmap_atomic(to, KM_USER0); - kfrom = kmap_atomic(from, KM_USER1); + kto = kmap_atomic(to); + kfrom = kmap_atomic(from); flush_cache_page(vma, vaddr, page_to_pfn(from)); xsc3_mc_copy_user_page(kto, kfrom); - kunmap_atomic(kfrom, KM_USER1); - kunmap_atomic(kto, KM_USER0); + kunmap_atomic(kfrom); + kunmap_atomic(kto); } /* @@ -90,7 +90,7 @@ void xsc3_mc_copy_user_highpage(struct page *to, struct page *from, */ void xsc3_mc_clear_user_highpage(struct page *page, unsigned long vaddr) { - void *ptr, *kaddr = kmap_atomic(page, KM_USER0); + void *ptr, *kaddr = kmap_atomic(page); asm volatile ("\ mov r1, %2 \n\ mov r2, #0 \n\ @@ -105,7 +105,7 @@ void xsc3_mc_clear_user_highpage(struct page *page, unsigned long vaddr) : "=r" (ptr) : "0" (kaddr), "I" (PAGE_SIZE / 32) : "r1", "r2", "r3"); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); } struct cpu_user_fns xsc3_mc_user_fns __initdata = { diff --git a/arch/arm/mm/copypage-xscale.c b/arch/arm/mm/copypage-xscale.c index 649bbcd325b..0fb85025344 100644 --- a/arch/arm/mm/copypage-xscale.c +++ b/arch/arm/mm/copypage-xscale.c @@ -23,16 +23,10 @@ #include "mm.h" -/* - * 0xffff8000 to 0xffffffff is reserved for any ARM architecture - * specific hacks for copying pages efficiently. 
- */ -#define COPYPAGE_MINICACHE 0xffff8000 - #define minicache_pgprot __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | \ L_PTE_MT_MINICACHE) -static DEFINE_SPINLOCK(minicache_lock); +static DEFINE_RAW_SPINLOCK(minicache_lock); /* * XScale mini-dcache optimised copy_user_highpage @@ -93,21 +87,20 @@ mc_copy_user_page(void *from, void *to) void xscale_mc_copy_user_highpage(struct page *to, struct page *from, unsigned long vaddr, struct vm_area_struct *vma) { - void *kto = kmap_atomic(to, KM_USER1); + void *kto = kmap_atomic(to); if (!test_and_set_bit(PG_dcache_clean, &from->flags)) __flush_dcache_page(page_mapping(from), from); - spin_lock(&minicache_lock); + raw_spin_lock(&minicache_lock); - set_pte_ext(TOP_PTE(COPYPAGE_MINICACHE), pfn_pte(page_to_pfn(from), minicache_pgprot), 0); - flush_tlb_kernel_page(COPYPAGE_MINICACHE); + set_top_pte(COPYPAGE_MINICACHE, mk_pte(from, minicache_pgprot)); mc_copy_user_page((void *)COPYPAGE_MINICACHE, kto); - spin_unlock(&minicache_lock); + raw_spin_unlock(&minicache_lock); - kunmap_atomic(kto, KM_USER1); + kunmap_atomic(kto); } /* @@ -116,7 +109,7 @@ void xscale_mc_copy_user_highpage(struct page *to, struct page *from, void xscale_mc_clear_user_highpage(struct page *page, unsigned long vaddr) { - void *ptr, *kaddr = kmap_atomic(page, KM_USER0); + void *ptr, *kaddr = kmap_atomic(page); asm volatile( "mov r1, %2 \n\ mov r2, #0 \n\ @@ -133,7 +126,7 @@ xscale_mc_clear_user_highpage(struct page *page, unsigned long vaddr) : "=r" (ptr) : "0" (kaddr), "I" (PAGE_SIZE / 32) : "r1", "r2", "r3", "ip"); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); } struct cpu_user_fns xscale_mc_user_fns __initdata = { diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index ac6a36142fc..1f88db06b13 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -9,6 +9,7 @@ * * DMA uncached mapping support. */ +#include <linux/bootmem.h> #include <linux/module.h> #include <linux/mm.h> #include <linux/gfp.h> @@ -17,16 +18,187 @@ #include <linux/init.h> #include <linux/device.h> #include <linux/dma-mapping.h> +#include <linux/dma-contiguous.h> +#include <linux/highmem.h> +#include <linux/memblock.h> +#include <linux/slab.h> +#include <linux/iommu.h> +#include <linux/io.h> +#include <linux/vmalloc.h> +#include <linux/sizes.h> #include <asm/memory.h> #include <asm/highmem.h> #include <asm/cacheflush.h> #include <asm/tlbflush.h> -#include <asm/sizes.h> +#include <asm/mach/arch.h> +#include <asm/dma-iommu.h> +#include <asm/mach/map.h> +#include <asm/system_info.h> +#include <asm/dma-contiguous.h> + +#include "mm.h" + +/* + * The DMA API is built upon the notion of "buffer ownership". A buffer + * is either exclusively owned by the CPU (and therefore may be accessed + * by it) or exclusively owned by the DMA device. These helper functions + * represent the transitions between these two ownership states. + * + * Note, however, that on later ARMs, this notion does not work due to + * speculative prefetches. We model our approach on the assumption that + * the CPU does do speculative prefetches, which means we clean caches + * before transfers and delay cache invalidation until transfer completion. 
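The ownership comment above reduces to one rule for streaming DMA on speculating CPUs: clean (write back) before handing the buffer to the device, invalidate when the CPU takes it back. The real code picks clean versus invalidate per DMA direction in __dma_page_cpu_to_dev()/__dma_page_dev_to_cpu(); the sketch below collapses that to the two hand-off points only:

#include <stdio.h>

enum owner { OWNER_CPU, OWNER_DEVICE };

struct dma_buffer {
	enum owner owner;
};

/* CPU -> device hand-off (the map side): write dirty lines back so the
 * device reads up-to-date data. */
static void buffer_to_device(struct dma_buffer *buf)
{
	printf("clean D-cache over the buffer\n");
	buf->owner = OWNER_DEVICE;
}

/* Device -> CPU hand-off (the unmap/sync-for-cpu side): invalidate so the
 * CPU cannot hit stale lines speculatively fetched during the transfer. */
static void buffer_to_cpu(struct dma_buffer *buf)
{
	printf("invalidate D-cache over the buffer\n");
	buf->owner = OWNER_CPU;
}

int main(void)
{
	struct dma_buffer buf = { OWNER_CPU };
	buffer_to_device(&buf);   /* device may now DMA to/from the buffer */
	buffer_to_cpu(&buf);      /* CPU may now safely read the result */
	return 0;
}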
+ * + */ +static void __dma_page_cpu_to_dev(struct page *, unsigned long, + size_t, enum dma_data_direction); +static void __dma_page_dev_to_cpu(struct page *, unsigned long, + size_t, enum dma_data_direction); + +/** + * arm_dma_map_page - map a portion of a page for streaming DMA + * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices + * @page: page that buffer resides in + * @offset: offset into page for start of buffer + * @size: size of buffer to map + * @dir: DMA transfer direction + * + * Ensure that any data held in the cache is appropriately discarded + * or written back. + * + * The device owns this memory once this call has completed. The CPU + * can regain ownership by calling dma_unmap_page(). + */ +static dma_addr_t arm_dma_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) + __dma_page_cpu_to_dev(page, offset, size, dir); + return pfn_to_dma(dev, page_to_pfn(page)) + offset; +} + +static dma_addr_t arm_coherent_dma_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + return pfn_to_dma(dev, page_to_pfn(page)) + offset; +} + +/** + * arm_dma_unmap_page - unmap a buffer previously mapped through dma_map_page() + * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices + * @handle: DMA address of buffer + * @size: size of buffer (same as passed to dma_map_page) + * @dir: DMA transfer direction (same as passed to dma_map_page) + * + * Unmap a page streaming mode DMA translation. The handle and size + * must match what was provided in the previous dma_map_page() call. + * All other usages are undefined. + * + * After this call, reads by the CPU to the buffer are guaranteed to see + * whatever the device wrote there. 
+ */ +static void arm_dma_unmap_page(struct device *dev, dma_addr_t handle, + size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) + __dma_page_dev_to_cpu(pfn_to_page(dma_to_pfn(dev, handle)), + handle & ~PAGE_MASK, size, dir); +} + +static void arm_dma_sync_single_for_cpu(struct device *dev, + dma_addr_t handle, size_t size, enum dma_data_direction dir) +{ + unsigned int offset = handle & (PAGE_SIZE - 1); + struct page *page = pfn_to_page(dma_to_pfn(dev, handle-offset)); + __dma_page_dev_to_cpu(page, offset, size, dir); +} + +static void arm_dma_sync_single_for_device(struct device *dev, + dma_addr_t handle, size_t size, enum dma_data_direction dir) +{ + unsigned int offset = handle & (PAGE_SIZE - 1); + struct page *page = pfn_to_page(dma_to_pfn(dev, handle-offset)); + __dma_page_cpu_to_dev(page, offset, size, dir); +} + +struct dma_map_ops arm_dma_ops = { + .alloc = arm_dma_alloc, + .free = arm_dma_free, + .mmap = arm_dma_mmap, + .get_sgtable = arm_dma_get_sgtable, + .map_page = arm_dma_map_page, + .unmap_page = arm_dma_unmap_page, + .map_sg = arm_dma_map_sg, + .unmap_sg = arm_dma_unmap_sg, + .sync_single_for_cpu = arm_dma_sync_single_for_cpu, + .sync_single_for_device = arm_dma_sync_single_for_device, + .sync_sg_for_cpu = arm_dma_sync_sg_for_cpu, + .sync_sg_for_device = arm_dma_sync_sg_for_device, + .set_dma_mask = arm_dma_set_mask, +}; +EXPORT_SYMBOL(arm_dma_ops); + +static void *arm_coherent_dma_alloc(struct device *dev, size_t size, + dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs); +static void arm_coherent_dma_free(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t handle, struct dma_attrs *attrs); + +struct dma_map_ops arm_coherent_dma_ops = { + .alloc = arm_coherent_dma_alloc, + .free = arm_coherent_dma_free, + .mmap = arm_dma_mmap, + .get_sgtable = arm_dma_get_sgtable, + .map_page = arm_coherent_dma_map_page, + .map_sg = arm_dma_map_sg, + .set_dma_mask = arm_dma_set_mask, +}; +EXPORT_SYMBOL(arm_coherent_dma_ops); + +static int __dma_supported(struct device *dev, u64 mask, bool warn) +{ + unsigned long max_dma_pfn; + + /* + * If the mask allows for more memory than we can address, + * and we actually have that much memory, then we must + * indicate that DMA to this device is not supported. + */ + if (sizeof(mask) != sizeof(dma_addr_t) && + mask > (dma_addr_t)~0 && + dma_to_pfn(dev, ~0) < max_pfn) { + if (warn) { + dev_warn(dev, "Coherent DMA mask %#llx is larger than dma_addr_t allows\n", + mask); + dev_warn(dev, "Driver did not use or check the return value from dma_set_coherent_mask()?\n"); + } + return 0; + } + + max_dma_pfn = min(max_pfn, arm_dma_pfn_limit); + + /* + * Translate the device's DMA mask to a PFN limit. This + * PFN number includes the page which we can DMA to. 
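__dma_supported() above turns the device's coherent DMA mask into a PFN and requires it to reach at least min(max_pfn, arm_dma_pfn_limit). A standalone arithmetic sketch of that comparison, assuming 4 KB pages, no DMA offset (so dma_to_pfn() is a plain shift), and ignoring the separate dma_addr_t-width check; the memory-size numbers are illustrative:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12

/* dma_to_pfn() with a zero DMA offset: just a shift. */
static unsigned long dma_to_pfn(uint64_t dma_addr)
{
	return (unsigned long)(dma_addr >> PAGE_SHIFT);
}

static int dma_supported(uint64_t mask, unsigned long max_pfn,
			 unsigned long arm_dma_pfn_limit)
{
	unsigned long max_dma_pfn =
		(max_pfn < arm_dma_pfn_limit) ? max_pfn : arm_dma_pfn_limit;

	/* The mask must cover at least up to the DMA zone limit. */
	return dma_to_pfn(mask) >= max_dma_pfn;
}

int main(void)
{
	unsigned long max_pfn   = 0x80000;   /* 2 GB of RAM */
	unsigned long dma_limit = 0x40000;   /* 1 GB DMA-able, illustrative */

	printf("%d\n", dma_supported(0xffffffffULL, max_pfn, dma_limit));  /* 1 */
	printf("%d\n", dma_supported(0x00ffffffULL, max_pfn, dma_limit));  /* 0 */
	return 0;
}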
+ */ + if (dma_to_pfn(dev, mask) < max_dma_pfn) { + if (warn) + dev_warn(dev, "Coherent DMA mask %#llx (pfn %#lx-%#lx) covers a smaller range of system memory than the DMA zone pfn 0x0-%#lx\n", + mask, + dma_to_pfn(dev, 0), dma_to_pfn(dev, mask) + 1, + max_dma_pfn + 1); + return 0; + } + + return 1; +} static u64 get_coherent_dma_mask(struct device *dev) { - u64 mask = ISA_DMA_THRESHOLD; + u64 mask = (u64)DMA_BIT_MASK(32); if (dev) { mask = dev->coherent_dma_mask; @@ -40,17 +212,39 @@ static u64 get_coherent_dma_mask(struct device *dev) return 0; } - if ((~mask) & ISA_DMA_THRESHOLD) { - dev_warn(dev, "coherent DMA mask %#llx is smaller " - "than system GFP_DMA mask %#llx\n", - mask, (unsigned long long)ISA_DMA_THRESHOLD); + if (!__dma_supported(dev, mask, true)) return 0; - } } return mask; } +static void __dma_clear_buffer(struct page *page, size_t size) +{ + /* + * Ensure that the allocated pages are zeroed, and that any data + * lurking in the kernel direct-mapped region is invalidated. + */ + if (PageHighMem(page)) { + phys_addr_t base = __pfn_to_phys(page_to_pfn(page)); + phys_addr_t end = base + size; + while (size > 0) { + void *ptr = kmap_atomic(page); + memset(ptr, 0, PAGE_SIZE); + dmac_flush_range(ptr, ptr + PAGE_SIZE); + kunmap_atomic(ptr); + page++; + size -= PAGE_SIZE; + } + outer_flush_range(base, end); + } else { + void *ptr = page_address(page); + memset(ptr, 0, size); + dmac_flush_range(ptr, ptr + size); + outer_flush_range(__pa(ptr), __pa(ptr) + size); + } +} + /* * Allocate a DMA buffer for 'dev' of size 'size' using the * specified gfp mask. Note that 'size' must be page aligned. @@ -59,23 +253,6 @@ static struct page *__dma_alloc_buffer(struct device *dev, size_t size, gfp_t gf { unsigned long order = get_order(size); struct page *page, *p, *e; - void *ptr; - u64 mask = get_coherent_dma_mask(dev); - -#ifdef CONFIG_DMA_API_DEBUG - u64 limit = (mask + 1) & ~mask; - if (limit && size >= limit) { - dev_warn(dev, "coherent allocation too big (requested %#x mask %#llx)\n", - size, mask); - return NULL; - } -#endif - - if (!mask) - return NULL; - - if (mask < 0xffffffffULL) - gfp |= GFP_DMA; page = alloc_pages(gfp, order); if (!page) @@ -88,14 +265,7 @@ static struct page *__dma_alloc_buffer(struct device *dev, size_t size, gfp_t gf for (p = page + (size >> PAGE_SHIFT), e = page + (1 << order); p < e; p++) __free_page(p); - /* - * Ensure that the allocated pages are zeroed, and that any data - * lurking in the kernel direct-mapped region is invalidated. 
- */ - ptr = page_address(page); - memset(ptr, 0, size); - dmac_flush_range(ptr, ptr + size); - outer_flush_range(__pa(ptr), __pa(ptr) + size); + __dma_clear_buffer(page, size); return page; } @@ -114,204 +284,439 @@ static void __dma_free_buffer(struct page *page, size_t size) } #ifdef CONFIG_MMU -/* Sanity check size */ -#if (CONSISTENT_DMA_SIZE % SZ_2M) -#error "CONSISTENT_DMA_SIZE must be multiple of 2MiB" -#endif -#define CONSISTENT_OFFSET(x) (((unsigned long)(x) - CONSISTENT_BASE) >> PAGE_SHIFT) -#define CONSISTENT_PTE_INDEX(x) (((unsigned long)(x) - CONSISTENT_BASE) >> PGDIR_SHIFT) -#define NUM_CONSISTENT_PTES (CONSISTENT_DMA_SIZE >> PGDIR_SHIFT) +static void *__alloc_from_contiguous(struct device *dev, size_t size, + pgprot_t prot, struct page **ret_page, + const void *caller); + +static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp, + pgprot_t prot, struct page **ret_page, + const void *caller); + +static void * +__dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot, + const void *caller) +{ + struct vm_struct *area; + unsigned long addr; + + /* + * DMA allocation can be mapped to user space, so lets + * set VM_USERMAP flags too. + */ + area = get_vm_area_caller(size, VM_ARM_DMA_CONSISTENT | VM_USERMAP, + caller); + if (!area) + return NULL; + addr = (unsigned long)area->addr; + area->phys_addr = __pfn_to_phys(page_to_pfn(page)); + + if (ioremap_page_range(addr, addr + size, area->phys_addr, prot)) { + vunmap((void *)addr); + return NULL; + } + return (void *)addr; +} + +static void __dma_free_remap(void *cpu_addr, size_t size) +{ + unsigned int flags = VM_ARM_DMA_CONSISTENT | VM_USERMAP; + struct vm_struct *area = find_vm_area(cpu_addr); + if (!area || (area->flags & flags) != flags) { + WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr); + return; + } + unmap_kernel_range((unsigned long)cpu_addr, size); + vunmap(cpu_addr); +} + +#define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K + +struct dma_pool { + size_t size; + spinlock_t lock; + unsigned long *bitmap; + unsigned long nr_pages; + void *vaddr; + struct page **pages; +}; + +static struct dma_pool atomic_pool = { + .size = DEFAULT_DMA_COHERENT_POOL_SIZE, +}; + +static int __init early_coherent_pool(char *p) +{ + atomic_pool.size = memparse(p, &p); + return 0; +} +early_param("coherent_pool", early_coherent_pool); + +void __init init_dma_coherent_pool_size(unsigned long size) +{ + /* + * Catch any attempt to set the pool size too late. + */ + BUG_ON(atomic_pool.vaddr); + + /* + * Set architecture specific coherent pool size only if + * it has not been changed by kernel command line parameter. + */ + if (atomic_pool.size == DEFAULT_DMA_COHERENT_POOL_SIZE) + atomic_pool.size = size; +} /* - * These are the page tables (2MB each) covering uncached, DMA consistent allocations + * Initialise the coherent pool for atomic allocations. 
*/ -static pte_t *consistent_pte[NUM_CONSISTENT_PTES]; +static int __init atomic_pool_init(void) +{ + struct dma_pool *pool = &atomic_pool; + pgprot_t prot = pgprot_dmacoherent(PAGE_KERNEL); + gfp_t gfp = GFP_KERNEL | GFP_DMA; + unsigned long nr_pages = pool->size >> PAGE_SHIFT; + unsigned long *bitmap; + struct page *page; + struct page **pages; + void *ptr; + int bitmap_size = BITS_TO_LONGS(nr_pages) * sizeof(long); -#include "vmregion.h" + bitmap = kzalloc(bitmap_size, GFP_KERNEL); + if (!bitmap) + goto no_bitmap; -static struct arm_vmregion_head consistent_head = { - .vm_lock = __SPIN_LOCK_UNLOCKED(&consistent_head.vm_lock), - .vm_list = LIST_HEAD_INIT(consistent_head.vm_list), - .vm_start = CONSISTENT_BASE, - .vm_end = CONSISTENT_END, -}; + pages = kzalloc(nr_pages * sizeof(struct page *), GFP_KERNEL); + if (!pages) + goto no_pages; -#ifdef CONFIG_HUGETLB_PAGE -#error ARM Coherent DMA allocator does not (yet) support huge TLB -#endif + if (dev_get_cma_area(NULL)) + ptr = __alloc_from_contiguous(NULL, pool->size, prot, &page, + atomic_pool_init); + else + ptr = __alloc_remap_buffer(NULL, pool->size, gfp, prot, &page, + atomic_pool_init); + if (ptr) { + int i; + + for (i = 0; i < nr_pages; i++) + pages[i] = page + i; + + spin_lock_init(&pool->lock); + pool->vaddr = ptr; + pool->pages = pages; + pool->bitmap = bitmap; + pool->nr_pages = nr_pages; + pr_info("DMA: preallocated %u KiB pool for atomic coherent allocations\n", + (unsigned)pool->size / 1024); + return 0; + } + kfree(pages); +no_pages: + kfree(bitmap); +no_bitmap: + pr_err("DMA: failed to allocate %u KiB pool for atomic coherent allocation\n", + (unsigned)pool->size / 1024); + return -ENOMEM; +} /* - * Initialise the consistent memory allocation. + * CMA is activated by core_initcall, so we must be called after it. */ -static int __init consistent_init(void) +postcore_initcall(atomic_pool_init); + +struct dma_contig_early_reserve { + phys_addr_t base; + unsigned long size; +}; + +static struct dma_contig_early_reserve dma_mmu_remap[MAX_CMA_AREAS] __initdata; + +static int dma_mmu_remap_num __initdata; + +void __init dma_contiguous_early_fixup(phys_addr_t base, unsigned long size) { - int ret = 0; - pgd_t *pgd; - pmd_t *pmd; - pte_t *pte; - int i = 0; - u32 base = CONSISTENT_BASE; + dma_mmu_remap[dma_mmu_remap_num].base = base; + dma_mmu_remap[dma_mmu_remap_num].size = size; + dma_mmu_remap_num++; +} - do { - pgd = pgd_offset(&init_mm, base); - pmd = pmd_alloc(&init_mm, pgd, base); - if (!pmd) { - printk(KERN_ERR "%s: no pmd tables\n", __func__); - ret = -ENOMEM; - break; - } - WARN_ON(!pmd_none(*pmd)); +void __init dma_contiguous_remap(void) +{ + int i; + for (i = 0; i < dma_mmu_remap_num; i++) { + phys_addr_t start = dma_mmu_remap[i].base; + phys_addr_t end = start + dma_mmu_remap[i].size; + struct map_desc map; + unsigned long addr; + + if (end > arm_lowmem_limit) + end = arm_lowmem_limit; + if (start >= end) + continue; - pte = pte_alloc_kernel(pmd, base); - if (!pte) { - printk(KERN_ERR "%s: no pte tables\n", __func__); - ret = -ENOMEM; - break; - } + map.pfn = __phys_to_pfn(start); + map.virtual = __phys_to_virt(start); + map.length = end - start; + map.type = MT_MEMORY_DMA_READY; - consistent_pte[i++] = pte; - base += (1 << PGDIR_SHIFT); - } while (base < CONSISTENT_END); + /* + * Clear previous low-memory mapping to ensure that the + * TLB does not see any conflicting entries, then flush + * the TLB of the old entries before creating new mappings. 
+ * + * This ensures that any speculatively loaded TLB entries + * (even though they may be rare) can not cause any problems, + * and ensures that this code is architecturally compliant. + */ + for (addr = __phys_to_virt(start); addr < __phys_to_virt(end); + addr += PMD_SIZE) + pmd_clear(pmd_off_k(addr)); - return ret; + flush_tlb_kernel_range(__phys_to_virt(start), + __phys_to_virt(end)); + + iotable_init(&map, 1); + } } -core_initcall(consistent_init); +static int __dma_update_pte(pte_t *pte, pgtable_t token, unsigned long addr, + void *data) +{ + struct page *page = virt_to_page(addr); + pgprot_t prot = *(pgprot_t *)data; -static void * -__dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot) + set_pte_ext(pte, mk_pte(page, prot), 0); + return 0; +} + +static void __dma_remap(struct page *page, size_t size, pgprot_t prot) { - struct arm_vmregion *c; - size_t align; - int bit; + unsigned long start = (unsigned long) page_address(page); + unsigned end = start + size; + + apply_to_page_range(&init_mm, start, size, __dma_update_pte, &prot); + flush_tlb_kernel_range(start, end); +} + +static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp, + pgprot_t prot, struct page **ret_page, + const void *caller) +{ + struct page *page; + void *ptr; + page = __dma_alloc_buffer(dev, size, gfp); + if (!page) + return NULL; - if (!consistent_pte[0]) { - printk(KERN_ERR "%s: not initialised\n", __func__); - dump_stack(); + ptr = __dma_alloc_remap(page, size, gfp, prot, caller); + if (!ptr) { + __dma_free_buffer(page, size); return NULL; } - /* - * Align the virtual region allocation - maximum alignment is - * a section size, minimum is a page size. This helps reduce - * fragmentation of the DMA space, and also prevents allocations - * smaller than a section from crossing a section boundary. - */ - bit = fls(size - 1); - if (bit > SECTION_SHIFT) - bit = SECTION_SHIFT; - align = 1 << bit; + *ret_page = page; + return ptr; +} + +static void *__alloc_from_pool(size_t size, struct page **ret_page) +{ + struct dma_pool *pool = &atomic_pool; + unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; + unsigned int pageno; + unsigned long flags; + void *ptr = NULL; + unsigned long align_mask; + + if (!pool->vaddr) { + WARN(1, "coherent pool not initialised!\n"); + return NULL; + } /* - * Allocate a virtual address in the consistent mapping region. + * Align the region allocation - allocations from pool are rather + * small, so align them to their order in pages, minimum is a page + * size. This helps reduce fragmentation of the DMA space. 
*/ - c = arm_vmregion_alloc(&consistent_head, align, size, - gfp & ~(__GFP_DMA | __GFP_HIGHMEM)); - if (c) { - pte_t *pte; - int idx = CONSISTENT_PTE_INDEX(c->vm_start); - u32 off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1); + align_mask = (1 << get_order(size)) - 1; + + spin_lock_irqsave(&pool->lock, flags); + pageno = bitmap_find_next_zero_area(pool->bitmap, pool->nr_pages, + 0, count, align_mask); + if (pageno < pool->nr_pages) { + bitmap_set(pool->bitmap, pageno, count); + ptr = pool->vaddr + PAGE_SIZE * pageno; + *ret_page = pool->pages[pageno]; + } else { + pr_err_once("ERROR: %u KiB atomic DMA coherent pool is too small!\n" + "Please increase it with coherent_pool= kernel parameter!\n", + (unsigned)pool->size / 1024); + } + spin_unlock_irqrestore(&pool->lock, flags); - pte = consistent_pte[idx] + off; - c->vm_pages = page; + return ptr; +} - do { - BUG_ON(!pte_none(*pte)); +static bool __in_atomic_pool(void *start, size_t size) +{ + struct dma_pool *pool = &atomic_pool; + void *end = start + size; + void *pool_start = pool->vaddr; + void *pool_end = pool->vaddr + pool->size; - set_pte_ext(pte, mk_pte(page, prot), 0); - page++; - pte++; - off++; - if (off >= PTRS_PER_PTE) { - off = 0; - pte = consistent_pte[++idx]; - } - } while (size -= PAGE_SIZE); + if (start < pool_start || start >= pool_end) + return false; - dsb(); + if (end <= pool_end) + return true; - return (void *)c->vm_start; - } - return NULL; + WARN(1, "Wrong coherent size(%p-%p) from atomic pool(%p-%p)\n", + start, end - 1, pool_start, pool_end - 1); + + return false; } -static void __dma_free_remap(void *cpu_addr, size_t size) +static int __free_from_pool(void *start, size_t size) { - struct arm_vmregion *c; - unsigned long addr; - pte_t *ptep; - int idx; - u32 off; - - c = arm_vmregion_find_remove(&consistent_head, (unsigned long)cpu_addr); - if (!c) { - printk(KERN_ERR "%s: trying to free invalid coherent area: %p\n", - __func__, cpu_addr); - dump_stack(); - return; - } + struct dma_pool *pool = &atomic_pool; + unsigned long pageno, count; + unsigned long flags; - if ((c->vm_end - c->vm_start) != size) { - printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n", - __func__, c->vm_end - c->vm_start, size); - dump_stack(); - size = c->vm_end - c->vm_start; - } + if (!__in_atomic_pool(start, size)) + return 0; - idx = CONSISTENT_PTE_INDEX(c->vm_start); - off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1); - ptep = consistent_pte[idx] + off; - addr = c->vm_start; - do { - pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep); - - ptep++; - addr += PAGE_SIZE; - off++; - if (off >= PTRS_PER_PTE) { - off = 0; - ptep = consistent_pte[++idx]; - } + pageno = (start - pool->vaddr) >> PAGE_SHIFT; + count = size >> PAGE_SHIFT; + + spin_lock_irqsave(&pool->lock, flags); + bitmap_clear(pool->bitmap, pageno, count); + spin_unlock_irqrestore(&pool->lock, flags); - if (pte_none(pte) || !pte_present(pte)) - printk(KERN_CRIT "%s: bad page in kernel page table\n", - __func__); - } while (size -= PAGE_SIZE); + return 1; +} - flush_tlb_kernel_range(c->vm_start, c->vm_end); +static void *__alloc_from_contiguous(struct device *dev, size_t size, + pgprot_t prot, struct page **ret_page, + const void *caller) +{ + unsigned long order = get_order(size); + size_t count = size >> PAGE_SHIFT; + struct page *page; + void *ptr; - arm_vmregion_free(&consistent_head, c); + page = dma_alloc_from_contiguous(dev, count, order); + if (!page) + return NULL; + + __dma_clear_buffer(page, size); + + if (PageHighMem(page)) { + ptr = 
__dma_alloc_remap(page, size, GFP_KERNEL, prot, caller); + if (!ptr) { + dma_release_from_contiguous(dev, page, count); + return NULL; + } + } else { + __dma_remap(page, size, prot); + ptr = page_address(page); + } + *ret_page = page; + return ptr; +} + +static void __free_from_contiguous(struct device *dev, struct page *page, + void *cpu_addr, size_t size) +{ + if (PageHighMem(page)) + __dma_free_remap(cpu_addr, size); + else + __dma_remap(page, size, PAGE_KERNEL); + dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT); } +static inline pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot) +{ + prot = dma_get_attr(DMA_ATTR_WRITE_COMBINE, attrs) ? + pgprot_writecombine(prot) : + pgprot_dmacoherent(prot); + return prot; +} + +#define nommu() 0 + #else /* !CONFIG_MMU */ -#define __dma_alloc_remap(page, size, gfp, prot) page_address(page) -#define __dma_free_remap(addr, size) do { } while (0) +#define nommu() 1 + +#define __get_dma_pgprot(attrs, prot) __pgprot(0) +#define __alloc_remap_buffer(dev, size, gfp, prot, ret, c) NULL +#define __alloc_from_pool(size, ret_page) NULL +#define __alloc_from_contiguous(dev, size, prot, ret, c) NULL +#define __free_from_pool(cpu_addr, size) 0 +#define __free_from_contiguous(dev, page, cpu_addr, size) do { } while (0) +#define __dma_free_remap(cpu_addr, size) do { } while (0) #endif /* CONFIG_MMU */ -static void * -__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, - pgprot_t prot) +static void *__alloc_simple_buffer(struct device *dev, size_t size, gfp_t gfp, + struct page **ret_page) { struct page *page; + page = __dma_alloc_buffer(dev, size, gfp); + if (!page) + return NULL; + + *ret_page = page; + return page_address(page); +} + + + +static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, + gfp_t gfp, pgprot_t prot, bool is_coherent, const void *caller) +{ + u64 mask = get_coherent_dma_mask(dev); + struct page *page = NULL; void *addr; - *handle = ~0; - size = PAGE_ALIGN(size); +#ifdef CONFIG_DMA_API_DEBUG + u64 limit = (mask + 1) & ~mask; + if (limit && size >= limit) { + dev_warn(dev, "coherent allocation too big (requested %#x mask %#llx)\n", + size, mask); + return NULL; + } +#endif - page = __dma_alloc_buffer(dev, size, gfp); - if (!page) + if (!mask) return NULL; - if (!arch_is_coherent()) - addr = __dma_alloc_remap(page, size, gfp, prot); + if (mask < 0xffffffffULL) + gfp |= GFP_DMA; + + /* + * Following is a work-around (a.k.a. hack) to prevent pages + * with __GFP_COMP being passed to split_page() which cannot + * handle them. The real problem is that this flag probably + * should be 0 on ARM as it is not supported on this + * platform; see CONFIG_HUGETLBFS. + */ + gfp &= ~(__GFP_COMP); + + *handle = DMA_ERROR_CODE; + size = PAGE_ALIGN(size); + + if (is_coherent || nommu()) + addr = __alloc_simple_buffer(dev, size, gfp, &page); + else if (!(gfp & __GFP_WAIT)) + addr = __alloc_from_pool(size, &page); + else if (!dev_get_cma_area(dev)) + addr = __alloc_remap_buffer(dev, size, gfp, prot, &page, caller); else - addr = page_address(page); + addr = __alloc_from_contiguous(dev, size, prot, &page, caller); if (addr) - *handle = page_to_dma(dev, page); + *handle = pfn_to_dma(dev, page_to_pfn(page)); return addr; } @@ -320,185 +725,176 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, * Allocate DMA-coherent memory space and return both the kernel remapped * virtual and bus address for that space. 
*/ -void * -dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp) +void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, + gfp_t gfp, struct dma_attrs *attrs) { + pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL); void *memory; if (dma_alloc_from_coherent(dev, size, handle, &memory)) return memory; - return __dma_alloc(dev, size, handle, gfp, - pgprot_dmacoherent(pgprot_kernel)); + return __dma_alloc(dev, size, handle, gfp, prot, false, + __builtin_return_address(0)); } -EXPORT_SYMBOL(dma_alloc_coherent); -/* - * Allocate a writecombining region, in much the same way as - * dma_alloc_coherent above. - */ -void * -dma_alloc_writecombine(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp) +static void *arm_coherent_dma_alloc(struct device *dev, size_t size, + dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs) { - return __dma_alloc(dev, size, handle, gfp, - pgprot_writecombine(pgprot_kernel)); + pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL); + void *memory; + + if (dma_alloc_from_coherent(dev, size, handle, &memory)) + return memory; + + return __dma_alloc(dev, size, handle, gfp, prot, true, + __builtin_return_address(0)); } -EXPORT_SYMBOL(dma_alloc_writecombine); -static int dma_mmap(struct device *dev, struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t dma_addr, size_t size) +/* + * Create userspace mapping for the DMA-coherent memory. + */ +int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + struct dma_attrs *attrs) { int ret = -ENXIO; #ifdef CONFIG_MMU - unsigned long user_size, kern_size; - struct arm_vmregion *c; - - user_size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + unsigned long nr_vma_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; + unsigned long pfn = dma_to_pfn(dev, dma_addr); + unsigned long off = vma->vm_pgoff; - c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr); - if (c) { - unsigned long off = vma->vm_pgoff; + vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot); - kern_size = (c->vm_end - c->vm_start) >> PAGE_SHIFT; + if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret)) + return ret; - if (off < kern_size && - user_size <= (kern_size - off)) { - ret = remap_pfn_range(vma, vma->vm_start, - page_to_pfn(c->vm_pages) + off, - user_size << PAGE_SHIFT, - vma->vm_page_prot); - } + if (off < nr_pages && nr_vma_pages <= (nr_pages - off)) { + ret = remap_pfn_range(vma, vma->vm_start, + pfn + off, + vma->vm_end - vma->vm_start, + vma->vm_page_prot); } #endif /* CONFIG_MMU */ return ret; } -int dma_mmap_coherent(struct device *dev, struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t dma_addr, size_t size) -{ - vma->vm_page_prot = pgprot_dmacoherent(vma->vm_page_prot); - return dma_mmap(dev, vma, cpu_addr, dma_addr, size); -} -EXPORT_SYMBOL(dma_mmap_coherent); - -int dma_mmap_writecombine(struct device *dev, struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t dma_addr, size_t size) -{ - vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); - return dma_mmap(dev, vma, cpu_addr, dma_addr, size); -} -EXPORT_SYMBOL(dma_mmap_writecombine); - /* - * free a page as defined by the above mapping. - * Must not be called with IRQs disabled. + * Free a buffer as defined by the above mapping. 
*/ -void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr_t handle) +static void __arm_dma_free(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t handle, struct dma_attrs *attrs, + bool is_coherent) { - WARN_ON(irqs_disabled()); + struct page *page = pfn_to_page(dma_to_pfn(dev, handle)); if (dma_release_from_coherent(dev, get_order(size), cpu_addr)) return; size = PAGE_ALIGN(size); - if (!arch_is_coherent()) + if (is_coherent || nommu()) { + __dma_free_buffer(page, size); + } else if (__free_from_pool(cpu_addr, size)) { + return; + } else if (!dev_get_cma_area(dev)) { __dma_free_remap(cpu_addr, size); - - __dma_free_buffer(dma_to_page(dev, handle), size); + __dma_free_buffer(page, size); + } else { + /* + * Non-atomic allocations cannot be freed with IRQs disabled + */ + WARN_ON(irqs_disabled()); + __free_from_contiguous(dev, page, cpu_addr, size); + } } -EXPORT_SYMBOL(dma_free_coherent); -/* - * Make an area consistent for devices. - * Note: Drivers should NOT use this function directly, as it will break - * platforms with CONFIG_DMABOUNCE. - * Use the driver DMA support - see dma-mapping.h (dma_sync_*) - */ -void ___dma_single_cpu_to_dev(const void *kaddr, size_t size, - enum dma_data_direction dir) +void arm_dma_free(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t handle, struct dma_attrs *attrs) { - unsigned long paddr; - - BUG_ON(!virt_addr_valid(kaddr) || !virt_addr_valid(kaddr + size - 1)); - - dmac_map_area(kaddr, size, dir); + __arm_dma_free(dev, size, cpu_addr, handle, attrs, false); +} - paddr = __pa(kaddr); - if (dir == DMA_FROM_DEVICE) { - outer_inv_range(paddr, paddr + size); - } else { - outer_clean_range(paddr, paddr + size); - } - /* FIXME: non-speculating: flush on bidirectional mappings? */ +static void arm_coherent_dma_free(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t handle, struct dma_attrs *attrs) +{ + __arm_dma_free(dev, size, cpu_addr, handle, attrs, true); } -EXPORT_SYMBOL(___dma_single_cpu_to_dev); -void ___dma_single_dev_to_cpu(const void *kaddr, size_t size, - enum dma_data_direction dir) +int arm_dma_get_sgtable(struct device *dev, struct sg_table *sgt, + void *cpu_addr, dma_addr_t handle, size_t size, + struct dma_attrs *attrs) { - BUG_ON(!virt_addr_valid(kaddr) || !virt_addr_valid(kaddr + size - 1)); + struct page *page = pfn_to_page(dma_to_pfn(dev, handle)); + int ret; - /* FIXME: non-speculating: not required */ - /* don't bother invalidating if DMA to device */ - if (dir != DMA_TO_DEVICE) { - unsigned long paddr = __pa(kaddr); - outer_inv_range(paddr, paddr + size); - } + ret = sg_alloc_table(sgt, 1, GFP_KERNEL); + if (unlikely(ret)) + return ret; - dmac_unmap_area(kaddr, size, dir); + sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0); + return 0; } -EXPORT_SYMBOL(___dma_single_dev_to_cpu); static void dma_cache_maint_page(struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, void (*op)(const void *, size_t, int)) { + unsigned long pfn; + size_t left = size; + + pfn = page_to_pfn(page) + offset / PAGE_SIZE; + offset %= PAGE_SIZE; + /* * A single sg entry may refer to multiple physically contiguous * pages. But we still need to process highmem pages individually. * If highmem is not configured then the bulk of this loop gets * optimized out. 
*/ - size_t left = size; do { size_t len = left; void *vaddr; + page = pfn_to_page(pfn); + if (PageHighMem(page)) { - if (len + offset > PAGE_SIZE) { - if (offset >= PAGE_SIZE) { - page += offset / PAGE_SIZE; - offset %= PAGE_SIZE; - } + if (len + offset > PAGE_SIZE) len = PAGE_SIZE - offset; - } - vaddr = kmap_high_get(page); - if (vaddr) { - vaddr += offset; - op(vaddr, len, dir); - kunmap_high(page); - } else if (cache_is_vipt()) { - pte_t saved_pte; - vaddr = kmap_high_l1_vipt(page, &saved_pte); + + if (cache_is_vipt_nonaliasing()) { + vaddr = kmap_atomic(page); op(vaddr + offset, len, dir); - kunmap_high_l1_vipt(page, saved_pte); + kunmap_atomic(vaddr); + } else { + vaddr = kmap_high_get(page); + if (vaddr) { + op(vaddr + offset, len, dir); + kunmap_high(page); + } } } else { vaddr = page_address(page) + offset; op(vaddr, len, dir); } offset = 0; - page++; + pfn++; left -= len; } while (left); } -void ___dma_page_cpu_to_dev(struct page *page, unsigned long off, +/* + * Make an area consistent for devices. + * Note: Drivers should NOT use this function directly, as it will break + * platforms with CONFIG_DMABOUNCE. + * Use the driver DMA support - see dma-mapping.h (dma_sync_*) + */ +static void __dma_page_cpu_to_dev(struct page *page, unsigned long off, size_t size, enum dma_data_direction dir) { - unsigned long paddr; + phys_addr_t paddr; dma_cache_maint_page(page, off, size, dir, dmac_map_area); @@ -510,30 +906,43 @@ void ___dma_page_cpu_to_dev(struct page *page, unsigned long off, } /* FIXME: non-speculating: flush on bidirectional mappings? */ } -EXPORT_SYMBOL(___dma_page_cpu_to_dev); -void ___dma_page_dev_to_cpu(struct page *page, unsigned long off, +static void __dma_page_dev_to_cpu(struct page *page, unsigned long off, size_t size, enum dma_data_direction dir) { - unsigned long paddr = page_to_phys(page) + off; + phys_addr_t paddr = page_to_phys(page) + off; /* FIXME: non-speculating: not required */ - /* don't bother invalidating if DMA to device */ - if (dir != DMA_TO_DEVICE) + /* in any case, don't bother invalidating if DMA to device */ + if (dir != DMA_TO_DEVICE) { outer_inv_range(paddr, paddr + size); - dma_cache_maint_page(page, off, size, dir, dmac_unmap_area); + dma_cache_maint_page(page, off, size, dir, dmac_unmap_area); + } /* - * Mark the D-cache clean for this page to avoid extra flushing. + * Mark the D-cache clean for these pages to avoid extra flushing. */ - if (dir != DMA_TO_DEVICE && off == 0 && size >= PAGE_SIZE) - set_bit(PG_dcache_clean, &page->flags); + if (dir != DMA_TO_DEVICE && size >= PAGE_SIZE) { + unsigned long pfn; + size_t left = size; + + pfn = page_to_pfn(page) + off / PAGE_SIZE; + off %= PAGE_SIZE; + if (off) { + pfn++; + left -= PAGE_SIZE - off; + } + while (left >= PAGE_SIZE) { + page = pfn_to_page(pfn++); + set_bit(PG_dcache_clean, &page->flags); + left -= PAGE_SIZE; + } + } } -EXPORT_SYMBOL(___dma_page_dev_to_cpu); /** - * dma_map_sg - map a set of SG buffers for streaming mode DMA + * arm_dma_map_sg - map a set of SG buffers for streaming mode DMA * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices * @sg: list of buffers * @nents: number of buffers to map @@ -548,15 +957,19 @@ EXPORT_SYMBOL(___dma_page_dev_to_cpu); * Device ownership issues as mentioned for dma_map_single are the same * here. 
*/ -int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, - enum dma_data_direction dir) +int arm_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, struct dma_attrs *attrs) { + struct dma_map_ops *ops = get_dma_ops(dev); struct scatterlist *s; int i, j; for_each_sg(sg, s, nents, i) { - s->dma_address = dma_map_page(dev, sg_page(s), s->offset, - s->length, dir); +#ifdef CONFIG_NEED_SG_DMA_LENGTH + s->dma_length = s->length; +#endif + s->dma_address = ops->map_page(dev, sg_page(s), s->offset, + s->length, dir, attrs); if (dma_mapping_error(dev, s->dma_address)) goto bad_mapping; } @@ -564,76 +977,1116 @@ int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, bad_mapping: for_each_sg(sg, s, i, j) - dma_unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir); + ops->unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir, attrs); return 0; } -EXPORT_SYMBOL(dma_map_sg); /** - * dma_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg + * arm_dma_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices * @sg: list of buffers - * @nents: number of buffers to unmap (returned from dma_map_sg) + * @nents: number of buffers to unmap (same as was passed to dma_map_sg) * @dir: DMA transfer direction (same as was passed to dma_map_sg) * * Unmap a set of streaming mode DMA translations. Again, CPU access * rules concerning calls here are the same as for dma_unmap_single(). */ -void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, - enum dma_data_direction dir) +void arm_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, struct dma_attrs *attrs) { + struct dma_map_ops *ops = get_dma_ops(dev); struct scatterlist *s; + int i; for_each_sg(sg, s, nents, i) - dma_unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir); + ops->unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir, attrs); } -EXPORT_SYMBOL(dma_unmap_sg); /** - * dma_sync_sg_for_cpu + * arm_dma_sync_sg_for_cpu * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices * @sg: list of buffers * @nents: number of buffers to map (returned from dma_map_sg) * @dir: DMA transfer direction (same as was passed to dma_map_sg) */ -void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, +void arm_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir) { + struct dma_map_ops *ops = get_dma_ops(dev); struct scatterlist *s; int i; - for_each_sg(sg, s, nents, i) { - if (!dmabounce_sync_for_cpu(dev, sg_dma_address(s), 0, - sg_dma_len(s), dir)) + for_each_sg(sg, s, nents, i) + ops->sync_single_for_cpu(dev, sg_dma_address(s), s->length, + dir); +} + +/** + * arm_dma_sync_sg_for_device + * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices + * @sg: list of buffers + * @nents: number of buffers to map (returned from dma_map_sg) + * @dir: DMA transfer direction (same as was passed to dma_map_sg) + */ +void arm_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir) +{ + struct dma_map_ops *ops = get_dma_ops(dev); + struct scatterlist *s; + int i; + + for_each_sg(sg, s, nents, i) + ops->sync_single_for_device(dev, sg_dma_address(s), s->length, + dir); +} + +/* + * Return whether the given device DMA address mask can be supported + * properly. 
For example, if your device can only drive the low 24-bits + * during bus mastering, then you would pass 0x00ffffff as the mask + * to this function. + */ +int dma_supported(struct device *dev, u64 mask) +{ + return __dma_supported(dev, mask, false); +} +EXPORT_SYMBOL(dma_supported); + +int arm_dma_set_mask(struct device *dev, u64 dma_mask) +{ + if (!dev->dma_mask || !dma_supported(dev, dma_mask)) + return -EIO; + + *dev->dma_mask = dma_mask; + + return 0; +} + +#define PREALLOC_DMA_DEBUG_ENTRIES 4096 + +static int __init dma_debug_do_init(void) +{ + dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); + return 0; +} +fs_initcall(dma_debug_do_init); + +#ifdef CONFIG_ARM_DMA_USE_IOMMU + +/* IOMMU */ + +static int extend_iommu_mapping(struct dma_iommu_mapping *mapping); + +static inline dma_addr_t __alloc_iova(struct dma_iommu_mapping *mapping, + size_t size) +{ + unsigned int order = get_order(size); + unsigned int align = 0; + unsigned int count, start; + size_t mapping_size = mapping->bits << PAGE_SHIFT; + unsigned long flags; + dma_addr_t iova; + int i; + + if (order > CONFIG_ARM_DMA_IOMMU_ALIGNMENT) + order = CONFIG_ARM_DMA_IOMMU_ALIGNMENT; + + count = PAGE_ALIGN(size) >> PAGE_SHIFT; + align = (1 << order) - 1; + + spin_lock_irqsave(&mapping->lock, flags); + for (i = 0; i < mapping->nr_bitmaps; i++) { + start = bitmap_find_next_zero_area(mapping->bitmaps[i], + mapping->bits, 0, count, align); + + if (start > mapping->bits) continue; - __dma_page_dev_to_cpu(sg_page(s), s->offset, - s->length, dir); + bitmap_set(mapping->bitmaps[i], start, count); + break; + } + + /* + * No unused range found. Try to extend the existing mapping + * and perform a second attempt to reserve an IO virtual + * address range of size bytes. + */ + if (i == mapping->nr_bitmaps) { + if (extend_iommu_mapping(mapping)) { + spin_unlock_irqrestore(&mapping->lock, flags); + return DMA_ERROR_CODE; + } + + start = bitmap_find_next_zero_area(mapping->bitmaps[i], + mapping->bits, 0, count, align); + + if (start > mapping->bits) { + spin_unlock_irqrestore(&mapping->lock, flags); + return DMA_ERROR_CODE; + } + + bitmap_set(mapping->bitmaps[i], start, count); + } + spin_unlock_irqrestore(&mapping->lock, flags); + + iova = mapping->base + (mapping_size * i); + iova += start << PAGE_SHIFT; + + return iova; +} + +static inline void __free_iova(struct dma_iommu_mapping *mapping, + dma_addr_t addr, size_t size) +{ + unsigned int start, count; + size_t mapping_size = mapping->bits << PAGE_SHIFT; + unsigned long flags; + dma_addr_t bitmap_base; + u32 bitmap_index; + + if (!size) + return; + + bitmap_index = (u32) (addr - mapping->base) / (u32) mapping_size; + BUG_ON(addr < mapping->base || bitmap_index > mapping->extensions); + + bitmap_base = mapping->base + mapping_size * bitmap_index; + + start = (addr - bitmap_base) >> PAGE_SHIFT; + + if (addr + size > bitmap_base + mapping_size) { + /* + * The address range to be freed reaches into the iova + * range of the next bitmap. This should not happen as + * we don't allow this in __alloc_iova (at the + * moment). 
+ */ + BUG(); + } else + count = size >> PAGE_SHIFT; + + spin_lock_irqsave(&mapping->lock, flags); + bitmap_clear(mapping->bitmaps[bitmap_index], start, count); + spin_unlock_irqrestore(&mapping->lock, flags); +} + +static struct page **__iommu_alloc_buffer(struct device *dev, size_t size, + gfp_t gfp, struct dma_attrs *attrs) +{ + struct page **pages; + int count = size >> PAGE_SHIFT; + int array_size = count * sizeof(struct page *); + int i = 0; + + if (array_size <= PAGE_SIZE) + pages = kzalloc(array_size, gfp); + else + pages = vzalloc(array_size); + if (!pages) + return NULL; + + if (dma_get_attr(DMA_ATTR_FORCE_CONTIGUOUS, attrs)) + { + unsigned long order = get_order(size); + struct page *page; + + page = dma_alloc_from_contiguous(dev, count, order); + if (!page) + goto error; + + __dma_clear_buffer(page, size); + + for (i = 0; i < count; i++) + pages[i] = page + i; + + return pages; + } + + /* + * IOMMU can map any pages, so himem can also be used here + */ + gfp |= __GFP_NOWARN | __GFP_HIGHMEM; + + while (count) { + int j, order = __fls(count); + + pages[i] = alloc_pages(gfp, order); + while (!pages[i] && order) + pages[i] = alloc_pages(gfp, --order); + if (!pages[i]) + goto error; + + if (order) { + split_page(pages[i], order); + j = 1 << order; + while (--j) + pages[i + j] = pages[i] + j; + } + + __dma_clear_buffer(pages[i], PAGE_SIZE << order); + i += 1 << order; + count -= 1 << order; + } + + return pages; +error: + while (i--) + if (pages[i]) + __free_pages(pages[i], 0); + if (array_size <= PAGE_SIZE) + kfree(pages); + else + vfree(pages); + return NULL; +} + +static int __iommu_free_buffer(struct device *dev, struct page **pages, + size_t size, struct dma_attrs *attrs) +{ + int count = size >> PAGE_SHIFT; + int array_size = count * sizeof(struct page *); + int i; + + if (dma_get_attr(DMA_ATTR_FORCE_CONTIGUOUS, attrs)) { + dma_release_from_contiguous(dev, pages[0], count); + } else { + for (i = 0; i < count; i++) + if (pages[i]) + __free_pages(pages[i], 0); + } + + if (array_size <= PAGE_SIZE) + kfree(pages); + else + vfree(pages); + return 0; +} + +/* + * Create a CPU mapping for a specified pages + */ +static void * +__iommu_alloc_remap(struct page **pages, size_t size, gfp_t gfp, pgprot_t prot, + const void *caller) +{ + unsigned int i, nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; + struct vm_struct *area; + unsigned long p; + + area = get_vm_area_caller(size, VM_ARM_DMA_CONSISTENT | VM_USERMAP, + caller); + if (!area) + return NULL; + + area->pages = pages; + area->nr_pages = nr_pages; + p = (unsigned long)area->addr; + + for (i = 0; i < nr_pages; i++) { + phys_addr_t phys = __pfn_to_phys(page_to_pfn(pages[i])); + if (ioremap_page_range(p, p + PAGE_SIZE, phys, prot)) + goto err; + p += PAGE_SIZE; + } + return area->addr; +err: + unmap_kernel_range((unsigned long)area->addr, size); + vunmap(area->addr); + return NULL; +} + +/* + * Create a mapping in device IO address space for specified pages + */ +static dma_addr_t +__iommu_create_mapping(struct device *dev, struct page **pages, size_t size) +{ + struct dma_iommu_mapping *mapping = dev->archdata.mapping; + unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; + dma_addr_t dma_addr, iova; + int i, ret = DMA_ERROR_CODE; + + dma_addr = __alloc_iova(mapping, size); + if (dma_addr == DMA_ERROR_CODE) + return dma_addr; + + iova = dma_addr; + for (i = 0; i < count; ) { + unsigned int next_pfn = page_to_pfn(pages[i]) + 1; + phys_addr_t phys = page_to_phys(pages[i]); + unsigned int len, j; + + for (j = i + 1; j < count; j++, 
next_pfn++) + if (page_to_pfn(pages[j]) != next_pfn) + break; + + len = (j - i) << PAGE_SHIFT; + ret = iommu_map(mapping->domain, iova, phys, len, + IOMMU_READ|IOMMU_WRITE); + if (ret < 0) + goto fail; + iova += len; + i = j; + } + return dma_addr; +fail: + iommu_unmap(mapping->domain, dma_addr, iova-dma_addr); + __free_iova(mapping, dma_addr, size); + return DMA_ERROR_CODE; +} + +static int __iommu_remove_mapping(struct device *dev, dma_addr_t iova, size_t size) +{ + struct dma_iommu_mapping *mapping = dev->archdata.mapping; + + /* + * add optional in-page offset from iova to size and align + * result to page size + */ + size = PAGE_ALIGN((iova & ~PAGE_MASK) + size); + iova &= PAGE_MASK; + + iommu_unmap(mapping->domain, iova, size); + __free_iova(mapping, iova, size); + return 0; +} + +static struct page **__atomic_get_pages(void *addr) +{ + struct dma_pool *pool = &atomic_pool; + struct page **pages = pool->pages; + int offs = (addr - pool->vaddr) >> PAGE_SHIFT; + + return pages + offs; +} + +static struct page **__iommu_get_pages(void *cpu_addr, struct dma_attrs *attrs) +{ + struct vm_struct *area; + + if (__in_atomic_pool(cpu_addr, PAGE_SIZE)) + return __atomic_get_pages(cpu_addr); + + if (dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs)) + return cpu_addr; + + area = find_vm_area(cpu_addr); + if (area && (area->flags & VM_ARM_DMA_CONSISTENT)) + return area->pages; + return NULL; +} + +static void *__iommu_alloc_atomic(struct device *dev, size_t size, + dma_addr_t *handle) +{ + struct page *page; + void *addr; + + addr = __alloc_from_pool(size, &page); + if (!addr) + return NULL; + + *handle = __iommu_create_mapping(dev, &page, size); + if (*handle == DMA_ERROR_CODE) + goto err_mapping; + + return addr; + +err_mapping: + __free_from_pool(addr, size); + return NULL; +} + +static void __iommu_free_atomic(struct device *dev, void *cpu_addr, + dma_addr_t handle, size_t size) +{ + __iommu_remove_mapping(dev, handle, size); + __free_from_pool(cpu_addr, size); +} + +static void *arm_iommu_alloc_attrs(struct device *dev, size_t size, + dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs) +{ + pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL); + struct page **pages; + void *addr = NULL; + + *handle = DMA_ERROR_CODE; + size = PAGE_ALIGN(size); + + if (!(gfp & __GFP_WAIT)) + return __iommu_alloc_atomic(dev, size, handle); + + /* + * Following is a work-around (a.k.a. hack) to prevent pages + * with __GFP_COMP being passed to split_page() which cannot + * handle them. The real problem is that this flag probably + * should be 0 on ARM as it is not supported on this + * platform; see CONFIG_HUGETLBFS. 
+ */ + gfp &= ~(__GFP_COMP); + + pages = __iommu_alloc_buffer(dev, size, gfp, attrs); + if (!pages) + return NULL; + + *handle = __iommu_create_mapping(dev, pages, size); + if (*handle == DMA_ERROR_CODE) + goto err_buffer; + + if (dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs)) + return pages; + + addr = __iommu_alloc_remap(pages, size, gfp, prot, + __builtin_return_address(0)); + if (!addr) + goto err_mapping; + + return addr; + +err_mapping: + __iommu_remove_mapping(dev, *handle, size); +err_buffer: + __iommu_free_buffer(dev, pages, size, attrs); + return NULL; +} + +static int arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + struct dma_attrs *attrs) +{ + unsigned long uaddr = vma->vm_start; + unsigned long usize = vma->vm_end - vma->vm_start; + struct page **pages = __iommu_get_pages(cpu_addr, attrs); + + vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot); + + if (!pages) + return -ENXIO; + + do { + int ret = vm_insert_page(vma, uaddr, *pages++); + if (ret) { + pr_err("Remapping memory failed: %d\n", ret); + return ret; + } + uaddr += PAGE_SIZE; + usize -= PAGE_SIZE; + } while (usize > 0); + + return 0; +} + +/* + * free a page as defined by the above mapping. + * Must not be called with IRQs disabled. + */ +void arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t handle, struct dma_attrs *attrs) +{ + struct page **pages; + size = PAGE_ALIGN(size); + + if (__in_atomic_pool(cpu_addr, size)) { + __iommu_free_atomic(dev, cpu_addr, handle, size); + return; + } + + pages = __iommu_get_pages(cpu_addr, attrs); + if (!pages) { + WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr); + return; + } + + if (!dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs)) { + unmap_kernel_range((unsigned long)cpu_addr, size); + vunmap(cpu_addr); + } + + __iommu_remove_mapping(dev, handle, size); + __iommu_free_buffer(dev, pages, size, attrs); +} + +static int arm_iommu_get_sgtable(struct device *dev, struct sg_table *sgt, + void *cpu_addr, dma_addr_t dma_addr, + size_t size, struct dma_attrs *attrs) +{ + unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; + struct page **pages = __iommu_get_pages(cpu_addr, attrs); + + if (!pages) + return -ENXIO; + + return sg_alloc_table_from_pages(sgt, pages, count, 0, size, + GFP_KERNEL); +} + +static int __dma_direction_to_prot(enum dma_data_direction dir) +{ + int prot; + + switch (dir) { + case DMA_BIDIRECTIONAL: + prot = IOMMU_READ | IOMMU_WRITE; + break; + case DMA_TO_DEVICE: + prot = IOMMU_READ; + break; + case DMA_FROM_DEVICE: + prot = IOMMU_WRITE; + break; + default: + prot = 0; + } + + return prot; +} + +/* + * Map a part of the scatter-gather list into contiguous io address space + */ +static int __map_sg_chunk(struct device *dev, struct scatterlist *sg, + size_t size, dma_addr_t *handle, + enum dma_data_direction dir, struct dma_attrs *attrs, + bool is_coherent) +{ + struct dma_iommu_mapping *mapping = dev->archdata.mapping; + dma_addr_t iova, iova_base; + int ret = 0; + unsigned int count; + struct scatterlist *s; + int prot; + + size = PAGE_ALIGN(size); + *handle = DMA_ERROR_CODE; + + iova_base = iova = __alloc_iova(mapping, size); + if (iova == DMA_ERROR_CODE) + return -ENOMEM; + + for (count = 0, s = sg; count < (size >> PAGE_SHIFT); s = sg_next(s)) { + phys_addr_t phys = page_to_phys(sg_page(s)); + unsigned int len = PAGE_ALIGN(s->offset + s->length); + + if (!is_coherent && + !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) + 
__dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir); + + prot = __dma_direction_to_prot(dir); + + ret = iommu_map(mapping->domain, iova, phys, len, prot); + if (ret < 0) + goto fail; + count += len >> PAGE_SHIFT; + iova += len; } + *handle = iova_base; + + return 0; +fail: + iommu_unmap(mapping->domain, iova_base, count * PAGE_SIZE); + __free_iova(mapping, iova_base, size); + return ret; +} + +static int __iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, struct dma_attrs *attrs, + bool is_coherent) +{ + struct scatterlist *s = sg, *dma = sg, *start = sg; + int i, count = 0; + unsigned int offset = s->offset; + unsigned int size = s->offset + s->length; + unsigned int max = dma_get_max_seg_size(dev); + + for (i = 1; i < nents; i++) { + s = sg_next(s); + + s->dma_address = DMA_ERROR_CODE; + s->dma_length = 0; + + if (s->offset || (size & ~PAGE_MASK) || size + s->length > max) { + if (__map_sg_chunk(dev, start, size, &dma->dma_address, + dir, attrs, is_coherent) < 0) + goto bad_mapping; + + dma->dma_address += offset; + dma->dma_length = size - offset; + + size = offset = s->offset; + start = s; + dma = sg_next(dma); + count += 1; + } + size += s->length; + } + if (__map_sg_chunk(dev, start, size, &dma->dma_address, dir, attrs, + is_coherent) < 0) + goto bad_mapping; + + dma->dma_address += offset; + dma->dma_length = size - offset; + + return count+1; + +bad_mapping: + for_each_sg(sg, s, count, i) + __iommu_remove_mapping(dev, sg_dma_address(s), sg_dma_len(s)); + return 0; } -EXPORT_SYMBOL(dma_sync_sg_for_cpu); /** - * dma_sync_sg_for_device - * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices + * arm_coherent_iommu_map_sg - map a set of SG buffers for streaming mode DMA + * @dev: valid struct device pointer + * @sg: list of buffers + * @nents: number of buffers to map + * @dir: DMA transfer direction + * + * Map a set of i/o coherent buffers described by scatterlist in streaming + * mode for DMA. The scatter gather list elements are merged together (if + * possible) and tagged with the appropriate dma address and length. They are + * obtained via sg_dma_{address,length}. + */ +int arm_coherent_iommu_map_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, struct dma_attrs *attrs) +{ + return __iommu_map_sg(dev, sg, nents, dir, attrs, true); +} + +/** + * arm_iommu_map_sg - map a set of SG buffers for streaming mode DMA + * @dev: valid struct device pointer + * @sg: list of buffers + * @nents: number of buffers to map + * @dir: DMA transfer direction + * + * Map a set of buffers described by scatterlist in streaming mode for DMA. + * The scatter gather list elements are merged together (if possible) and + * tagged with the appropriate dma address and length. They are obtained via + * sg_dma_{address,length}. 
+ */ +int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, struct dma_attrs *attrs) +{ + return __iommu_map_sg(dev, sg, nents, dir, attrs, false); +} + +static void __iommu_unmap_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, struct dma_attrs *attrs, + bool is_coherent) +{ + struct scatterlist *s; + int i; + + for_each_sg(sg, s, nents, i) { + if (sg_dma_len(s)) + __iommu_remove_mapping(dev, sg_dma_address(s), + sg_dma_len(s)); + if (!is_coherent && + !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) + __dma_page_dev_to_cpu(sg_page(s), s->offset, + s->length, dir); + } +} + +/** + * arm_coherent_iommu_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg + * @dev: valid struct device pointer + * @sg: list of buffers + * @nents: number of buffers to unmap (same as was passed to dma_map_sg) + * @dir: DMA transfer direction (same as was passed to dma_map_sg) + * + * Unmap a set of streaming mode DMA translations. Again, CPU access + * rules concerning calls here are the same as for dma_unmap_single(). + */ +void arm_coherent_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, struct dma_attrs *attrs) +{ + __iommu_unmap_sg(dev, sg, nents, dir, attrs, true); +} + +/** + * arm_iommu_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg + * @dev: valid struct device pointer + * @sg: list of buffers + * @nents: number of buffers to unmap (same as was passed to dma_map_sg) + * @dir: DMA transfer direction (same as was passed to dma_map_sg) + * + * Unmap a set of streaming mode DMA translations. Again, CPU access + * rules concerning calls here are the same as for dma_unmap_single(). + */ +void arm_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, struct dma_attrs *attrs) +{ + __iommu_unmap_sg(dev, sg, nents, dir, attrs, false); +} + +/** + * arm_iommu_sync_sg_for_cpu + * @dev: valid struct device pointer * @sg: list of buffers * @nents: number of buffers to map (returned from dma_map_sg) * @dir: DMA transfer direction (same as was passed to dma_map_sg) */ -void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, +void arm_iommu_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir) { struct scatterlist *s; int i; - for_each_sg(sg, s, nents, i) { - if (!dmabounce_sync_for_device(dev, sg_dma_address(s), 0, - sg_dma_len(s), dir)) - continue; + for_each_sg(sg, s, nents, i) + __dma_page_dev_to_cpu(sg_page(s), s->offset, s->length, dir); + +} + +/** + * arm_iommu_sync_sg_for_device + * @dev: valid struct device pointer + * @sg: list of buffers + * @nents: number of buffers to map (returned from dma_map_sg) + * @dir: DMA transfer direction (same as was passed to dma_map_sg) + */ +void arm_iommu_sync_sg_for_device(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir) +{ + struct scatterlist *s; + int i; + + for_each_sg(sg, s, nents, i) + __dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir); +} + + +/** + * arm_coherent_iommu_map_page + * @dev: valid struct device pointer + * @page: page that buffer resides in + * @offset: offset into page for start of buffer + * @size: size of buffer to map + * @dir: DMA transfer direction + * + * Coherent IOMMU aware version of arm_dma_map_page() + */ +static dma_addr_t arm_coherent_iommu_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, enum 
dma_data_direction dir, + struct dma_attrs *attrs) +{ + struct dma_iommu_mapping *mapping = dev->archdata.mapping; + dma_addr_t dma_addr; + int ret, prot, len = PAGE_ALIGN(size + offset); + + dma_addr = __alloc_iova(mapping, len); + if (dma_addr == DMA_ERROR_CODE) + return dma_addr; + + prot = __dma_direction_to_prot(dir); + + ret = iommu_map(mapping->domain, dma_addr, page_to_phys(page), len, prot); + if (ret < 0) + goto fail; + + return dma_addr + offset; +fail: + __free_iova(mapping, dma_addr, len); + return DMA_ERROR_CODE; +} - __dma_page_cpu_to_dev(sg_page(s), s->offset, - s->length, dir); +/** + * arm_iommu_map_page + * @dev: valid struct device pointer + * @page: page that buffer resides in + * @offset: offset into page for start of buffer + * @size: size of buffer to map + * @dir: DMA transfer direction + * + * IOMMU aware version of arm_dma_map_page() + */ +static dma_addr_t arm_iommu_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) + __dma_page_cpu_to_dev(page, offset, size, dir); + + return arm_coherent_iommu_map_page(dev, page, offset, size, dir, attrs); +} + +/** + * arm_coherent_iommu_unmap_page + * @dev: valid struct device pointer + * @handle: DMA address of buffer + * @size: size of buffer (same as passed to dma_map_page) + * @dir: DMA transfer direction (same as passed to dma_map_page) + * + * Coherent IOMMU aware version of arm_dma_unmap_page() + */ +static void arm_coherent_iommu_unmap_page(struct device *dev, dma_addr_t handle, + size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + struct dma_iommu_mapping *mapping = dev->archdata.mapping; + dma_addr_t iova = handle & PAGE_MASK; + int offset = handle & ~PAGE_MASK; + int len = PAGE_ALIGN(size + offset); + + if (!iova) + return; + + iommu_unmap(mapping->domain, iova, len); + __free_iova(mapping, iova, len); +} + +/** + * arm_iommu_unmap_page + * @dev: valid struct device pointer + * @handle: DMA address of buffer + * @size: size of buffer (same as passed to dma_map_page) + * @dir: DMA transfer direction (same as passed to dma_map_page) + * + * IOMMU aware version of arm_dma_unmap_page() + */ +static void arm_iommu_unmap_page(struct device *dev, dma_addr_t handle, + size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + struct dma_iommu_mapping *mapping = dev->archdata.mapping; + dma_addr_t iova = handle & PAGE_MASK; + struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova)); + int offset = handle & ~PAGE_MASK; + int len = PAGE_ALIGN(size + offset); + + if (!iova) + return; + + if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) + __dma_page_dev_to_cpu(page, offset, size, dir); + + iommu_unmap(mapping->domain, iova, len); + __free_iova(mapping, iova, len); +} + +static void arm_iommu_sync_single_for_cpu(struct device *dev, + dma_addr_t handle, size_t size, enum dma_data_direction dir) +{ + struct dma_iommu_mapping *mapping = dev->archdata.mapping; + dma_addr_t iova = handle & PAGE_MASK; + struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova)); + unsigned int offset = handle & ~PAGE_MASK; + + if (!iova) + return; + + __dma_page_dev_to_cpu(page, offset, size, dir); +} + +static void arm_iommu_sync_single_for_device(struct device *dev, + dma_addr_t handle, size_t size, enum dma_data_direction dir) +{ + struct dma_iommu_mapping *mapping = dev->archdata.mapping; + dma_addr_t iova = handle & PAGE_MASK; + 
struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova)); + unsigned int offset = handle & ~PAGE_MASK; + + if (!iova) + return; + + __dma_page_cpu_to_dev(page, offset, size, dir); +} + +struct dma_map_ops iommu_ops = { + .alloc = arm_iommu_alloc_attrs, + .free = arm_iommu_free_attrs, + .mmap = arm_iommu_mmap_attrs, + .get_sgtable = arm_iommu_get_sgtable, + + .map_page = arm_iommu_map_page, + .unmap_page = arm_iommu_unmap_page, + .sync_single_for_cpu = arm_iommu_sync_single_for_cpu, + .sync_single_for_device = arm_iommu_sync_single_for_device, + + .map_sg = arm_iommu_map_sg, + .unmap_sg = arm_iommu_unmap_sg, + .sync_sg_for_cpu = arm_iommu_sync_sg_for_cpu, + .sync_sg_for_device = arm_iommu_sync_sg_for_device, + + .set_dma_mask = arm_dma_set_mask, +}; + +struct dma_map_ops iommu_coherent_ops = { + .alloc = arm_iommu_alloc_attrs, + .free = arm_iommu_free_attrs, + .mmap = arm_iommu_mmap_attrs, + .get_sgtable = arm_iommu_get_sgtable, + + .map_page = arm_coherent_iommu_map_page, + .unmap_page = arm_coherent_iommu_unmap_page, + + .map_sg = arm_coherent_iommu_map_sg, + .unmap_sg = arm_coherent_iommu_unmap_sg, + + .set_dma_mask = arm_dma_set_mask, +}; + +/** + * arm_iommu_create_mapping + * @bus: pointer to the bus holding the client device (for IOMMU calls) + * @base: start address of the valid IO address space + * @size: maximum size of the valid IO address space + * + * Creates a mapping structure which holds information about used/unused + * IO address ranges, which is required to perform memory allocation and + * mapping with IOMMU aware functions. + * + * The client device need to be attached to the mapping with + * arm_iommu_attach_device function. + */ +struct dma_iommu_mapping * +arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t size) +{ + unsigned int bits = size >> PAGE_SHIFT; + unsigned int bitmap_size = BITS_TO_LONGS(bits) * sizeof(long); + struct dma_iommu_mapping *mapping; + int extensions = 1; + int err = -ENOMEM; + + if (!bitmap_size) + return ERR_PTR(-EINVAL); + + if (bitmap_size > PAGE_SIZE) { + extensions = bitmap_size / PAGE_SIZE; + bitmap_size = PAGE_SIZE; + } + + mapping = kzalloc(sizeof(struct dma_iommu_mapping), GFP_KERNEL); + if (!mapping) + goto err; + + mapping->bitmap_size = bitmap_size; + mapping->bitmaps = kzalloc(extensions * sizeof(unsigned long *), + GFP_KERNEL); + if (!mapping->bitmaps) + goto err2; + + mapping->bitmaps[0] = kzalloc(bitmap_size, GFP_KERNEL); + if (!mapping->bitmaps[0]) + goto err3; + + mapping->nr_bitmaps = 1; + mapping->extensions = extensions; + mapping->base = base; + mapping->bits = BITS_PER_BYTE * bitmap_size; + + spin_lock_init(&mapping->lock); + + mapping->domain = iommu_domain_alloc(bus); + if (!mapping->domain) + goto err4; + + kref_init(&mapping->kref); + return mapping; +err4: + kfree(mapping->bitmaps[0]); +err3: + kfree(mapping->bitmaps); +err2: + kfree(mapping); +err: + return ERR_PTR(err); +} +EXPORT_SYMBOL_GPL(arm_iommu_create_mapping); + +static void release_iommu_mapping(struct kref *kref) +{ + int i; + struct dma_iommu_mapping *mapping = + container_of(kref, struct dma_iommu_mapping, kref); + + iommu_domain_free(mapping->domain); + for (i = 0; i < mapping->nr_bitmaps; i++) + kfree(mapping->bitmaps[i]); + kfree(mapping->bitmaps); + kfree(mapping); +} + +static int extend_iommu_mapping(struct dma_iommu_mapping *mapping) +{ + int next_bitmap; + + if (mapping->nr_bitmaps > mapping->extensions) + return -EINVAL; + + next_bitmap = mapping->nr_bitmaps; + mapping->bitmaps[next_bitmap] = 
kzalloc(mapping->bitmap_size, + GFP_ATOMIC); + if (!mapping->bitmaps[next_bitmap]) + return -ENOMEM; + + mapping->nr_bitmaps++; + + return 0; +} + +void arm_iommu_release_mapping(struct dma_iommu_mapping *mapping) +{ + if (mapping) + kref_put(&mapping->kref, release_iommu_mapping); +} +EXPORT_SYMBOL_GPL(arm_iommu_release_mapping); + +/** + * arm_iommu_attach_device + * @dev: valid struct device pointer + * @mapping: io address space mapping structure (returned from + * arm_iommu_create_mapping) + * + * Attaches specified io address space mapping to the provided device, + * this replaces the dma operations (dma_map_ops pointer) with the + * IOMMU aware version. More than one client might be attached to + * the same io address space mapping. + */ +int arm_iommu_attach_device(struct device *dev, + struct dma_iommu_mapping *mapping) +{ + int err; + + err = iommu_attach_device(mapping->domain, dev); + if (err) + return err; + + kref_get(&mapping->kref); + dev->archdata.mapping = mapping; + set_dma_ops(dev, &iommu_ops); + + pr_debug("Attached IOMMU controller to %s device.\n", dev_name(dev)); + return 0; +} +EXPORT_SYMBOL_GPL(arm_iommu_attach_device); + +/** + * arm_iommu_detach_device + * @dev: valid struct device pointer + * + * Detaches the provided device from a previously attached map. + * This voids the dma operations (dma_map_ops pointer) + */ +void arm_iommu_detach_device(struct device *dev) +{ + struct dma_iommu_mapping *mapping; + + mapping = to_dma_iommu_mapping(dev); + if (!mapping) { + dev_warn(dev, "Not attached\n"); + return; } + + iommu_detach_device(mapping->domain, dev); + kref_put(&mapping->kref, release_iommu_mapping); + dev->archdata.mapping = NULL; + set_dma_ops(dev, NULL); + + pr_debug("Detached IOMMU controller from %s device.\n", dev_name(dev)); } -EXPORT_SYMBOL(dma_sync_sg_for_device); +EXPORT_SYMBOL_GPL(arm_iommu_detach_device); + +#endif diff --git a/arch/arm/mm/dump.c b/arch/arm/mm/dump.c new file mode 100644 index 00000000000..c508f41a43b --- /dev/null +++ b/arch/arm/mm/dump.c @@ -0,0 +1,365 @@ +/* + * Debug helper to dump the current kernel pagetables of the system + * so that we can see what the various memory ranges are set to. + * + * Derived from x86 implementation: + * (C) Copyright 2008 Intel Corporation + * + * Author: Arjan van de Ven <arjan@linux.intel.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. 
+ */ +#include <linux/debugfs.h> +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/seq_file.h> + +#include <asm/fixmap.h> +#include <asm/pgtable.h> + +struct addr_marker { + unsigned long start_address; + const char *name; +}; + +static struct addr_marker address_markers[] = { + { MODULES_VADDR, "Modules" }, + { PAGE_OFFSET, "Kernel Mapping" }, + { 0, "vmalloc() Area" }, + { VMALLOC_END, "vmalloc() End" }, + { FIXADDR_START, "Fixmap Area" }, + { CONFIG_VECTORS_BASE, "Vectors" }, + { CONFIG_VECTORS_BASE + PAGE_SIZE * 2, "Vectors End" }, + { -1, NULL }, +}; + +struct pg_state { + struct seq_file *seq; + const struct addr_marker *marker; + unsigned long start_address; + unsigned level; + u64 current_prot; +}; + +struct prot_bits { + u64 mask; + u64 val; + const char *set; + const char *clear; +}; + +static const struct prot_bits pte_bits[] = { + { + .mask = L_PTE_USER, + .val = L_PTE_USER, + .set = "USR", + .clear = " ", + }, { + .mask = L_PTE_RDONLY, + .val = L_PTE_RDONLY, + .set = "ro", + .clear = "RW", + }, { + .mask = L_PTE_XN, + .val = L_PTE_XN, + .set = "NX", + .clear = "x ", + }, { + .mask = L_PTE_SHARED, + .val = L_PTE_SHARED, + .set = "SHD", + .clear = " ", + }, { + .mask = L_PTE_MT_MASK, + .val = L_PTE_MT_UNCACHED, + .set = "SO/UNCACHED", + }, { + .mask = L_PTE_MT_MASK, + .val = L_PTE_MT_BUFFERABLE, + .set = "MEM/BUFFERABLE/WC", + }, { + .mask = L_PTE_MT_MASK, + .val = L_PTE_MT_WRITETHROUGH, + .set = "MEM/CACHED/WT", + }, { + .mask = L_PTE_MT_MASK, + .val = L_PTE_MT_WRITEBACK, + .set = "MEM/CACHED/WBRA", +#ifndef CONFIG_ARM_LPAE + }, { + .mask = L_PTE_MT_MASK, + .val = L_PTE_MT_MINICACHE, + .set = "MEM/MINICACHE", +#endif + }, { + .mask = L_PTE_MT_MASK, + .val = L_PTE_MT_WRITEALLOC, + .set = "MEM/CACHED/WBWA", + }, { + .mask = L_PTE_MT_MASK, + .val = L_PTE_MT_DEV_SHARED, + .set = "DEV/SHARED", +#ifndef CONFIG_ARM_LPAE + }, { + .mask = L_PTE_MT_MASK, + .val = L_PTE_MT_DEV_NONSHARED, + .set = "DEV/NONSHARED", +#endif + }, { + .mask = L_PTE_MT_MASK, + .val = L_PTE_MT_DEV_WC, + .set = "DEV/WC", + }, { + .mask = L_PTE_MT_MASK, + .val = L_PTE_MT_DEV_CACHED, + .set = "DEV/CACHED", + }, +}; + +static const struct prot_bits section_bits[] = { +#ifdef CONFIG_ARM_LPAE + { + .mask = PMD_SECT_USER, + .val = PMD_SECT_USER, + .set = "USR", + }, { + .mask = PMD_SECT_RDONLY, + .val = PMD_SECT_RDONLY, + .set = "ro", + .clear = "RW", +#elif __LINUX_ARM_ARCH__ >= 6 + { + .mask = PMD_SECT_APX | PMD_SECT_AP_READ | PMD_SECT_AP_WRITE, + .val = PMD_SECT_APX | PMD_SECT_AP_WRITE, + .set = " ro", + }, { + .mask = PMD_SECT_APX | PMD_SECT_AP_READ | PMD_SECT_AP_WRITE, + .val = PMD_SECT_AP_WRITE, + .set = " RW", + }, { + .mask = PMD_SECT_APX | PMD_SECT_AP_READ | PMD_SECT_AP_WRITE, + .val = PMD_SECT_AP_READ, + .set = "USR ro", + }, { + .mask = PMD_SECT_APX | PMD_SECT_AP_READ | PMD_SECT_AP_WRITE, + .val = PMD_SECT_AP_READ | PMD_SECT_AP_WRITE, + .set = "USR RW", +#else /* ARMv4/ARMv5 */ + /* These are approximate */ + { + .mask = PMD_SECT_AP_READ | PMD_SECT_AP_WRITE, + .val = 0, + .set = " ro", + }, { + .mask = PMD_SECT_AP_READ | PMD_SECT_AP_WRITE, + .val = PMD_SECT_AP_WRITE, + .set = " RW", + }, { + .mask = PMD_SECT_AP_READ | PMD_SECT_AP_WRITE, + .val = PMD_SECT_AP_READ, + .set = "USR ro", + }, { + .mask = PMD_SECT_AP_READ | PMD_SECT_AP_WRITE, + .val = PMD_SECT_AP_READ | PMD_SECT_AP_WRITE, + .set = "USR RW", +#endif + }, { + .mask = PMD_SECT_XN, + .val = PMD_SECT_XN, + .set = "NX", + .clear = "x ", + }, { + .mask = PMD_SECT_S, + .val = PMD_SECT_S, + .set = "SHD", + .clear = " ", + }, +}; + +struct 
pg_level { + const struct prot_bits *bits; + size_t num; + u64 mask; +}; + +static struct pg_level pg_level[] = { + { + }, { /* pgd */ + }, { /* pud */ + }, { /* pmd */ + .bits = section_bits, + .num = ARRAY_SIZE(section_bits), + }, { /* pte */ + .bits = pte_bits, + .num = ARRAY_SIZE(pte_bits), + }, +}; + +static void dump_prot(struct pg_state *st, const struct prot_bits *bits, size_t num) +{ + unsigned i; + + for (i = 0; i < num; i++, bits++) { + const char *s; + + if ((st->current_prot & bits->mask) == bits->val) + s = bits->set; + else + s = bits->clear; + + if (s) + seq_printf(st->seq, " %s", s); + } +} + +static void note_page(struct pg_state *st, unsigned long addr, unsigned level, u64 val) +{ + static const char units[] = "KMGTPE"; + u64 prot = val & pg_level[level].mask; + + if (addr < USER_PGTABLES_CEILING) + return; + + if (!st->level) { + st->level = level; + st->current_prot = prot; + seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); + } else if (prot != st->current_prot || level != st->level || + addr >= st->marker[1].start_address) { + const char *unit = units; + unsigned long delta; + + if (st->current_prot) { + seq_printf(st->seq, "0x%08lx-0x%08lx ", + st->start_address, addr); + + delta = (addr - st->start_address) >> 10; + while (!(delta & 1023) && unit[1]) { + delta >>= 10; + unit++; + } + seq_printf(st->seq, "%9lu%c", delta, *unit); + if (pg_level[st->level].bits) + dump_prot(st, pg_level[st->level].bits, pg_level[st->level].num); + seq_printf(st->seq, "\n"); + } + + if (addr >= st->marker[1].start_address) { + st->marker++; + seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); + } + st->start_address = addr; + st->current_prot = prot; + st->level = level; + } +} + +static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start) +{ + pte_t *pte = pte_offset_kernel(pmd, 0); + unsigned long addr; + unsigned i; + + for (i = 0; i < PTRS_PER_PTE; i++, pte++) { + addr = start + i * PAGE_SIZE; + note_page(st, addr, 4, pte_val(*pte)); + } +} + +static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start) +{ + pmd_t *pmd = pmd_offset(pud, 0); + unsigned long addr; + unsigned i; + + for (i = 0; i < PTRS_PER_PMD; i++, pmd++) { + addr = start + i * PMD_SIZE; + if (pmd_none(*pmd) || pmd_large(*pmd) || !pmd_present(*pmd)) + note_page(st, addr, 3, pmd_val(*pmd)); + else + walk_pte(st, pmd, addr); + + if (SECTION_SIZE < PMD_SIZE && pmd_large(pmd[1])) + note_page(st, addr + SECTION_SIZE, 3, pmd_val(pmd[1])); + } +} + +static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start) +{ + pud_t *pud = pud_offset(pgd, 0); + unsigned long addr; + unsigned i; + + for (i = 0; i < PTRS_PER_PUD; i++, pud++) { + addr = start + i * PUD_SIZE; + if (!pud_none(*pud)) { + walk_pmd(st, pud, addr); + } else { + note_page(st, addr, 2, pud_val(*pud)); + } + } +} + +static void walk_pgd(struct seq_file *m) +{ + pgd_t *pgd = swapper_pg_dir; + struct pg_state st; + unsigned long addr; + unsigned i, pgdoff = USER_PGTABLES_CEILING / PGDIR_SIZE; + + memset(&st, 0, sizeof(st)); + st.seq = m; + st.marker = address_markers; + + pgd += pgdoff; + + for (i = pgdoff; i < PTRS_PER_PGD; i++, pgd++) { + addr = i * PGDIR_SIZE; + if (!pgd_none(*pgd)) { + walk_pud(&st, pgd, addr); + } else { + note_page(&st, addr, 1, pgd_val(*pgd)); + } + } + + note_page(&st, 0, 0, 0); +} + +static int ptdump_show(struct seq_file *m, void *v) +{ + walk_pgd(m); + return 0; +} + +static int ptdump_open(struct inode *inode, struct file *file) +{ + return single_open(file, ptdump_show, NULL); +} + 
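dump_prot() above drives the whole dumper: each prot_bits entry either matches its mask/val pair and contributes the .set string, or contributes .clear when one is given. A standalone model of that table-driven decode, with made-up bit positions rather than the real L_PTE_* values:

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

struct prot_bits {
	uint64_t mask;
	uint64_t val;
	const char *set;
	const char *clear;
};

static const struct prot_bits demo_bits[] = {
	{ 1u << 0, 1u << 0, "USR", "   " },	/* illustrative "user" bit      */
	{ 1u << 1, 1u << 1, "ro ", "RW " },	/* illustrative "read-only" bit */
	{ 1u << 2, 1u << 2, "NX ", "x  " },	/* illustrative "no-exec" bit   */
};

static void dump_prot(uint64_t prot, const struct prot_bits *bits, size_t num)
{
	for (size_t i = 0; i < num; i++, bits++) {
		const char *s = ((prot & bits->mask) == bits->val)
				? bits->set : bits->clear;
		if (s)			/* a NULL .clear prints nothing */
			printf(" %s", s);
	}
	printf("\n");
}

int main(void)
{
	dump_prot(0x5, demo_bits, 3);	/* USR  RW  NX */
	dump_prot(0x2, demo_bits, 3);	/* (kernel) ro  x */
	return 0;
}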
+static const struct file_operations ptdump_fops = { + .open = ptdump_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int ptdump_init(void) +{ + struct dentry *pe; + unsigned i, j; + + for (i = 0; i < ARRAY_SIZE(pg_level); i++) + if (pg_level[i].bits) + for (j = 0; j < pg_level[i].num; j++) + pg_level[i].mask |= pg_level[i].bits[j].mask; + + address_markers[2].start_address = VMALLOC_START; + + pe = debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, + &ptdump_fops); + return pe ? 0 : -ENOMEM; +} +__initcall(ptdump_init); diff --git a/arch/arm/mm/extable.c b/arch/arm/mm/extable.c index 9d285626bc7..312e15e6d00 100644 --- a/arch/arm/mm/extable.c +++ b/arch/arm/mm/extable.c @@ -9,8 +9,13 @@ int fixup_exception(struct pt_regs *regs) const struct exception_table_entry *fixup; fixup = search_exception_tables(instruction_pointer(regs)); - if (fixup) + if (fixup) { regs->ARM_pc = fixup->fixup; +#ifdef CONFIG_THUMB2_KERNEL + /* Clear the IT state to avoid nasty surprises in the fixup */ + regs->ARM_cpsr &= ~PSR_IT_MASK; +#endif + } return fixup != NULL; } diff --git a/arch/arm/mm/fault-armv.c b/arch/arm/mm/fault-armv.c index 83e59f87042..ff379ac115d 100644 --- a/arch/arm/mm/fault-armv.c +++ b/arch/arm/mm/fault-armv.c @@ -8,7 +8,6 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ -#include <linux/module.h> #include <linux/sched.h> #include <linux/kernel.h> #include <linux/mm.h> @@ -26,7 +25,7 @@ #include "mm.h" -static unsigned long shared_pte_mask = L_PTE_MT_BUFFERABLE; +static pteval_t shared_pte_mask = L_PTE_MT_BUFFERABLE; #if __LINUX_ARM_ARCH__ < 6 /* @@ -66,7 +65,7 @@ static int do_adjust_pte(struct vm_area_struct *vma, unsigned long address, return ret; } -#if USE_SPLIT_PTLOCKS +#if USE_SPLIT_PTE_PTLOCKS /* * If we are using split PTE locks, then we need to take the page * lock here. Otherwise we are using shared mm->page_table_lock @@ -85,16 +84,17 @@ static inline void do_pte_unlock(spinlock_t *ptl) { spin_unlock(ptl); } -#else /* !USE_SPLIT_PTLOCKS */ +#else /* !USE_SPLIT_PTE_PTLOCKS */ static inline void do_pte_lock(spinlock_t *ptl) {} static inline void do_pte_unlock(spinlock_t *ptl) {} -#endif /* USE_SPLIT_PTLOCKS */ +#endif /* USE_SPLIT_PTE_PTLOCKS */ static int adjust_pte(struct vm_area_struct *vma, unsigned long address, unsigned long pfn) { spinlock_t *ptl; pgd_t *pgd; + pud_t *pud; pmd_t *pmd; pte_t *pte; int ret; @@ -103,7 +103,11 @@ static int adjust_pte(struct vm_area_struct *vma, unsigned long address, if (pgd_none_or_clear_bad(pgd)) return 0; - pmd = pmd_offset(pgd, address); + pud = pud_offset(pgd, address); + if (pud_none_or_clear_bad(pud)) + return 0; + + pmd = pmd_offset(pud, address); if (pmd_none_or_clear_bad(pmd)) return 0; @@ -130,7 +134,6 @@ make_coherent(struct address_space *mapping, struct vm_area_struct *vma, { struct mm_struct *mm = vma->vm_mm; struct vm_area_struct *mpnt; - struct prio_tree_iter iter; unsigned long offset; pgoff_t pgoff; int aliases = 0; @@ -143,7 +146,7 @@ make_coherent(struct address_space *mapping, struct vm_area_struct *vma, * cache coherency. */ flush_dcache_mmap_lock(mapping); - vma_prio_tree_foreach(mpnt, &iter, &mapping->i_mmap, pgoff, pgoff) { + vma_interval_tree_foreach(mpnt, &mapping->i_mmap, pgoff, pgoff) { /* * If this VMA is not in our MM, we can ignore it. 
* Note that we intentionally mask out the VMA diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index 1e21e125fe3..eb8830a4c5e 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -20,25 +20,14 @@ #include <linux/highmem.h> #include <linux/perf_event.h> -#include <asm/system.h> +#include <asm/exception.h> #include <asm/pgtable.h> +#include <asm/system_misc.h> +#include <asm/system_info.h> #include <asm/tlbflush.h> #include "fault.h" -/* - * Fault status register encodings. We steal bit 31 for our own purposes. - */ -#define FSR_LNX_PF (1 << 31) -#define FSR_WRITE (1 << 11) -#define FSR_FS4 (1 << 10) -#define FSR_FS3_0 (15) - -static inline int fsr_fs(unsigned int fsr) -{ - return (fsr & FSR_FS3_0) | (fsr & FSR_FS4) >> 6; -} - #ifdef CONFIG_MMU #ifdef CONFIG_KPROBES @@ -76,9 +65,11 @@ void show_pte(struct mm_struct *mm, unsigned long addr) printk(KERN_ALERT "pgd = %p\n", mm->pgd); pgd = pgd_offset(mm, addr); - printk(KERN_ALERT "[%08lx] *pgd=%08lx", addr, pgd_val(*pgd)); + printk(KERN_ALERT "[%08lx] *pgd=%08llx", + addr, (long long)pgd_val(*pgd)); do { + pud_t *pud; pmd_t *pmd; pte_t *pte; @@ -90,9 +81,21 @@ void show_pte(struct mm_struct *mm, unsigned long addr) break; } - pmd = pmd_offset(pgd, addr); + pud = pud_offset(pgd, addr); + if (PTRS_PER_PUD != 1) + printk(", *pud=%08llx", (long long)pud_val(*pud)); + + if (pud_none(*pud)) + break; + + if (pud_bad(*pud)) { + printk("(bad)"); + break; + } + + pmd = pmd_offset(pud, addr); if (PTRS_PER_PMD != 1) - printk(", *pmd=%08lx", pmd_val(*pmd)); + printk(", *pmd=%08llx", (long long)pmd_val(*pmd)); if (pmd_none(*pmd)) break; @@ -107,8 +110,11 @@ void show_pte(struct mm_struct *mm, unsigned long addr) break; pte = pte_offset_map(pmd, addr); - printk(", *pte=%08lx", pte_val(*pte)); - printk(", *ppte=%08lx", pte_val(pte[-PTRS_PER_PTE])); + printk(", *pte=%08llx", (long long)pte_val(*pte)); +#ifndef CONFIG_ARM_LPAE + printk(", *ppte=%08llx", + (long long)pte_val(pte[PTE_HWTABLE_PTRS])); +#endif pte_unmap(pte); } while(0); @@ -159,7 +165,8 @@ __do_user_fault(struct task_struct *tsk, unsigned long addr, struct siginfo si; #ifdef CONFIG_DEBUG_USER - if (user_debug & UDBG_SEGV) { + if (((user_debug & UDBG_SEGV) && (sig == SIGSEGV)) || + ((user_debug & UDBG_BUS) && (sig == SIGBUS))) { printk(KERN_DEBUG "%s: unhandled page fault (%d) at 0x%08lx, code 0x%03x\n", tsk->comm, sig, addr, fsr); show_pte(tsk->mm, addr); @@ -215,7 +222,7 @@ static inline bool access_error(unsigned int fsr, struct vm_area_struct *vma) static int __kprobes __do_page_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr, - struct task_struct *tsk) + unsigned int flags, struct task_struct *tsk) { struct vm_area_struct *vma; int fault; @@ -237,21 +244,12 @@ good_area: goto out; } - /* - * If for any reason at all we couldn't handle the fault, make - * sure we exit gracefully rather than endlessly redo the fault. - */ - fault = handle_mm_fault(mm, vma, addr & PAGE_MASK, (fsr & FSR_WRITE) ? 
FAULT_FLAG_WRITE : 0); - if (unlikely(fault & VM_FAULT_ERROR)) - return fault; - if (fault & VM_FAULT_MAJOR) - tsk->maj_flt++; - else - tsk->min_flt++; - return fault; + return handle_mm_fault(mm, vma, addr & PAGE_MASK, flags); check_stack: - if (vma->vm_flags & VM_GROWSDOWN && !expand_stack(vma, addr)) + /* Don't allow expansion below FIRST_USER_ADDRESS */ + if (vma->vm_flags & VM_GROWSDOWN && + addr >= FIRST_USER_ADDRESS && !expand_stack(vma, addr)) goto good_area; out: return fault; @@ -263,6 +261,7 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) struct task_struct *tsk; struct mm_struct *mm; int fault, sig, code; + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; if (notify_page_fault(regs, fsr)) return 0; @@ -270,6 +269,10 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) tsk = current; mm = tsk->mm; + /* Enable interrupts if they were enabled in the parent context. */ + if (interrupts_enabled(regs)) + local_irq_enable(); + /* * If we're in an interrupt or have no user * context, we must not take the fault.. @@ -277,6 +280,11 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) if (in_atomic() || !mm) goto no_context; + if (user_mode(regs)) + flags |= FAULT_FLAG_USER; + if (fsr & FSR_WRITE) + flags |= FAULT_FLAG_WRITE; + /* * As per x86, we may deadlock here. However, since the kernel only * validly references user space from well defined areas of the code, @@ -285,6 +293,7 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) if (!down_read_trylock(&mm->mmap_sem)) { if (!user_mode(regs) && !search_exception_tables(regs->ARM_pc)) goto no_context; +retry: down_read(&mm->mmap_sem); } else { /* @@ -300,14 +309,42 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) #endif } - fault = __do_page_fault(mm, addr, fsr, tsk); - up_read(&mm->mmap_sem); + fault = __do_page_fault(mm, addr, fsr, flags, tsk); - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, addr); - if (fault & VM_FAULT_MAJOR) - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, regs, addr); - else if (fault & VM_FAULT_MINOR) - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, regs, addr); + /* If we need to retry but a fatal signal is pending, handle the + * signal first. We do not need to release the mmap_sem because + * it would already be released in __lock_page_or_retry in + * mm/filemap.c. */ + if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) + return 0; + + /* + * Major/minor page fault accounting is only done on the + * initial attempt. If we go through a retry, it is extremely + * likely that the page will be found in page cache at that point. + */ + + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr); + if (!(fault & VM_FAULT_ERROR) && flags & FAULT_FLAG_ALLOW_RETRY) { + if (fault & VM_FAULT_MAJOR) { + tsk->maj_flt++; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, + regs, addr); + } else { + tsk->min_flt++; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, + regs, addr); + } + if (fault & VM_FAULT_RETRY) { + /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk + * of starvation. 
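The retry handling added to do_page_fault() here boils down to a small state machine over the fault flags: the first attempt runs with FAULT_FLAG_ALLOW_RETRY set, and a VM_FAULT_RETRY result repeats the fault exactly once with ALLOW_RETRY cleared and TRIED set. A toy, self-contained model of that flow (flag values invented, handle_mm_fault() replaced by a stub):

#include <stdio.h>

#define ALLOW_RETRY	0x01
#define KILLABLE	0x02
#define TRIED		0x04
#define WRITE		0x08

#define VM_FAULT_RETRY	0x100
#define VM_FAULT_MAJOR	0x200

/* Stand-in for handle_mm_fault(): pretend the page needed I/O once. */
static int fake_handle_fault(unsigned int flags)
{
	return (flags & ALLOW_RETRY) ? VM_FAULT_RETRY : VM_FAULT_MAJOR;
}

int main(void)
{
	unsigned int flags = ALLOW_RETRY | KILLABLE | WRITE;
	int fault;

retry:
	fault = fake_handle_fault(flags);
	if (fault & VM_FAULT_RETRY) {
		/* Same dance as the patch: never allow a second retry. */
		flags &= ~ALLOW_RETRY;
		flags |= TRIED;
		printf("retrying with flags 0x%x\n", flags);
		goto retry;
	}
	printf("completed, fault=0x%x (major=%d)\n",
	       fault, !!(fault & VM_FAULT_MAJOR));
	return 0;
}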
*/ + flags &= ~FAULT_FLAG_ALLOW_RETRY; + flags |= FAULT_FLAG_TRIED; + goto retry; + } + } + + up_read(&mm->mmap_sem); /* * Handle the "normal" case first - VM_FAULT_MAJOR / VM_FAULT_MINOR @@ -315,6 +352,13 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP | VM_FAULT_BADACCESS)))) return 0; + /* + * If we are in kernel mode at this point, we + * have no context to handle this fault with. + */ + if (!user_mode(regs)) + goto no_context; + if (fault & VM_FAULT_OOM) { /* * We ran out of memory, call the OOM killer, and return to @@ -325,13 +369,6 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) return 0; } - /* - * If we are in kernel mode at this point, we - * have no context to handle this fault with. - */ - if (!user_mode(regs)) - goto no_context; - if (fault & VM_FAULT_SIGBUS) { /* * We had some memory, but were unable to @@ -388,6 +425,7 @@ do_translation_fault(unsigned long addr, unsigned int fsr, { unsigned int index; pgd_t *pgd, *pgd_k; + pud_t *pud, *pud_k; pmd_t *pmd, *pmd_k; if (addr < TASK_SIZE) @@ -398,21 +436,31 @@ do_translation_fault(unsigned long addr, unsigned int fsr, index = pgd_index(addr); - /* - * FIXME: CP15 C1 is write only on ARMv3 architectures. - */ pgd = cpu_get_pgd() + index; pgd_k = init_mm.pgd + index; if (pgd_none(*pgd_k)) goto bad_area; - if (!pgd_present(*pgd)) set_pgd(pgd, *pgd_k); - pmd_k = pmd_offset(pgd_k, addr); - pmd = pmd_offset(pgd, addr); + pud = pud_offset(pgd, addr); + pud_k = pud_offset(pgd_k, addr); + + if (pud_none(*pud_k)) + goto bad_area; + if (!pud_present(*pud)) + set_pud(pud, *pud_k); + + pmd = pmd_offset(pud, addr); + pmd_k = pmd_offset(pud_k, addr); +#ifdef CONFIG_ARM_LPAE + /* + * Only one hardware entry per PMD with LPAE. + */ + index = 0; +#else /* * On ARM one Linux PGD entry contains two hardware entries (see page * tables layout in pgtable.h). We normally guarantee that we always @@ -422,6 +470,7 @@ do_translation_fault(unsigned long addr, unsigned int fsr, * for the first of pair. */ index = (addr >> SECTION_SHIFT) & 1; +#endif if (pmd_none(pmd_k[index])) goto bad_area; @@ -445,12 +494,14 @@ do_translation_fault(unsigned long addr, unsigned int fsr, * Some section permission faults need to be handled gracefully. * They can happen due to a __{get,put}_user during an oops. */ +#ifndef CONFIG_ARM_LPAE static int do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { do_bad_area(addr, fsr, regs); return 0; } +#endif /* CONFIG_ARM_LPAE */ /* * This abort handler always returns "fault". @@ -461,55 +512,20 @@ do_bad(unsigned long addr, unsigned int fsr, struct pt_regs *regs) return 1; } -static struct fsr_info { +struct fsr_info { int (*fn)(unsigned long addr, unsigned int fsr, struct pt_regs *regs); int sig; int code; const char *name; -} fsr_info[] = { - /* - * The following are the standard ARMv3 and ARMv4 aborts. ARMv5 - * defines these to be "precise" aborts. 
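The index computation kept (and now #ifdef'd) in do_translation_fault() is easy to check in isolation: with the classic 2-level tables one Linux PGD entry holds two 1 MiB hardware sections, so bit 20 of the faulting address picks which half to inspect, while LPAE always has a single entry per PMD. A quick standalone demonstration (SECTION_SHIFT of 20 matches the non-LPAE layout):

#include <stdio.h>

#define SECTION_SHIFT 20

static unsigned int hw_index(unsigned long addr, int lpae)
{
	return lpae ? 0 : (addr >> SECTION_SHIFT) & 1;
}

int main(void)
{
	printf("0xc0000000 -> %u\n", hw_index(0xc0000000UL, 0)); /* first half  */
	printf("0xc0100000 -> %u\n", hw_index(0xc0100000UL, 0)); /* second half */
	printf("LPAE       -> %u\n", hw_index(0xc0100000UL, 1)); /* always 0    */
	return 0;
}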
- */ - { do_bad, SIGSEGV, 0, "vector exception" }, - { do_bad, SIGBUS, BUS_ADRALN, "alignment exception" }, - { do_bad, SIGKILL, 0, "terminal exception" }, - { do_bad, SIGBUS, BUS_ADRALN, "alignment exception" }, - { do_bad, SIGBUS, 0, "external abort on linefetch" }, - { do_translation_fault, SIGSEGV, SEGV_MAPERR, "section translation fault" }, - { do_bad, SIGBUS, 0, "external abort on linefetch" }, - { do_page_fault, SIGSEGV, SEGV_MAPERR, "page translation fault" }, - { do_bad, SIGBUS, 0, "external abort on non-linefetch" }, - { do_bad, SIGSEGV, SEGV_ACCERR, "section domain fault" }, - { do_bad, SIGBUS, 0, "external abort on non-linefetch" }, - { do_bad, SIGSEGV, SEGV_ACCERR, "page domain fault" }, - { do_bad, SIGBUS, 0, "external abort on translation" }, - { do_sect_fault, SIGSEGV, SEGV_ACCERR, "section permission fault" }, - { do_bad, SIGBUS, 0, "external abort on translation" }, - { do_page_fault, SIGSEGV, SEGV_ACCERR, "page permission fault" }, - /* - * The following are "imprecise" aborts, which are signalled by bit - * 10 of the FSR, and may not be recoverable. These are only - * supported if the CPU abort handler supports bit 10. - */ - { do_bad, SIGBUS, 0, "unknown 16" }, - { do_bad, SIGBUS, 0, "unknown 17" }, - { do_bad, SIGBUS, 0, "unknown 18" }, - { do_bad, SIGBUS, 0, "unknown 19" }, - { do_bad, SIGBUS, 0, "lock abort" }, /* xscale */ - { do_bad, SIGBUS, 0, "unknown 21" }, - { do_bad, SIGBUS, BUS_OBJERR, "imprecise external abort" }, /* xscale */ - { do_bad, SIGBUS, 0, "unknown 23" }, - { do_bad, SIGBUS, 0, "dcache parity error" }, /* xscale */ - { do_bad, SIGBUS, 0, "unknown 25" }, - { do_bad, SIGBUS, 0, "unknown 26" }, - { do_bad, SIGBUS, 0, "unknown 27" }, - { do_bad, SIGBUS, 0, "unknown 28" }, - { do_bad, SIGBUS, 0, "unknown 29" }, - { do_bad, SIGBUS, 0, "unknown 30" }, - { do_bad, SIGBUS, 0, "unknown 31" } }; +/* FSR definition */ +#ifdef CONFIG_ARM_LPAE +#include "fsr-3level.c" +#else +#include "fsr-2level.c" +#endif + void __init hook_fault_code(int nr, int (*fn)(unsigned long, unsigned int, struct pt_regs *), int sig, int code, const char *name) @@ -545,42 +561,6 @@ do_DataAbort(unsigned long addr, unsigned int fsr, struct pt_regs *regs) arm_notify_die("", regs, &info, fsr, 0); } - -static struct fsr_info ifsr_info[] = { - { do_bad, SIGBUS, 0, "unknown 0" }, - { do_bad, SIGBUS, 0, "unknown 1" }, - { do_bad, SIGBUS, 0, "debug event" }, - { do_bad, SIGSEGV, SEGV_ACCERR, "section access flag fault" }, - { do_bad, SIGBUS, 0, "unknown 4" }, - { do_translation_fault, SIGSEGV, SEGV_MAPERR, "section translation fault" }, - { do_bad, SIGSEGV, SEGV_ACCERR, "page access flag fault" }, - { do_page_fault, SIGSEGV, SEGV_MAPERR, "page translation fault" }, - { do_bad, SIGBUS, 0, "external abort on non-linefetch" }, - { do_bad, SIGSEGV, SEGV_ACCERR, "section domain fault" }, - { do_bad, SIGBUS, 0, "unknown 10" }, - { do_bad, SIGSEGV, SEGV_ACCERR, "page domain fault" }, - { do_bad, SIGBUS, 0, "external abort on translation" }, - { do_sect_fault, SIGSEGV, SEGV_ACCERR, "section permission fault" }, - { do_bad, SIGBUS, 0, "external abort on translation" }, - { do_page_fault, SIGSEGV, SEGV_ACCERR, "page permission fault" }, - { do_bad, SIGBUS, 0, "unknown 16" }, - { do_bad, SIGBUS, 0, "unknown 17" }, - { do_bad, SIGBUS, 0, "unknown 18" }, - { do_bad, SIGBUS, 0, "unknown 19" }, - { do_bad, SIGBUS, 0, "unknown 20" }, - { do_bad, SIGBUS, 0, "unknown 21" }, - { do_bad, SIGBUS, 0, "unknown 22" }, - { do_bad, SIGBUS, 0, "unknown 23" }, - { do_bad, SIGBUS, 0, "unknown 24" }, - { do_bad, SIGBUS, 
0, "unknown 25" }, - { do_bad, SIGBUS, 0, "unknown 26" }, - { do_bad, SIGBUS, 0, "unknown 27" }, - { do_bad, SIGBUS, 0, "unknown 28" }, - { do_bad, SIGBUS, 0, "unknown 29" }, - { do_bad, SIGBUS, 0, "unknown 30" }, - { do_bad, SIGBUS, 0, "unknown 31" }, -}; - void __init hook_ifault_code(int nr, int (*fn)(unsigned long, unsigned int, struct pt_regs *), int sig, int code, const char *name) @@ -613,6 +593,7 @@ do_PrefetchAbort(unsigned long addr, unsigned int ifsr, struct pt_regs *regs) arm_notify_die("", regs, &info, ifsr, 0); } +#ifndef CONFIG_ARM_LPAE static int __init exceptions_init(void) { if (cpu_architecture() >= CPU_ARCH_ARMv6) { @@ -635,3 +616,4 @@ static int __init exceptions_init(void) } arch_initcall(exceptions_init); +#endif diff --git a/arch/arm/mm/fault.h b/arch/arm/mm/fault.h index 49e9e3804de..cf08bdfbe0d 100644 --- a/arch/arm/mm/fault.h +++ b/arch/arm/mm/fault.h @@ -1,3 +1,28 @@ -void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs); +#ifndef __ARCH_ARM_FAULT_H +#define __ARCH_ARM_FAULT_H + +/* + * Fault status register encodings. We steal bit 31 for our own purposes. + */ +#define FSR_LNX_PF (1 << 31) +#define FSR_WRITE (1 << 11) +#define FSR_FS4 (1 << 10) +#define FSR_FS3_0 (15) +#define FSR_FS5_0 (0x3f) + +#ifdef CONFIG_ARM_LPAE +static inline int fsr_fs(unsigned int fsr) +{ + return fsr & FSR_FS5_0; +} +#else +static inline int fsr_fs(unsigned int fsr) +{ + return (fsr & FSR_FS3_0) | (fsr & FSR_FS4) >> 6; +} +#endif +void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs); unsigned long search_exception_table(unsigned long addr); + +#endif /* __ARCH_ARM_FAULT_H */ diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c index 391ffae7509..43d54f5b26b 100644 --- a/arch/arm/mm/flush.c +++ b/arch/arm/mm/flush.c @@ -10,28 +10,25 @@ #include <linux/module.h> #include <linux/mm.h> #include <linux/pagemap.h> +#include <linux/highmem.h> #include <asm/cacheflush.h> #include <asm/cachetype.h> #include <asm/highmem.h> #include <asm/smp_plat.h> -#include <asm/system.h> #include <asm/tlbflush.h> -#include <asm/smp_plat.h> +#include <linux/hugetlb.h> #include "mm.h" #ifdef CONFIG_CPU_CACHE_VIPT -#define ALIAS_FLUSH_START 0xffff4000 - static void flush_pfn_alias(unsigned long pfn, unsigned long vaddr) { - unsigned long to = ALIAS_FLUSH_START + (CACHE_COLOUR(vaddr) << PAGE_SHIFT); + unsigned long to = FLUSH_ALIAS_START + (CACHE_COLOUR(vaddr) << PAGE_SHIFT); const int zero = 0; - set_pte_ext(TOP_PTE(to), pfn_pte(pfn, PAGE_KERNEL), 0); - flush_tlb_kernel_page(to); + set_top_pte(to, pfn_pte(pfn, PAGE_KERNEL)); asm( "mcrr p15, 0, %1, %0, c14\n" " mcr p15, 0, %2, c7, c10, 4" @@ -42,13 +39,12 @@ static void flush_pfn_alias(unsigned long pfn, unsigned long vaddr) static void flush_icache_alias(unsigned long pfn, unsigned long vaddr, unsigned long len) { - unsigned long colour = CACHE_COLOUR(vaddr); + unsigned long va = FLUSH_ALIAS_START + (CACHE_COLOUR(vaddr) << PAGE_SHIFT); unsigned long offset = vaddr & (PAGE_SIZE - 1); unsigned long to; - set_pte_ext(TOP_PTE(ALIAS_FLUSH_START) + colour, pfn_pte(pfn, PAGE_KERNEL), 0); - to = ALIAS_FLUSH_START + (colour << PAGE_SHIFT) + offset; - flush_tlb_kernel_page(to); + set_top_pte(va, pfn_pte(pfn, PAGE_KERNEL)); + to = va + offset; flush_icache_range(to, to + len); } @@ -108,17 +104,20 @@ void flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr, unsig #define flush_icache_alias(pfn,vaddr,len) do { } while (0) #endif +#define FLAG_PA_IS_EXEC 1 +#define FLAG_PA_CORE_IN_MM 2 + static void 
flush_ptrace_access_other(void *args) { __flush_icache_all(); } -static -void flush_ptrace_access(struct vm_area_struct *vma, struct page *page, - unsigned long uaddr, void *kaddr, unsigned long len) +static inline +void __flush_ptrace_access(struct page *page, unsigned long uaddr, void *kaddr, + unsigned long len, unsigned int flags) { if (cache_is_vivt()) { - if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm))) { + if (flags & FLAG_PA_CORE_IN_MM) { unsigned long addr = (unsigned long)kaddr; __cpuc_coherent_kern_range(addr, addr + len); } @@ -132,7 +131,7 @@ void flush_ptrace_access(struct vm_area_struct *vma, struct page *page, } /* VIPT non-aliasing D-cache */ - if (vma->vm_flags & VM_EXEC) { + if (flags & FLAG_PA_IS_EXEC) { unsigned long addr = (unsigned long)kaddr; if (icache_is_vipt_aliasing()) flush_icache_alias(page_to_pfn(page), uaddr, len); @@ -144,6 +143,26 @@ void flush_ptrace_access(struct vm_area_struct *vma, struct page *page, } } +static +void flush_ptrace_access(struct vm_area_struct *vma, struct page *page, + unsigned long uaddr, void *kaddr, unsigned long len) +{ + unsigned int flags = 0; + if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm))) + flags |= FLAG_PA_CORE_IN_MM; + if (vma->vm_flags & VM_EXEC) + flags |= FLAG_PA_IS_EXEC; + __flush_ptrace_access(page, uaddr, kaddr, len, flags); +} + +void flush_uprobe_xol_access(struct page *page, unsigned long uaddr, + void *kaddr, unsigned long len) +{ + unsigned int flags = FLAG_PA_CORE_IN_MM|FLAG_PA_IS_EXEC; + + __flush_ptrace_access(page, uaddr, kaddr, len, flags); +} + /* * Copy user data from/to a page which is mapped into a different * processes address space. Really, we want to allow our "user @@ -173,17 +192,24 @@ void __flush_dcache_page(struct address_space *mapping, struct page *page) * coherent with the kernels mapping. 
*/ if (!PageHighMem(page)) { - __cpuc_flush_dcache_area(page_address(page), PAGE_SIZE); + size_t page_size = PAGE_SIZE << compound_order(page); + __cpuc_flush_dcache_area(page_address(page), page_size); } else { - void *addr = kmap_high_get(page); - if (addr) { - __cpuc_flush_dcache_area(addr, PAGE_SIZE); - kunmap_high(page); - } else if (cache_is_vipt()) { - pte_t saved_pte; - addr = kmap_high_l1_vipt(page, &saved_pte); - __cpuc_flush_dcache_area(addr, PAGE_SIZE); - kunmap_high_l1_vipt(page, saved_pte); + unsigned long i; + if (cache_is_vipt_nonaliasing()) { + for (i = 0; i < (1 << compound_order(page)); i++) { + void *addr = kmap_atomic(page + i); + __cpuc_flush_dcache_area(addr, PAGE_SIZE); + kunmap_atomic(addr); + } + } else { + for (i = 0; i < (1 << compound_order(page)); i++) { + void *addr = kmap_high_get(page + i); + if (addr) { + __cpuc_flush_dcache_area(addr, PAGE_SIZE); + kunmap_high(page + i); + } + } } } @@ -201,7 +227,6 @@ static void __flush_dcache_aliases(struct address_space *mapping, struct page *p { struct mm_struct *mm = current->active_mm; struct vm_area_struct *mpnt; - struct prio_tree_iter iter; pgoff_t pgoff; /* @@ -213,7 +238,7 @@ static void __flush_dcache_aliases(struct address_space *mapping, struct page *p pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); flush_dcache_mmap_lock(mapping); - vma_prio_tree_foreach(mpnt, &iter, &mapping->i_mmap, pgoff, pgoff) { + vma_interval_tree_foreach(mpnt, &mapping->i_mmap, pgoff, pgoff) { unsigned long offset; /* @@ -236,8 +261,6 @@ void __sync_icache_dcache(pte_t pteval) struct page *page; struct address_space *mapping; - if (!pte_present_user(pteval)) - return; if (cache_is_vipt_nonaliasing() && !pte_exec(pteval)) /* only flush non-aliasing VIPT caches for exec mappings */ return; @@ -253,8 +276,8 @@ void __sync_icache_dcache(pte_t pteval) if (!test_and_set_bit(PG_dcache_clean, &page->flags)) __flush_dcache_page(mapping, page); - /* pte_exec() already checked above for non-aliasing VIPT cache */ - if (cache_is_vipt_nonaliasing() || pte_exec(pteval)) + + if (pte_exec(pteval)) __flush_icache_all(); } #endif @@ -275,7 +298,8 @@ void __sync_icache_dcache(pte_t pteval) * kernel cache lines for later. Otherwise, we assume we have * aliasing mappings. * - * Note that we disable the lazy flush for SMP. + * Note that we disable the lazy flush for SMP configurations where + * the cache maintenance operations are not automatically broadcasted. */ void flush_dcache_page(struct page *page) { @@ -291,7 +315,7 @@ void flush_dcache_page(struct page *page) mapping = page_mapping(page); if (!cache_ops_need_broadcast() && - mapping && !mapping_mapped(mapping)) + mapping && !page_mapped(page)) clear_bit(PG_dcache_clean, &page->flags); else { __flush_dcache_page(mapping, page); @@ -305,6 +329,39 @@ void flush_dcache_page(struct page *page) EXPORT_SYMBOL(flush_dcache_page); /* + * Ensure cache coherency for the kernel mapping of this page. We can + * assume that the page is pinned via kmap. + * + * If the page only exists in the page cache and there are no user + * space mappings, this is a no-op since the page was already marked + * dirty at creation. Otherwise, we need to flush the dirty kernel + * cache lines directly. 
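The __flush_dcache_page() change above scales the flush to the whole compound page: either one range of PAGE_SIZE << compound_order(page) bytes for lowmem, or 1 << order separately kmapped sub-pages for highmem. A trivial standalone check of that arithmetic (4 KiB pages assumed; the orders are examples):

#include <stdio.h>

#define PAGE_SIZE 4096UL

int main(void)
{
	unsigned int orders[] = { 0, 4, 9 };	/* 4 KiB page, 64 KiB, 2 MiB hugepage */

	for (unsigned int i = 0; i < 3; i++) {
		unsigned int order = orders[i];
		unsigned long bytes = PAGE_SIZE << order;
		unsigned long subpages = 1UL << order;

		printf("order %u: flush %lu bytes (%lu sub-pages)\n",
		       order, bytes, subpages);
	}
	return 0;
}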
+ */ +void flush_kernel_dcache_page(struct page *page) +{ + if (cache_is_vivt() || cache_is_vipt_aliasing()) { + struct address_space *mapping; + + mapping = page_mapping(page); + + if (!mapping || mapping_mapped(mapping)) { + void *addr; + + addr = page_address(page); + /* + * kmap_atomic() doesn't set the page virtual + * address for highmem pages, and + * kunmap_atomic() takes care of cache + * flushing already. + */ + if (!IS_ENABLED(CONFIG_HIGHMEM) || addr) + __cpuc_flush_dcache_area(addr, PAGE_SIZE); + } + } +} +EXPORT_SYMBOL(flush_kernel_dcache_page); + +/* * Flush an anonymous page so that users of get_user_pages() * can safely access the data. The expected sequence is: * diff --git a/arch/arm/mm/fsr-2level.c b/arch/arm/mm/fsr-2level.c new file mode 100644 index 00000000000..18ca74c0f34 --- /dev/null +++ b/arch/arm/mm/fsr-2level.c @@ -0,0 +1,78 @@ +static struct fsr_info fsr_info[] = { + /* + * The following are the standard ARMv3 and ARMv4 aborts. ARMv5 + * defines these to be "precise" aborts. + */ + { do_bad, SIGSEGV, 0, "vector exception" }, + { do_bad, SIGBUS, BUS_ADRALN, "alignment exception" }, + { do_bad, SIGKILL, 0, "terminal exception" }, + { do_bad, SIGBUS, BUS_ADRALN, "alignment exception" }, + { do_bad, SIGBUS, 0, "external abort on linefetch" }, + { do_translation_fault, SIGSEGV, SEGV_MAPERR, "section translation fault" }, + { do_bad, SIGBUS, 0, "external abort on linefetch" }, + { do_page_fault, SIGSEGV, SEGV_MAPERR, "page translation fault" }, + { do_bad, SIGBUS, 0, "external abort on non-linefetch" }, + { do_bad, SIGSEGV, SEGV_ACCERR, "section domain fault" }, + { do_bad, SIGBUS, 0, "external abort on non-linefetch" }, + { do_bad, SIGSEGV, SEGV_ACCERR, "page domain fault" }, + { do_bad, SIGBUS, 0, "external abort on translation" }, + { do_sect_fault, SIGSEGV, SEGV_ACCERR, "section permission fault" }, + { do_bad, SIGBUS, 0, "external abort on translation" }, + { do_page_fault, SIGSEGV, SEGV_ACCERR, "page permission fault" }, + /* + * The following are "imprecise" aborts, which are signalled by bit + * 10 of the FSR, and may not be recoverable. These are only + * supported if the CPU abort handler supports bit 10. 
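These fault tables are indexed by fsr_fs(), whose two variants were added to fault.h earlier in this patch: the short-descriptor format splits the status field across bits [3:0] and bit 10, while LPAE keeps a flat 6-bit field. A standalone sanity check of both encodings, using the same constants as the patch:

#include <stdio.h>

#define FSR_FS4		(1 << 10)
#define FSR_FS3_0	(15)
#define FSR_FS5_0	(0x3f)

static int fsr_fs_2level(unsigned int fsr)
{
	return (fsr & FSR_FS3_0) | (fsr & FSR_FS4) >> 6;
}

static int fsr_fs_lpae(unsigned int fsr)
{
	return fsr & FSR_FS5_0;
}

int main(void)
{
	/* Short descriptor: FS = 0b10101, i.e. bit 10 set plus 0b0101. */
	unsigned int fsr = FSR_FS4 | 0x5;

	printf("2-level FS = 0x%x\n", fsr_fs_2level(fsr));	/* 0x15 */

	/* LPAE: status 0x7 is "level 3 translation fault" in the table above. */
	printf("LPAE    FS = 0x%x\n", fsr_fs_lpae(0x7));	/* 0x07 */
	return 0;
}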
+ */ + { do_bad, SIGBUS, 0, "unknown 16" }, + { do_bad, SIGBUS, 0, "unknown 17" }, + { do_bad, SIGBUS, 0, "unknown 18" }, + { do_bad, SIGBUS, 0, "unknown 19" }, + { do_bad, SIGBUS, 0, "lock abort" }, /* xscale */ + { do_bad, SIGBUS, 0, "unknown 21" }, + { do_bad, SIGBUS, BUS_OBJERR, "imprecise external abort" }, /* xscale */ + { do_bad, SIGBUS, 0, "unknown 23" }, + { do_bad, SIGBUS, 0, "dcache parity error" }, /* xscale */ + { do_bad, SIGBUS, 0, "unknown 25" }, + { do_bad, SIGBUS, 0, "unknown 26" }, + { do_bad, SIGBUS, 0, "unknown 27" }, + { do_bad, SIGBUS, 0, "unknown 28" }, + { do_bad, SIGBUS, 0, "unknown 29" }, + { do_bad, SIGBUS, 0, "unknown 30" }, + { do_bad, SIGBUS, 0, "unknown 31" }, +}; + +static struct fsr_info ifsr_info[] = { + { do_bad, SIGBUS, 0, "unknown 0" }, + { do_bad, SIGBUS, 0, "unknown 1" }, + { do_bad, SIGBUS, 0, "debug event" }, + { do_bad, SIGSEGV, SEGV_ACCERR, "section access flag fault" }, + { do_bad, SIGBUS, 0, "unknown 4" }, + { do_translation_fault, SIGSEGV, SEGV_MAPERR, "section translation fault" }, + { do_bad, SIGSEGV, SEGV_ACCERR, "page access flag fault" }, + { do_page_fault, SIGSEGV, SEGV_MAPERR, "page translation fault" }, + { do_bad, SIGBUS, 0, "external abort on non-linefetch" }, + { do_bad, SIGSEGV, SEGV_ACCERR, "section domain fault" }, + { do_bad, SIGBUS, 0, "unknown 10" }, + { do_bad, SIGSEGV, SEGV_ACCERR, "page domain fault" }, + { do_bad, SIGBUS, 0, "external abort on translation" }, + { do_sect_fault, SIGSEGV, SEGV_ACCERR, "section permission fault" }, + { do_bad, SIGBUS, 0, "external abort on translation" }, + { do_page_fault, SIGSEGV, SEGV_ACCERR, "page permission fault" }, + { do_bad, SIGBUS, 0, "unknown 16" }, + { do_bad, SIGBUS, 0, "unknown 17" }, + { do_bad, SIGBUS, 0, "unknown 18" }, + { do_bad, SIGBUS, 0, "unknown 19" }, + { do_bad, SIGBUS, 0, "unknown 20" }, + { do_bad, SIGBUS, 0, "unknown 21" }, + { do_bad, SIGBUS, 0, "unknown 22" }, + { do_bad, SIGBUS, 0, "unknown 23" }, + { do_bad, SIGBUS, 0, "unknown 24" }, + { do_bad, SIGBUS, 0, "unknown 25" }, + { do_bad, SIGBUS, 0, "unknown 26" }, + { do_bad, SIGBUS, 0, "unknown 27" }, + { do_bad, SIGBUS, 0, "unknown 28" }, + { do_bad, SIGBUS, 0, "unknown 29" }, + { do_bad, SIGBUS, 0, "unknown 30" }, + { do_bad, SIGBUS, 0, "unknown 31" }, +}; diff --git a/arch/arm/mm/fsr-3level.c b/arch/arm/mm/fsr-3level.c new file mode 100644 index 00000000000..ab4409a2307 --- /dev/null +++ b/arch/arm/mm/fsr-3level.c @@ -0,0 +1,68 @@ +static struct fsr_info fsr_info[] = { + { do_bad, SIGBUS, 0, "unknown 0" }, + { do_bad, SIGBUS, 0, "unknown 1" }, + { do_bad, SIGBUS, 0, "unknown 2" }, + { do_bad, SIGBUS, 0, "unknown 3" }, + { do_bad, SIGBUS, 0, "reserved translation fault" }, + { do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 1 translation fault" }, + { do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 2 translation fault" }, + { do_page_fault, SIGSEGV, SEGV_MAPERR, "level 3 translation fault" }, + { do_bad, SIGBUS, 0, "reserved access flag fault" }, + { do_bad, SIGSEGV, SEGV_ACCERR, "level 1 access flag fault" }, + { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 access flag fault" }, + { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 access flag fault" }, + { do_bad, SIGBUS, 0, "reserved permission fault" }, + { do_bad, SIGSEGV, SEGV_ACCERR, "level 1 permission fault" }, + { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 permission fault" }, + { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 permission fault" }, + { do_bad, SIGBUS, 0, "synchronous external abort" }, + { do_bad, SIGBUS, 0, "asynchronous external 
abort" }, + { do_bad, SIGBUS, 0, "unknown 18" }, + { do_bad, SIGBUS, 0, "unknown 19" }, + { do_bad, SIGBUS, 0, "synchronous abort (translation table walk)" }, + { do_bad, SIGBUS, 0, "synchronous abort (translation table walk)" }, + { do_bad, SIGBUS, 0, "synchronous abort (translation table walk)" }, + { do_bad, SIGBUS, 0, "synchronous abort (translation table walk)" }, + { do_bad, SIGBUS, 0, "synchronous parity error" }, + { do_bad, SIGBUS, 0, "asynchronous parity error" }, + { do_bad, SIGBUS, 0, "unknown 26" }, + { do_bad, SIGBUS, 0, "unknown 27" }, + { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk" }, + { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk" }, + { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk" }, + { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk" }, + { do_bad, SIGBUS, 0, "unknown 32" }, + { do_bad, SIGBUS, BUS_ADRALN, "alignment fault" }, + { do_bad, SIGBUS, 0, "debug event" }, + { do_bad, SIGBUS, 0, "unknown 35" }, + { do_bad, SIGBUS, 0, "unknown 36" }, + { do_bad, SIGBUS, 0, "unknown 37" }, + { do_bad, SIGBUS, 0, "unknown 38" }, + { do_bad, SIGBUS, 0, "unknown 39" }, + { do_bad, SIGBUS, 0, "unknown 40" }, + { do_bad, SIGBUS, 0, "unknown 41" }, + { do_bad, SIGBUS, 0, "unknown 42" }, + { do_bad, SIGBUS, 0, "unknown 43" }, + { do_bad, SIGBUS, 0, "unknown 44" }, + { do_bad, SIGBUS, 0, "unknown 45" }, + { do_bad, SIGBUS, 0, "unknown 46" }, + { do_bad, SIGBUS, 0, "unknown 47" }, + { do_bad, SIGBUS, 0, "unknown 48" }, + { do_bad, SIGBUS, 0, "unknown 49" }, + { do_bad, SIGBUS, 0, "unknown 50" }, + { do_bad, SIGBUS, 0, "unknown 51" }, + { do_bad, SIGBUS, 0, "implementation fault (lockdown abort)" }, + { do_bad, SIGBUS, 0, "unknown 53" }, + { do_bad, SIGBUS, 0, "unknown 54" }, + { do_bad, SIGBUS, 0, "unknown 55" }, + { do_bad, SIGBUS, 0, "unknown 56" }, + { do_bad, SIGBUS, 0, "unknown 57" }, + { do_bad, SIGBUS, 0, "implementation fault (coprocessor abort)" }, + { do_bad, SIGBUS, 0, "unknown 59" }, + { do_bad, SIGBUS, 0, "unknown 60" }, + { do_bad, SIGBUS, 0, "unknown 61" }, + { do_bad, SIGBUS, 0, "unknown 62" }, + { do_bad, SIGBUS, 0, "unknown 63" }, +}; + +#define ifsr_info fsr_info diff --git a/arch/arm/mm/highmem.c b/arch/arm/mm/highmem.c index c435fd9e1da..45aeaaca905 100644 --- a/arch/arm/mm/highmem.c +++ b/arch/arm/mm/highmem.c @@ -18,6 +18,21 @@ #include <asm/tlbflush.h> #include "mm.h" +pte_t *fixmap_page_table; + +static inline void set_fixmap_pte(int idx, pte_t pte) +{ + unsigned long vaddr = __fix_to_virt(idx); + set_pte_ext(fixmap_page_table + idx, pte, 0); + local_flush_tlb_kernel_page(vaddr); +} + +static inline pte_t get_fixmap_pte(unsigned long vaddr) +{ + unsigned long idx = __virt_to_fix(vaddr); + return *(fixmap_page_table + idx); +} + void *kmap(struct page *page) { might_sleep(); @@ -36,7 +51,7 @@ void kunmap(struct page *page) } EXPORT_SYMBOL(kunmap); -void *__kmap_atomic(struct page *page) +void *kmap_atomic(struct page *page) { unsigned int idx; unsigned long vaddr; @@ -63,25 +78,24 @@ void *__kmap_atomic(struct page *page) type = kmap_atomic_idx_push(); idx = type + KM_TYPE_NR * smp_processor_id(); - vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); + vaddr = __fix_to_virt(idx); #ifdef CONFIG_DEBUG_HIGHMEM /* * With debugging enabled, kunmap_atomic forces that entry to 0. * Make sure it was indeed properly unmapped. 
*/ - BUG_ON(!pte_none(*(TOP_PTE(vaddr)))); + BUG_ON(!pte_none(*(fixmap_page_table + idx))); #endif - set_pte_ext(TOP_PTE(vaddr), mk_pte(page, kmap_prot), 0); /* * When debugging is off, kunmap_atomic leaves the previous mapping - * in place, so this TLB flush ensures the TLB is updated with the - * new mapping. + * in place, so the contained TLB flush ensures the TLB is updated + * with the new mapping. */ - local_flush_tlb_kernel_page(vaddr); + set_fixmap_pte(idx, mk_pte(page, kmap_prot)); return (void *)vaddr; } -EXPORT_SYMBOL(__kmap_atomic); +EXPORT_SYMBOL(kmap_atomic); void __kunmap_atomic(void *kvaddr) { @@ -95,9 +109,8 @@ void __kunmap_atomic(void *kvaddr) if (cache_is_vivt()) __cpuc_flush_dcache_area((void *)vaddr, PAGE_SIZE); #ifdef CONFIG_DEBUG_HIGHMEM - BUG_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx)); - set_pte_ext(TOP_PTE(vaddr), __pte(0), 0); - local_flush_tlb_kernel_page(vaddr); + BUG_ON(vaddr != __fix_to_virt(idx)); + set_fixmap_pte(idx, __pte(0)); #else (void) idx; /* to kill a warning */ #endif @@ -119,12 +132,11 @@ void *kmap_atomic_pfn(unsigned long pfn) type = kmap_atomic_idx_push(); idx = type + KM_TYPE_NR * smp_processor_id(); - vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); + vaddr = __fix_to_virt(idx); #ifdef CONFIG_DEBUG_HIGHMEM - BUG_ON(!pte_none(*(TOP_PTE(vaddr)))); + BUG_ON(!pte_none(*(fixmap_page_table + idx))); #endif - set_pte_ext(TOP_PTE(vaddr), pfn_pte(pfn, kmap_prot), 0); - local_flush_tlb_kernel_page(vaddr); + set_fixmap_pte(idx, pfn_pte(pfn, kmap_prot)); return (void *)vaddr; } @@ -132,98 +144,9 @@ void *kmap_atomic_pfn(unsigned long pfn) struct page *kmap_atomic_to_page(const void *ptr) { unsigned long vaddr = (unsigned long)ptr; - pte_t *pte; if (vaddr < FIXADDR_START) return virt_to_page(ptr); - pte = TOP_PTE(vaddr); - return pte_page(*pte); -} - -#ifdef CONFIG_CPU_CACHE_VIPT - -#include <linux/percpu.h> - -/* - * The VIVT cache of a highmem page is always flushed before the page - * is unmapped. Hence unmapped highmem pages need no cache maintenance - * in that case. - * - * However unmapped pages may still be cached with a VIPT cache, and - * it is not possible to perform cache maintenance on them using physical - * addresses unfortunately. So we have no choice but to set up a temporary - * virtual mapping for that purpose. - * - * Yet this VIPT cache maintenance may be triggered from DMA support - * functions which are possibly called from interrupt context. As we don't - * want to keep interrupt disabled all the time when such maintenance is - * taking place, we therefore allow for some reentrancy by preserving and - * restoring the previous fixmap entry before the interrupted context is - * resumed. If the reentrancy depth is 0 then there is no need to restore - * the previous fixmap, and leaving the current one in place allow it to - * be reused the next time without a TLB flush (common with DMA). 
- */ - -static DEFINE_PER_CPU(int, kmap_high_l1_vipt_depth); - -void *kmap_high_l1_vipt(struct page *page, pte_t *saved_pte) -{ - unsigned int idx, cpu; - int *depth; - unsigned long vaddr, flags; - pte_t pte, *ptep; - - if (!in_interrupt()) - preempt_disable(); - - cpu = smp_processor_id(); - depth = &per_cpu(kmap_high_l1_vipt_depth, cpu); - - idx = KM_L1_CACHE + KM_TYPE_NR * cpu; - vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); - ptep = TOP_PTE(vaddr); - pte = mk_pte(page, kmap_prot); - - raw_local_irq_save(flags); - (*depth)++; - if (pte_val(*ptep) == pte_val(pte)) { - *saved_pte = pte; - } else { - *saved_pte = *ptep; - set_pte_ext(ptep, pte, 0); - local_flush_tlb_kernel_page(vaddr); - } - raw_local_irq_restore(flags); - - return (void *)vaddr; + return pte_page(get_fixmap_pte(vaddr)); } - -void kunmap_high_l1_vipt(struct page *page, pte_t saved_pte) -{ - unsigned int idx, cpu = smp_processor_id(); - int *depth = &per_cpu(kmap_high_l1_vipt_depth, cpu); - unsigned long vaddr, flags; - pte_t pte, *ptep; - - idx = KM_L1_CACHE + KM_TYPE_NR * cpu; - vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); - ptep = TOP_PTE(vaddr); - pte = mk_pte(page, kmap_prot); - - BUG_ON(pte_val(*ptep) != pte_val(pte)); - BUG_ON(*depth <= 0); - - raw_local_irq_save(flags); - (*depth)--; - if (*depth != 0 && pte_val(pte) != pte_val(saved_pte)) { - set_pte_ext(ptep, saved_pte, 0); - local_flush_tlb_kernel_page(vaddr); - } - raw_local_irq_restore(flags); - - if (!in_interrupt()) - preempt_enable(); -} - -#endif /* CONFIG_CPU_CACHE_VIPT */ diff --git a/arch/arm/mm/hugetlbpage.c b/arch/arm/mm/hugetlbpage.c new file mode 100644 index 00000000000..66781bf3407 --- /dev/null +++ b/arch/arm/mm/hugetlbpage.c @@ -0,0 +1,58 @@ +/* + * arch/arm/mm/hugetlbpage.c + * + * Copyright (C) 2012 ARM Ltd. + * + * Based on arch/x86/include/asm/hugetlb.h and Bill Carson's patches + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/init.h> +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/hugetlb.h> +#include <linux/pagemap.h> +#include <linux/err.h> +#include <linux/sysctl.h> +#include <asm/mman.h> +#include <asm/tlb.h> +#include <asm/tlbflush.h> +#include <asm/pgalloc.h> + +/* + * On ARM, huge pages are backed by pmd's rather than pte's, so we do a lot + * of type casting from pmd_t * to pte_t *. 
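The hugetlbpage.c helpers that follow reduce the huge-page test to a single check: a populated pmd is huge (a section/block mapping) when its table bit is clear, and a pointer to a next-level table when the bit is set. A minimal standalone model of that test (bit 1 mirrors ARM's PMD_TABLE_BIT; the descriptor values are made up):

#include <stdio.h>
#include <stdint.h>

#define PMD_TABLE_BIT	(1ULL << 1)

static int pmd_huge(uint64_t pmdval)
{
	return pmdval && !(pmdval & PMD_TABLE_BIT);
}

int main(void)
{
	uint64_t section = 0x40000c01ULL;	/* made-up block descriptor, bit 1 clear */
	uint64_t table   = 0x8f7dc003ULL;	/* made-up table descriptor, bit 1 set   */

	printf("section: pmd_huge=%d\n", pmd_huge(section));	/* 1 */
	printf("table:   pmd_huge=%d\n", pmd_huge(table));	/* 0 */
	printf("empty:   pmd_huge=%d\n", pmd_huge(0));		/* 0 */
	return 0;
}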
+ */ + +struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, + int write) +{ + return ERR_PTR(-EINVAL); +} + +int pud_huge(pud_t pud) +{ + return 0; +} + +int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) +{ + return 0; +} + +int pmd_huge(pmd_t pmd) +{ + return pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT); +} diff --git a/arch/arm/mm/idmap.c b/arch/arm/mm/idmap.c new file mode 100644 index 00000000000..c447ec70e86 --- /dev/null +++ b/arch/arm/mm/idmap.c @@ -0,0 +1,136 @@ +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/slab.h> + +#include <asm/cputype.h> +#include <asm/idmap.h> +#include <asm/pgalloc.h> +#include <asm/pgtable.h> +#include <asm/sections.h> +#include <asm/system_info.h> + +/* + * Note: accesses outside of the kernel image and the identity map area + * are not supported on any CPU using the idmap tables as its current + * page tables. + */ +pgd_t *idmap_pgd; +phys_addr_t (*arch_virt_to_idmap) (unsigned long x); + +#ifdef CONFIG_ARM_LPAE +static void idmap_add_pmd(pud_t *pud, unsigned long addr, unsigned long end, + unsigned long prot) +{ + pmd_t *pmd; + unsigned long next; + + if (pud_none_or_clear_bad(pud) || (pud_val(*pud) & L_PGD_SWAPPER)) { + pmd = pmd_alloc_one(&init_mm, addr); + if (!pmd) { + pr_warning("Failed to allocate identity pmd.\n"); + return; + } + /* + * Copy the original PMD to ensure that the PMD entries for + * the kernel image are preserved. + */ + if (!pud_none(*pud)) + memcpy(pmd, pmd_offset(pud, 0), + PTRS_PER_PMD * sizeof(pmd_t)); + pud_populate(&init_mm, pud, pmd); + pmd += pmd_index(addr); + } else + pmd = pmd_offset(pud, addr); + + do { + next = pmd_addr_end(addr, end); + *pmd = __pmd((addr & PMD_MASK) | prot); + flush_pmd_entry(pmd); + } while (pmd++, addr = next, addr != end); +} +#else /* !CONFIG_ARM_LPAE */ +static void idmap_add_pmd(pud_t *pud, unsigned long addr, unsigned long end, + unsigned long prot) +{ + pmd_t *pmd = pmd_offset(pud, addr); + + addr = (addr & PMD_MASK) | prot; + pmd[0] = __pmd(addr); + addr += SECTION_SIZE; + pmd[1] = __pmd(addr); + flush_pmd_entry(pmd); +} +#endif /* CONFIG_ARM_LPAE */ + +static void idmap_add_pud(pgd_t *pgd, unsigned long addr, unsigned long end, + unsigned long prot) +{ + pud_t *pud = pud_offset(pgd, addr); + unsigned long next; + + do { + next = pud_addr_end(addr, end); + idmap_add_pmd(pud, addr, next, prot); + } while (pud++, addr = next, addr != end); +} + +static void identity_mapping_add(pgd_t *pgd, const char *text_start, + const char *text_end, unsigned long prot) +{ + unsigned long addr, end; + unsigned long next; + + addr = virt_to_idmap(text_start); + end = virt_to_idmap(text_end); + pr_info("Setting up static identity map for 0x%lx - 0x%lx\n", addr, end); + + prot |= PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_AF; + + if (cpu_architecture() <= CPU_ARCH_ARMv5TEJ && !cpu_is_xscale()) + prot |= PMD_BIT4; + + pgd += pgd_index(addr); + do { + next = pgd_addr_end(addr, end); + idmap_add_pud(pgd, addr, next, prot); + } while (pgd++, addr = next, addr != end); +} + +extern char __idmap_text_start[], __idmap_text_end[]; + +static int __init init_static_idmap(void) +{ + idmap_pgd = pgd_alloc(&init_mm); + if (!idmap_pgd) + return -ENOMEM; + + identity_mapping_add(idmap_pgd, __idmap_text_start, + __idmap_text_end, 0); + + /* Flush L1 for the hardware to see this page table content */ + flush_cache_louis(); + + return 0; +} +early_initcall(init_static_idmap); + +/* + * In order to soft-boot, we need to switch to a 1:1 mapping for 
the + * cpu_reset functions. This will then ensure that we have predictable + * results when turning off the mmu. + */ +void setup_mm_for_reboot(void) +{ + /* Switch to the identity mapping. */ + cpu_switch_mm(idmap_pgd, &init_mm); + local_flush_bp_all(); + +#ifdef CONFIG_CPU_HAS_ASID + /* + * We don't have a clean ASID for the identity mapping, which + * may clash with virtual addresses of the previous page tables + * and therefore potentially in the TLB. + */ + local_flush_tlb_all(); +#endif +} diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 5164069ced4..659c75d808d 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -13,17 +13,22 @@ #include <linux/init.h> #include <linux/bootmem.h> #include <linux/mman.h> +#include <linux/export.h> #include <linux/nodemask.h> #include <linux/initrd.h> +#include <linux/of_fdt.h> #include <linux/highmem.h> #include <linux/gfp.h> #include <linux/memblock.h> -#include <linux/sort.h> +#include <linux/dma-contiguous.h> +#include <linux/sizes.h> +#include <asm/cp15.h> #include <asm/mach-types.h> +#include <asm/memblock.h> +#include <asm/prom.h> #include <asm/sections.h> #include <asm/setup.h> -#include <asm/sizes.h> #include <asm/tlb.h> #include <asm/fixmap.h> @@ -32,12 +37,21 @@ #include "mm.h" -static unsigned long phys_initrd_start __initdata = 0; +#ifdef CONFIG_CPU_CP15_MMU +unsigned long __init __clear_cr(unsigned long mask) +{ + cr_alignment = cr_alignment & ~mask; + return cr_alignment; +} +#endif + +static phys_addr_t phys_initrd_start __initdata = 0; static unsigned long phys_initrd_size __initdata = 0; static int __init early_initrd(char *p) { - unsigned long start, size; + phys_addr_t start; + unsigned long size; char *endp; start = memparse(p, &endp); @@ -76,24 +90,21 @@ __tagtable(ATAG_INITRD2, parse_tag_initrd2); * initialization functions, as well as show_mem() for the skipping * of holes in the memory map. It is populated by arm_add_memory(). 
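early_initrd() above splits the initrd= parameter into a start and a size with two memparse() calls separated by a comma check. A userspace sketch of the same split (strtoull() stands in for memparse(), which additionally understands K/M/G suffixes):

#include <stdio.h>
#include <stdlib.h>

static int parse_initrd(const char *p, unsigned long long *start,
			unsigned long long *size)
{
	char *endp;

	*start = strtoull(p, &endp, 0);
	if (*endp != ',')
		return -1;		/* no size given: ignore the parameter */
	*size = strtoull(endp + 1, NULL, 0);
	return 0;
}

int main(void)
{
	unsigned long long start, size;

	if (parse_initrd("0x60800000,0x400000", &start, &size) == 0)
		printf("initrd at 0x%llx, %llu bytes\n", start, size);
	return 0;
}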
*/ -struct meminfo meminfo; - -void show_mem(void) +void show_mem(unsigned int filter) { int free = 0, total = 0, reserved = 0; - int shared = 0, cached = 0, slab = 0, i; - struct meminfo * mi = &meminfo; + int shared = 0, cached = 0, slab = 0; + struct memblock_region *reg; printk("Mem-info:\n"); - show_free_areas(); + show_free_areas(filter); - for_each_bank (i, mi) { - struct membank *bank = &mi->bank[i]; + for_each_memblock (memory, reg) { unsigned int pfn1, pfn2; struct page *page, *end; - pfn1 = bank_pfn_start(bank); - pfn2 = bank_pfn_end(bank); + pfn1 = memblock_region_memory_base_pfn(reg); + pfn2 = memblock_region_memory_end_pfn(reg); page = pfn_to_page(pfn1); end = pfn_to_page(pfn2 - 1) + 1; @@ -110,8 +121,9 @@ void show_mem(void) free++; else shared += page_count(page) - 1; - page++; - } while (page < end); + pfn1++; + page = pfn_to_page(pfn1); + } while (pfn1 < pfn2); } printk("%d pages of RAM\n", total); @@ -123,85 +135,53 @@ void show_mem(void) } static void __init find_limits(unsigned long *min, unsigned long *max_low, - unsigned long *max_high) + unsigned long *max_high) { - struct meminfo *mi = &meminfo; - int i; - - *min = -1UL; - *max_low = *max_high = 0; - - for_each_bank (i, mi) { - struct membank *bank = &mi->bank[i]; - unsigned long start, end; - - start = bank_pfn_start(bank); - end = bank_pfn_end(bank); - - if (*min > start) - *min = start; - if (*max_high < end) - *max_high = end; - if (bank->highmem) - continue; - if (*max_low < end) - *max_low = end; - } + *max_low = PFN_DOWN(memblock_get_current_limit()); + *min = PFN_UP(memblock_start_of_DRAM()); + *max_high = PFN_DOWN(memblock_end_of_DRAM()); } -static void __init arm_bootmem_init(unsigned long start_pfn, - unsigned long end_pfn) -{ - struct memblock_region *reg; - unsigned int boot_pages; - phys_addr_t bitmap; - pg_data_t *pgdat; +#ifdef CONFIG_ZONE_DMA - /* - * Allocate the bootmem bitmap page. This must be in a region - * of memory which has already been mapped. - */ - boot_pages = bootmem_bootmap_pages(end_pfn - start_pfn); - bitmap = memblock_alloc_base(boot_pages << PAGE_SHIFT, L1_CACHE_BYTES, - __pfn_to_phys(end_pfn)); +phys_addr_t arm_dma_zone_size __read_mostly; +EXPORT_SYMBOL(arm_dma_zone_size); - /* - * Initialise the bootmem allocator, handing the - * memory banks over to bootmem. - */ - node_set_online(0); - pgdat = NODE_DATA(0); - init_bootmem_node(pgdat, __phys_to_pfn(bitmap), start_pfn, end_pfn); - - /* Free the lowmem regions from memblock into bootmem. */ - for_each_memblock(memory, reg) { - unsigned long start = memblock_region_memory_base_pfn(reg); - unsigned long end = memblock_region_memory_end_pfn(reg); - - if (end >= end_pfn) - end = end_pfn; - if (start >= end) - break; - - free_bootmem(__pfn_to_phys(start), (end - start) << PAGE_SHIFT); - } +/* + * The DMA mask corresponding to the maximum bus address allocatable + * using GFP_DMA. The default here places no restriction on DMA + * allocations. This must be the smallest DMA mask in the system, + * so a successful GFP_DMA allocation will always satisfy this. + */ +phys_addr_t arm_dma_limit; +unsigned long arm_dma_pfn_limit; - /* Reserve the lowmem memblock reserved regions in bootmem. 
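arm_adjust_dma_zone() above carves ZONE_DMA out of the front of lowmem when the machine declares a DMA zone size, and attributes any hole to ZONE_NORMAL. A standalone model with made-up page counts:

#include <stdio.h>

enum { ZONE_DMA, ZONE_NORMAL, MAX_ZONES };

static void adjust_dma_zone(unsigned long *size, unsigned long *hole,
			    unsigned long dma_size)
{
	if (size[ZONE_DMA] <= dma_size)
		return;			/* lowmem already fits inside ZONE_DMA */

	size[ZONE_NORMAL] = size[ZONE_DMA] - dma_size;
	size[ZONE_DMA] = dma_size;
	hole[ZONE_NORMAL] = hole[ZONE_DMA];
	hole[ZONE_DMA] = 0;
}

int main(void)
{
	/* 512 MiB of lowmem (131072 4 KiB pages) with a 4096-page hole,
	 * and a 64 MiB (16384-page) DMA window. */
	unsigned long size[MAX_ZONES] = { 131072, 0 };
	unsigned long hole[MAX_ZONES] = { 4096, 0 };

	adjust_dma_zone(size, hole, 16384);
	printf("ZONE_DMA:    %lu pages, hole %lu\n", size[ZONE_DMA], hole[ZONE_DMA]);
	printf("ZONE_NORMAL: %lu pages, hole %lu\n", size[ZONE_NORMAL], hole[ZONE_NORMAL]);
	return 0;
}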
*/ - for_each_memblock(reserved, reg) { - unsigned long start = memblock_region_reserved_base_pfn(reg); - unsigned long end = memblock_region_reserved_end_pfn(reg); +static void __init arm_adjust_dma_zone(unsigned long *size, unsigned long *hole, + unsigned long dma_size) +{ + if (size[0] <= dma_size) + return; - if (end >= end_pfn) - end = end_pfn; - if (start >= end) - break; + size[ZONE_NORMAL] = size[0] - dma_size; + size[ZONE_DMA] = dma_size; + hole[ZONE_NORMAL] = hole[0]; + hole[ZONE_DMA] = 0; +} +#endif - reserve_bootmem(__pfn_to_phys(start), - (end - start) << PAGE_SHIFT, BOOTMEM_DEFAULT); - } +void __init setup_dma_zone(const struct machine_desc *mdesc) +{ +#ifdef CONFIG_ZONE_DMA + if (mdesc->dma_zone_size) { + arm_dma_zone_size = mdesc->dma_zone_size; + arm_dma_limit = PHYS_OFFSET + arm_dma_zone_size - 1; + } else + arm_dma_limit = 0xffffffff; + arm_dma_pfn_limit = arm_dma_limit >> PAGE_SHIFT; +#endif } -static void __init arm_bootmem_free(unsigned long min, unsigned long max_low, +static void __init zone_sizes_init(unsigned long min, unsigned long max_low, unsigned long max_high) { unsigned long zone_size[MAX_NR_ZONES], zhole_size[MAX_NR_ZONES]; @@ -243,27 +223,33 @@ static void __init arm_bootmem_free(unsigned long min, unsigned long max_low, #endif } +#ifdef CONFIG_ZONE_DMA /* * Adjust the sizes according to any special requirements for * this machine type. */ - arch_adjust_zones(zone_size, zhole_size); + if (arm_dma_zone_size) + arm_adjust_dma_zone(zone_size, zhole_size, + arm_dma_zone_size >> PAGE_SHIFT); +#endif free_area_init_node(0, zone_size, min, zhole_size); } -#ifndef CONFIG_SPARSEMEM +#ifdef CONFIG_HAVE_ARCH_PFN_VALID int pfn_valid(unsigned long pfn) { - return memblock_is_memory(pfn << PAGE_SHIFT); + return memblock_is_memory(__pfn_to_phys(pfn)); } EXPORT_SYMBOL(pfn_valid); +#endif -static void arm_memory_present(void) +#ifndef CONFIG_SPARSEMEM +static void __init arm_memory_present(void) { } #else -static void arm_memory_present(void) +static void __init arm_memory_present(void) { struct memblock_region *reg; @@ -273,23 +259,23 @@ static void arm_memory_present(void) } #endif -static int __init meminfo_cmp(const void *_a, const void *_b) -{ - const struct membank *a = _a, *b = _b; - long cmp = bank_pfn_start(a) - bank_pfn_start(b); - return cmp < 0 ? -1 : cmp > 0 ? 1 : 0; -} +static bool arm_memblock_steal_permitted = true; -void __init arm_memblock_init(struct meminfo *mi, struct machine_desc *mdesc) +phys_addr_t __init arm_memblock_steal(phys_addr_t size, phys_addr_t align) { - int i; + phys_addr_t phys; + + BUG_ON(!arm_memblock_steal_permitted); - sort(&meminfo.bank, meminfo.nr_banks, sizeof(meminfo.bank[0]), meminfo_cmp, NULL); + phys = memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ANYWHERE); + memblock_free(phys, size); + memblock_remove(phys, size); - memblock_init(); - for (i = 0; i < mi->nr_banks; i++) - memblock_add(mi->bank[i].start, mi->bank[i].size); + return phys; +} +void __init arm_memblock_init(const struct machine_desc *mdesc) +{ /* Register the kernel text, kernel data and initrd with memblock. 
*/ #ifdef CONFIG_XIP_KERNEL memblock_reserve(__pa(_sdata), _end - _sdata); @@ -297,6 +283,24 @@ void __init arm_memblock_init(struct meminfo *mi, struct machine_desc *mdesc) memblock_reserve(__pa(_stext), _end - _stext); #endif #ifdef CONFIG_BLK_DEV_INITRD + /* FDT scan will populate initrd_start */ + if (initrd_start && !phys_initrd_size) { + phys_initrd_start = __virt_to_phys(initrd_start); + phys_initrd_size = initrd_end - initrd_start; + } + initrd_start = initrd_end = 0; + if (phys_initrd_size && + !memblock_is_region_memory(phys_initrd_start, phys_initrd_size)) { + pr_err("INITRD: 0x%08llx+0x%08lx is not a memory region - disabling initrd\n", + (u64)phys_initrd_start, phys_initrd_size); + phys_initrd_start = phys_initrd_size = 0; + } + if (phys_initrd_size && + memblock_is_region_reserved(phys_initrd_start, phys_initrd_size)) { + pr_err("INITRD: 0x%08llx+0x%08lx overlaps in-use memory region - disabling initrd\n", + (u64)phys_initrd_start, phys_initrd_size); + phys_initrd_start = phys_initrd_size = 0; + } if (phys_initrd_size) { memblock_reserve(phys_initrd_start, phys_initrd_size); @@ -312,7 +316,15 @@ void __init arm_memblock_init(struct meminfo *mi, struct machine_desc *mdesc) if (mdesc->reserve) mdesc->reserve(); - memblock_analyze(); + early_init_fdt_scan_reserved_mem(); + + /* + * reserve memory for DMA contigouos allocations, + * must come from DMA area inside low memory + */ + dma_contiguous_reserve(min(arm_dma_limit, arm_lowmem_limit)); + + arm_memblock_steal_permitted = false; memblock_dump_all(); } @@ -320,12 +332,11 @@ void __init bootmem_init(void) { unsigned long min, max_low, max_high; + memblock_allow_resize(); max_low = max_high = 0; find_limits(&min, &max_low, &max_high); - arm_bootmem_init(min, max_low); - /* * Sparsemem tries to allocate bootmem in memory_present(), * so must be done after the fixed reservations @@ -342,51 +353,40 @@ void __init bootmem_init(void) * the sparse mem_map arrays initialized by sparse_init() * for memmap_init_zone(), otherwise all PFNs are invalid. */ - arm_bootmem_free(min, max_low, max_high); - - high_memory = __va((max_low << PAGE_SHIFT) - 1) + 1; + zone_sizes_init(min, max_low, max_high); /* * This doesn't seem to be used by the Linux memory manager any * more, but is used by ll_rw_block. If we can get rid of it, we * also get rid of some of the stuff above as well. - * - * Note: max_low_pfn and max_pfn reflect the number of _pages_ in - * the system, not the maximum PFN. */ - max_low_pfn = max_low - PHYS_PFN_OFFSET; - max_pfn = max_high - PHYS_PFN_OFFSET; + min_low_pfn = min; + max_low_pfn = max_low; + max_pfn = max_high; } -static inline int free_area(unsigned long pfn, unsigned long end, char *s) +/* + * Poison init memory with an undefined instruction (ARM) or a branch to an + * undefined instruction (Thumb). + */ +static inline void poison_init_mem(void *s, size_t count) { - unsigned int pages = 0, size = (end - pfn) << (PAGE_SHIFT - 10); - - for (; pfn < end; pfn++) { - struct page *page = pfn_to_page(pfn); - ClearPageReserved(page); - init_page_count(page); - __free_page(page); - pages++; - } - - if (size && s) - printk(KERN_INFO "Freeing %s memory: %dK\n", s, size); - - return pages; + u32 *p = (u32 *)s; + for (; count != 0; count -= 4) + *p++ = 0xe7fddef0; } static inline void free_memmap(unsigned long start_pfn, unsigned long end_pfn) { struct page *start_pg, *end_pg; - unsigned long pg, pgend; + phys_addr_t pg, pgend; /* * Convert start_pfn/end_pfn to a struct page pointer. 
*/ start_pg = pfn_to_page(start_pfn - 1) + 1; - end_pg = pfn_to_page(end_pfn); + end_pg = pfn_to_page(end_pfn - 1) + 1; /* * Convert to physical addresses, and @@ -400,46 +400,74 @@ free_memmap(unsigned long start_pfn, unsigned long end_pfn) * free the section of the memmap array. */ if (pg < pgend) - free_bootmem(pg, pgend - pg); + memblock_free_early(pg, pgend - pg); } /* * The mem_map array can get very big. Free the unused area of the memory map. */ -static void __init free_unused_memmap(struct meminfo *mi) +static void __init free_unused_memmap(void) { - unsigned long bank_start, prev_bank_end = 0; - unsigned int i; + unsigned long start, prev_end = 0; + struct memblock_region *reg; /* * This relies on each bank being in address order. * The banks are sorted previously in bootmem_init(). */ - for_each_bank(i, mi) { - struct membank *bank = &mi->bank[i]; - - bank_start = bank_pfn_start(bank); + for_each_memblock(memory, reg) { + start = memblock_region_memory_base_pfn(reg); +#ifdef CONFIG_SPARSEMEM + /* + * Take care not to free memmap entries that don't exist + * due to SPARSEMEM sections which aren't present. + */ + start = min(start, + ALIGN(prev_end, PAGES_PER_SECTION)); +#else + /* + * Align down here since the VM subsystem insists that the + * memmap entries are valid from the bank start aligned to + * MAX_ORDER_NR_PAGES. + */ + start = round_down(start, MAX_ORDER_NR_PAGES); +#endif /* * If we had a previous bank, and there is a space * between the current bank and the previous, free it. */ - if (prev_bank_end && prev_bank_end < bank_start) - free_memmap(prev_bank_end, bank_start); + if (prev_end && prev_end < start) + free_memmap(prev_end, start); /* * Align up here since the VM subsystem insists that the * memmap entries are valid from the bank end aligned to * MAX_ORDER_NR_PAGES. 
*/ - prev_bank_end = ALIGN(bank_pfn_end(bank), MAX_ORDER_NR_PAGES); + prev_end = ALIGN(memblock_region_memory_end_pfn(reg), + MAX_ORDER_NR_PAGES); } + +#ifdef CONFIG_SPARSEMEM + if (!IS_ALIGNED(prev_end, PAGES_PER_SECTION)) + free_memmap(prev_end, + ALIGN(prev_end, PAGES_PER_SECTION)); +#endif } +#ifdef CONFIG_HIGHMEM +static inline void free_area_high(unsigned long pfn, unsigned long end) +{ + for (; pfn < end; pfn++) + free_highmem_page(pfn_to_page(pfn)); +} +#endif + static void __init free_highpages(void) { #ifdef CONFIG_HIGHMEM - unsigned long max_low = max_low_pfn + PHYS_PFN_OFFSET; + unsigned long max_low = max_low_pfn; struct memblock_region *mem, *res; /* set highmem page free */ @@ -471,8 +499,7 @@ static void __init free_highpages(void) if (res_end > end) res_end = end; if (res_start != start) - totalhigh_pages += free_area(start, res_start, - NULL); + free_area_high(start, res_start); start = res_end; if (start == end) break; @@ -480,9 +507,8 @@ static void __init free_highpages(void) /* And now free anything which remains */ if (start < end) - totalhigh_pages += free_area(start, end, NULL); + free_area_high(start, end); } - totalram_pages += totalhigh_pages; #endif } @@ -493,71 +519,26 @@ static void __init free_highpages(void) */ void __init mem_init(void) { - unsigned long reserved_pages, free_pages; - struct memblock_region *reg; - int i; #ifdef CONFIG_HAVE_TCM /* These pointers are filled in on TCM detection */ extern u32 dtcm_end; extern u32 itcm_end; #endif - max_mapnr = pfn_to_page(max_pfn + PHYS_PFN_OFFSET) - mem_map; + set_max_mapnr(pfn_to_page(max_pfn) - mem_map); /* this will put all unused low memory onto the freelists */ - free_unused_memmap(&meminfo); - - totalram_pages += free_all_bootmem(); + free_unused_memmap(); + free_all_bootmem(); #ifdef CONFIG_SA1111 /* now that our DMA memory is actually so designated, we can free it */ - totalram_pages += free_area(PHYS_PFN_OFFSET, - __phys_to_pfn(__pa(swapper_pg_dir)), NULL); + free_reserved_area(__va(PHYS_OFFSET), swapper_pg_dir, -1, NULL); #endif free_highpages(); - reserved_pages = free_pages = 0; - - for_each_bank(i, &meminfo) { - struct membank *bank = &meminfo.bank[i]; - unsigned int pfn1, pfn2; - struct page *page, *end; - - pfn1 = bank_pfn_start(bank); - pfn2 = bank_pfn_end(bank); - - page = pfn_to_page(pfn1); - end = pfn_to_page(pfn2 - 1) + 1; - - do { - if (PageReserved(page)) - reserved_pages++; - else if (!page_count(page)) - free_pages++; - page++; - } while (page < end); - } - - /* - * Since our memory may not be contiguous, calculate the - * real number of pages we have in this system - */ - printk(KERN_INFO "Memory:"); - num_physpages = 0; - for_each_memblock(memory, reg) { - unsigned long pages = memblock_region_memory_end_pfn(reg) - - memblock_region_memory_base_pfn(reg); - num_physpages += pages; - printk(" %ldMB", pages >> (20 - PAGE_SHIFT)); - } - printk(" = %luMB total\n", num_physpages >> (20 - PAGE_SHIFT)); - - printk(KERN_NOTICE "Memory: %luk/%luk available, %luk reserved, %luK highmem\n", - nr_free_pages() << (PAGE_SHIFT-10), - free_pages << (PAGE_SHIFT-10), - reserved_pages << (PAGE_SHIFT-10), - totalhigh_pages << (PAGE_SHIFT-10)); + mem_init_print_info(NULL); #define MLK(b, t) b, t, ((t) - (b)) >> 10 #define MLM(b, t) b, t, ((t) - (b)) >> 20 @@ -570,18 +551,18 @@ void __init mem_init(void) " ITCM : 0x%08lx - 0x%08lx (%4ld kB)\n" #endif " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n" -#ifdef CONFIG_MMU - " DMA : 0x%08lx - 0x%08lx (%4ld MB)\n" -#endif " vmalloc : 0x%08lx - 0x%08lx (%4ld MB)\n" " 
lowmem : 0x%08lx - 0x%08lx (%4ld MB)\n" #ifdef CONFIG_HIGHMEM " pkmap : 0x%08lx - 0x%08lx (%4ld MB)\n" #endif +#ifdef CONFIG_MODULES " modules : 0x%08lx - 0x%08lx (%4ld MB)\n" - " .init : 0x%p" " - 0x%p" " (%4d kB)\n" +#endif " .text : 0x%p" " - 0x%p" " (%4d kB)\n" - " .data : 0x%p" " - 0x%p" " (%4d kB)\n", + " .init : 0x%p" " - 0x%p" " (%4d kB)\n" + " .data : 0x%p" " - 0x%p" " (%4d kB)\n" + " .bss : 0x%p" " - 0x%p" " (%4d kB)\n", MLK(UL(CONFIG_VECTORS_BASE), UL(CONFIG_VECTORS_BASE) + (PAGE_SIZE)), @@ -590,20 +571,20 @@ void __init mem_init(void) MLK(ITCM_OFFSET, (unsigned long) itcm_end), #endif MLK(FIXADDR_START, FIXADDR_TOP), -#ifdef CONFIG_MMU - MLM(CONSISTENT_BASE, CONSISTENT_END), -#endif MLM(VMALLOC_START, VMALLOC_END), MLM(PAGE_OFFSET, (unsigned long)high_memory), #ifdef CONFIG_HIGHMEM MLM(PKMAP_BASE, (PKMAP_BASE) + (LAST_PKMAP) * (PAGE_SIZE)), #endif +#ifdef CONFIG_MODULES MLM(MODULES_VADDR, MODULES_END), +#endif - MLK_ROUNDUP(__init_begin, __init_end), MLK_ROUNDUP(_text, _etext), - MLK_ROUNDUP(_sdata, _edata)); + MLK_ROUNDUP(__init_begin, __init_end), + MLK_ROUNDUP(_sdata, _edata), + MLK_ROUNDUP(__bss_start, __bss_stop)); #undef MLK #undef MLM @@ -614,9 +595,6 @@ void __init mem_init(void) * be detected at build time already. */ #ifdef CONFIG_MMU - BUILD_BUG_ON(VMALLOC_END > CONSISTENT_BASE); - BUG_ON(VMALLOC_END > CONSISTENT_BASE); - BUILD_BUG_ON(TASK_SIZE > MODULES_VADDR); BUG_ON(TASK_SIZE > MODULES_VADDR); #endif @@ -626,7 +604,7 @@ void __init mem_init(void) BUG_ON(PKMAP_BASE + LAST_PKMAP * PAGE_SIZE > PAGE_OFFSET); #endif - if (PAGE_SIZE >= 16384 && num_physpages <= 128) { + if (PAGE_SIZE >= 16384 && get_num_physpages() <= 128) { extern int sysctl_overcommit_memory; /* * On a machine this small we won't get @@ -642,15 +620,13 @@ void free_initmem(void) #ifdef CONFIG_HAVE_TCM extern char __tcm_start, __tcm_end; - totalram_pages += free_area(__phys_to_pfn(__pa(&__tcm_start)), - __phys_to_pfn(__pa(&__tcm_end)), - "TCM link"); + poison_init_mem(&__tcm_start, &__tcm_end - &__tcm_start); + free_reserved_area(&__tcm_start, &__tcm_end, -1, "TCM link"); #endif + poison_init_mem(__init_begin, __init_end - __init_begin); if (!machine_is_integrator() && !machine_is_cintegrator()) - totalram_pages += free_area(__phys_to_pfn(__pa(__init_begin)), - __phys_to_pfn(__pa(__init_end)), - "init"); + free_initmem_default(-1); } #ifdef CONFIG_BLK_DEV_INITRD @@ -659,10 +635,10 @@ static int keep_initrd; void free_initrd_mem(unsigned long start, unsigned long end) { - if (!keep_initrd) - totalram_pages += free_area(__phys_to_pfn(__pa(start)), - __phys_to_pfn(__pa(end)), - "initrd"); + if (!keep_initrd) { + poison_init_mem((void *)start, PAGE_ALIGN(end) - start); + free_reserved_area((void *)start, (void *)end, -1, "initrd"); + } } static int __init keepinitrd_setup(char *__unused) diff --git a/arch/arm/mm/iomap.c b/arch/arm/mm/iomap.c index ffad039cbb7..4614208369f 100644 --- a/arch/arm/mm/iomap.c +++ b/arch/arm/mm/iomap.c @@ -9,6 +9,9 @@ #include <linux/ioport.h> #include <linux/io.h> +unsigned long vga_base; +EXPORT_SYMBOL(vga_base); + #ifdef __io void __iomem *ioport_map(unsigned long port, unsigned int nr) { @@ -23,26 +26,11 @@ EXPORT_SYMBOL(ioport_unmap); #endif #ifdef CONFIG_PCI -void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen) -{ - resource_size_t start = pci_resource_start(dev, bar); - resource_size_t len = pci_resource_len(dev, bar); - unsigned long flags = pci_resource_flags(dev, bar); +unsigned long pcibios_min_io = 0x1000; +EXPORT_SYMBOL(pcibios_min_io); - if 
(!len || !start) - return NULL; - if (maxlen && len > maxlen) - len = maxlen; - if (flags & IORESOURCE_IO) - return ioport_map(start, len); - if (flags & IORESOURCE_MEM) { - if (flags & IORESOURCE_CACHEABLE) - return ioremap(start, len); - return ioremap_nocache(start, len); - } - return NULL; -} -EXPORT_SYMBOL(pci_iomap); +unsigned long pcibios_min_mem = 0x01000000; +EXPORT_SYMBOL(pcibios_min_mem); void pci_iounmap(struct pci_dev *dev, void __iomem *addr) { diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c index 17e7b0b57e4..d1e5ad7ab3b 100644 --- a/arch/arm/mm/ioremap.c +++ b/arch/arm/mm/ioremap.c @@ -25,22 +25,83 @@ #include <linux/mm.h> #include <linux/vmalloc.h> #include <linux/io.h> +#include <linux/sizes.h> +#include <asm/cp15.h> #include <asm/cputype.h> #include <asm/cacheflush.h> #include <asm/mmu_context.h> #include <asm/pgalloc.h> #include <asm/tlbflush.h> -#include <asm/sizes.h> +#include <asm/system_info.h> #include <asm/mach/map.h> +#include <asm/mach/pci.h> #include "mm.h" -/* - * Used by ioremap() and iounmap() code to mark (super)section-mapped - * I/O regions in vm_struct->flags field. - */ -#define VM_ARM_SECTION_MAPPING 0x80000000 + +LIST_HEAD(static_vmlist); + +static struct static_vm *find_static_vm_paddr(phys_addr_t paddr, + size_t size, unsigned int mtype) +{ + struct static_vm *svm; + struct vm_struct *vm; + + list_for_each_entry(svm, &static_vmlist, list) { + vm = &svm->vm; + if (!(vm->flags & VM_ARM_STATIC_MAPPING)) + continue; + if ((vm->flags & VM_ARM_MTYPE_MASK) != VM_ARM_MTYPE(mtype)) + continue; + + if (vm->phys_addr > paddr || + paddr + size - 1 > vm->phys_addr + vm->size - 1) + continue; + + return svm; + } + + return NULL; +} + +struct static_vm *find_static_vm_vaddr(void *vaddr) +{ + struct static_vm *svm; + struct vm_struct *vm; + + list_for_each_entry(svm, &static_vmlist, list) { + vm = &svm->vm; + + /* static_vmlist is ascending order */ + if (vm->addr > vaddr) + break; + + if (vm->addr <= vaddr && vm->addr + vm->size > vaddr) + return svm; + } + + return NULL; +} + +void __init add_static_vm_early(struct static_vm *svm) +{ + struct static_vm *curr_svm; + struct vm_struct *vm; + void *vaddr; + + vm = &svm->vm; + vm_area_add_early(vm); + vaddr = vm->addr; + + list_for_each_entry(curr_svm, &static_vmlist, list) { + vm = &curr_svm->vm; + + if (vm->addr > vaddr) + break; + } + list_add_tail(&svm->list, &curr_svm->list); +} int ioremap_page(unsigned long virt, unsigned long phys, const struct mem_type *mtype) @@ -50,21 +111,21 @@ int ioremap_page(unsigned long virt, unsigned long phys, } EXPORT_SYMBOL(ioremap_page); -void __check_kvm_seq(struct mm_struct *mm) +void __check_vmalloc_seq(struct mm_struct *mm) { unsigned int seq; do { - seq = init_mm.context.kvm_seq; + seq = init_mm.context.vmalloc_seq; memcpy(pgd_offset(mm, VMALLOC_START), pgd_offset_k(VMALLOC_START), sizeof(pgd_t) * (pgd_index(VMALLOC_END) - pgd_index(VMALLOC_START))); - mm->context.kvm_seq = seq; - } while (seq != init_mm.context.kvm_seq); + mm->context.vmalloc_seq = seq; + } while (seq != init_mm.context.vmalloc_seq); } -#ifndef CONFIG_SMP +#if !defined(CONFIG_SMP) && !defined(CONFIG_ARM_LPAE) /* * Section support is unsafe on SMP - If you iounmap and ioremap a region, * the other CPUs will not see this change until their next context switch. 
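/*
 * Editorial aside (not part of the patch): a minimal user-space model of the
 * static_vmlist bookkeeping introduced above.  add_static_vm_early() keeps the
 * list sorted by virtual address, which is what lets find_static_vm_vaddr()
 * stop walking as soon as it has passed the address it is looking for.  The
 * names and the plain singly-linked list below are illustrative only; the
 * kernel code uses struct static_vm, list_head and vm_area_add_early().
 */
#include <stdio.h>

struct svm_model {
	unsigned long addr, size;
	struct svm_model *next;
};

static struct svm_model *static_vmlist_model;

/* Insert keeping the list in ascending address order. */
static void add_static_vm_model(struct svm_model *svm)
{
	struct svm_model **p = &static_vmlist_model;

	while (*p && (*p)->addr <= svm->addr)
		p = &(*p)->next;
	svm->next = *p;
	*p = svm;
}

/* Ordered lookup: bail out once past the requested address. */
static struct svm_model *find_static_vm_model(unsigned long vaddr)
{
	struct svm_model *svm;

	for (svm = static_vmlist_model; svm; svm = svm->next) {
		if (svm->addr > vaddr)
			break;			/* list is ascending */
		if (vaddr < svm->addr + svm->size)
			return svm;
	}
	return NULL;
}

int main(void)
{
	struct svm_model a = { 0xf8000000UL, 0x100000UL, NULL };
	struct svm_model b = { 0xf0000000UL, 0x200000UL, NULL };

	add_static_vm_model(&a);
	add_static_vm_model(&b);	/* sorts ahead of 'a' despite later insert */

	printf("%d\n", find_static_vm_model(0xf0100000UL) == &b);	/* prints 1 */
	printf("%d\n", find_static_vm_model(0xf7ffffffUL) == NULL);	/* prints 1 */
	return 0;
}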
@@ -79,23 +140,26 @@ static void unmap_area_sections(unsigned long virt, unsigned long size) { unsigned long addr = virt, end = virt + (size & ~(SZ_1M - 1)); pgd_t *pgd; + pud_t *pud; + pmd_t *pmdp; flush_cache_vunmap(addr, end); pgd = pgd_offset_k(addr); + pud = pud_offset(pgd, addr); + pmdp = pmd_offset(pud, addr); do { - pmd_t pmd, *pmdp = pmd_offset(pgd, addr); + pmd_t pmd = *pmdp; - pmd = *pmdp; if (!pmd_none(pmd)) { /* * Clear the PMD from the page table, and - * increment the kvm sequence so others + * increment the vmalloc sequence so others * notice this change. * * Note: this is still racy on SMP machines. */ pmd_clear(pmdp); - init_mm.context.kvm_seq++; + init_mm.context.vmalloc_seq++; /* * Free the page table, if there was one. @@ -104,16 +168,16 @@ static void unmap_area_sections(unsigned long virt, unsigned long size) pte_free_kernel(&init_mm, pmd_page_vaddr(pmd)); } - addr += PGDIR_SIZE; - pgd++; + addr += PMD_SIZE; + pmdp += 2; } while (addr < end); /* * Ensure that the active_mm is up to date - we want to * catch any use-after-iounmap cases. */ - if (current->active_mm->context.kvm_seq != init_mm.context.kvm_seq) - __check_kvm_seq(current->active_mm); + if (current->active_mm->context.vmalloc_seq != init_mm.context.vmalloc_seq) + __check_vmalloc_seq(current->active_mm); flush_tlb_kernel_range(virt, end); } @@ -124,6 +188,8 @@ remap_area_sections(unsigned long virt, unsigned long pfn, { unsigned long addr = virt, end = virt + size; pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; /* * Remove and free any PTE-based mapping, and @@ -132,17 +198,17 @@ remap_area_sections(unsigned long virt, unsigned long pfn, unmap_area_sections(virt, size); pgd = pgd_offset_k(addr); + pud = pud_offset(pgd, addr); + pmd = pmd_offset(pud, addr); do { - pmd_t *pmd = pmd_offset(pgd, addr); - pmd[0] = __pmd(__pfn_to_phys(pfn) | type->prot_sect); pfn += SZ_1M >> PAGE_SHIFT; pmd[1] = __pmd(__pfn_to_phys(pfn) | type->prot_sect); pfn += SZ_1M >> PAGE_SHIFT; flush_pmd_entry(pmd); - addr += PGDIR_SIZE; - pgd++; + addr += PMD_SIZE; + pmd += 2; } while (addr < end); return 0; @@ -154,6 +220,8 @@ remap_area_supersections(unsigned long virt, unsigned long pfn, { unsigned long addr = virt, end = virt + size; pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; /* * Remove and free any PTE-based mapping, and @@ -162,6 +230,8 @@ remap_area_supersections(unsigned long virt, unsigned long pfn, unmap_area_sections(virt, size); pgd = pgd_offset_k(virt); + pud = pud_offset(pgd, addr); + pmd = pmd_offset(pud, addr); do { unsigned long super_pmd_val, i; @@ -170,14 +240,12 @@ remap_area_supersections(unsigned long virt, unsigned long pfn, super_pmd_val |= ((pfn >> (32 - PAGE_SHIFT)) & 0xf) << 20; for (i = 0; i < 8; i++) { - pmd_t *pmd = pmd_offset(pgd, addr); - pmd[0] = __pmd(super_pmd_val); pmd[1] = __pmd(super_pmd_val); flush_pmd_entry(pmd); - addr += PGDIR_SIZE; - pgd++; + addr += PMD_SIZE; + pmd += 2; } pfn += SUPERSECTION_SIZE >> PAGE_SHIFT; @@ -193,23 +261,16 @@ void __iomem * __arm_ioremap_pfn_caller(unsigned long pfn, const struct mem_type *type; int err; unsigned long addr; - struct vm_struct * area; + struct vm_struct *area; + phys_addr_t paddr = __pfn_to_phys(pfn); +#ifndef CONFIG_ARM_LPAE /* * High mappings must be supersection aligned */ - if (pfn >= 0x100000 && (__pfn_to_phys(pfn) & ~SUPERSECTION_MASK)) + if (pfn >= 0x100000 && (paddr & ~SUPERSECTION_MASK)) return NULL; - - /* - * Don't allow RAM to be mapped - this causes problems with ARMv6+ - */ - if (pfn_valid(pfn)) { - printk(KERN_WARNING "BUG: Your driver calls 
ioremap() on system memory. This leads\n" - KERN_WARNING "to architecturally unpredictable behaviour on ARMv6+, and ioremap()\n" - KERN_WARNING "will fail in the next kernel release. Please fix your driver.\n"); - WARN_ON(1); - } +#endif type = get_mem_type(mtype); if (!type) @@ -220,24 +281,45 @@ void __iomem * __arm_ioremap_pfn_caller(unsigned long pfn, */ size = PAGE_ALIGN(offset + size); + /* + * Try to reuse one of the static mapping whenever possible. + */ + if (size && !(sizeof(phys_addr_t) == 4 && pfn >= 0x100000)) { + struct static_vm *svm; + + svm = find_static_vm_paddr(paddr, size, mtype); + if (svm) { + addr = (unsigned long)svm->vm.addr; + addr += paddr - svm->vm.phys_addr; + return (void __iomem *) (offset + addr); + } + } + + /* + * Don't allow RAM to be mapped - this causes problems with ARMv6+ + */ + if (WARN_ON(pfn_valid(pfn))) + return NULL; + area = get_vm_area_caller(size, VM_IOREMAP, caller); if (!area) return NULL; addr = (unsigned long)area->addr; + area->phys_addr = paddr; -#ifndef CONFIG_SMP +#if !defined(CONFIG_SMP) && !defined(CONFIG_ARM_LPAE) if (DOMAIN_IO == 0 && (((cpu_architecture() >= CPU_ARCH_ARMv6) && (get_cr() & CR_XP)) || cpu_is_xsc3()) && pfn >= 0x100000 && - !((__pfn_to_phys(pfn) | size | addr) & ~SUPERSECTION_MASK)) { + !((paddr | size | addr) & ~SUPERSECTION_MASK)) { area->flags |= VM_ARM_SECTION_MAPPING; err = remap_area_supersections(addr, pfn, size, type); - } else if (!((__pfn_to_phys(pfn) | size | addr) & ~PMD_MASK)) { + } else if (!((paddr | size | addr) & ~PMD_MASK)) { area->flags |= VM_ARM_SECTION_MAPPING; err = remap_area_sections(addr, pfn, size, type); } else #endif - err = ioremap_page_range(addr, addr + size, __pfn_to_phys(pfn), + err = ioremap_page_range(addr, addr + size, paddr, __pgprot(type->prot_pte)); if (err) { @@ -249,10 +331,10 @@ void __iomem * __arm_ioremap_pfn_caller(unsigned long pfn, return (void __iomem *) (offset + addr); } -void __iomem *__arm_ioremap_caller(unsigned long phys_addr, size_t size, +void __iomem *__arm_ioremap_caller(phys_addr_t phys_addr, size_t size, unsigned int mtype, void *caller) { - unsigned long last_addr; + phys_addr_t last_addr; unsigned long offset = phys_addr & ~PAGE_MASK; unsigned long pfn = __phys_to_pfn(phys_addr); @@ -285,40 +367,92 @@ __arm_ioremap_pfn(unsigned long pfn, unsigned long offset, size_t size, } EXPORT_SYMBOL(__arm_ioremap_pfn); +void __iomem * (*arch_ioremap_caller)(phys_addr_t, size_t, + unsigned int, void *) = + __arm_ioremap_caller; + void __iomem * -__arm_ioremap(unsigned long phys_addr, size_t size, unsigned int mtype) +__arm_ioremap(phys_addr_t phys_addr, size_t size, unsigned int mtype) { + return arch_ioremap_caller(phys_addr, size, mtype, + __builtin_return_address(0)); +} +EXPORT_SYMBOL(__arm_ioremap); + +/* + * Remap an arbitrary physical address space into the kernel virtual + * address space as memory. Needed when the kernel wants to execute + * code in external memory. This is needed for reprogramming source + * clocks that would affect normal memory for example. Please see + * CONFIG_GENERIC_ALLOCATOR for allocating external memory. 
+ */ +void __iomem * +__arm_ioremap_exec(phys_addr_t phys_addr, size_t size, bool cached) +{ + unsigned int mtype; + + if (cached) + mtype = MT_MEMORY_RWX; + else + mtype = MT_MEMORY_RWX_NONCACHED; + return __arm_ioremap_caller(phys_addr, size, mtype, __builtin_return_address(0)); } -EXPORT_SYMBOL(__arm_ioremap); void __iounmap(volatile void __iomem *io_addr) { void *addr = (void *)(PAGE_MASK & (unsigned long)io_addr); -#ifndef CONFIG_SMP - struct vm_struct **p, *tmp; - - /* - * If this is a section based mapping we need to handle it - * specially as the VM subsystem does not know how to handle - * such a beast. We need the lock here b/c we need to clear - * all the mappings before the area can be reclaimed - * by someone else. - */ - write_lock(&vmlist_lock); - for (p = &vmlist ; (tmp = *p) ; p = &tmp->next) { - if ((tmp->flags & VM_IOREMAP) && (tmp->addr == addr)) { - if (tmp->flags & VM_ARM_SECTION_MAPPING) { - unmap_area_sections((unsigned long)tmp->addr, - tmp->size); - } - break; - } + struct static_vm *svm; + + /* If this is a static mapping, we must leave it alone */ + svm = find_static_vm_vaddr(addr); + if (svm) + return; + +#if !defined(CONFIG_SMP) && !defined(CONFIG_ARM_LPAE) + { + struct vm_struct *vm; + + vm = find_vm_area(addr); + + /* + * If this is a section based mapping we need to handle it + * specially as the VM subsystem does not know how to handle + * such a beast. + */ + if (vm && (vm->flags & VM_ARM_SECTION_MAPPING)) + unmap_area_sections((unsigned long)vm->addr, vm->size); } - write_unlock(&vmlist_lock); #endif vunmap(addr); } -EXPORT_SYMBOL(__iounmap); + +void (*arch_iounmap)(volatile void __iomem *) = __iounmap; + +void __arm_iounmap(volatile void __iomem *io_addr) +{ + arch_iounmap(io_addr); +} +EXPORT_SYMBOL(__arm_iounmap); + +#ifdef CONFIG_PCI +static int pci_ioremap_mem_type = MT_DEVICE; + +void pci_ioremap_set_mem_type(int mem_type) +{ + pci_ioremap_mem_type = mem_type; +} + +int pci_ioremap_io(unsigned int offset, phys_addr_t phys_addr) +{ + BUG_ON(offset + SZ_64K > IO_SPACE_LIMIT); + + return ioremap_page_range(PCI_IO_VIRT_BASE + offset, + PCI_IO_VIRT_BASE + offset + SZ_64K, + phys_addr, + __pgprot(get_mem_type(pci_ioremap_mem_type)->prot_pte)); +} +EXPORT_SYMBOL_GPL(pci_ioremap_io); +#endif diff --git a/arch/arm/mm/l2c-common.c b/arch/arm/mm/l2c-common.c new file mode 100644 index 00000000000..10a3cf28c36 --- /dev/null +++ b/arch/arm/mm/l2c-common.c @@ -0,0 +1,20 @@ +/* + * Copyright (C) 2010 ARM Ltd. + * Written by Catalin Marinas <catalin.marinas@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/bug.h> +#include <linux/smp.h> +#include <asm/outercache.h> + +void outer_disable(void) +{ + WARN_ON(!irqs_disabled()); + WARN_ON(num_online_cpus() > 1); + + if (outer_cache.disable) + outer_cache.disable(); +} diff --git a/arch/arm/mm/l2c-l2x0-resume.S b/arch/arm/mm/l2c-l2x0-resume.S new file mode 100644 index 00000000000..99b05f21a59 --- /dev/null +++ b/arch/arm/mm/l2c-l2x0-resume.S @@ -0,0 +1,58 @@ +/* + * L2C-310 early resume code. This can be used by platforms to restore + * the settings of their L2 cache controller before restoring the + * processor state. + * + * This code can only be used to if you are running in the secure world. 
+ */ +#include <linux/linkage.h> +#include <asm/hardware/cache-l2x0.h> + + .text + +ENTRY(l2c310_early_resume) + adr r0, 1f + ldr r2, [r0] + add r0, r2, r0 + + ldmia r0, {r1, r2, r3, r4, r5, r6, r7, r8} + @ r1 = phys address of L2C-310 controller + @ r2 = aux_ctrl + @ r3 = tag_latency + @ r4 = data_latency + @ r5 = filter_start + @ r6 = filter_end + @ r7 = prefetch_ctrl + @ r8 = pwr_ctrl + + @ Check that the address has been initialised + teq r1, #0 + moveq pc, lr + + @ The prefetch and power control registers are revision dependent + @ and can be written whether or not the L2 cache is enabled + ldr r0, [r1, #L2X0_CACHE_ID] + and r0, r0, #L2X0_CACHE_ID_RTL_MASK + cmp r0, #L310_CACHE_ID_RTL_R2P0 + strcs r7, [r1, #L310_PREFETCH_CTRL] + cmp r0, #L310_CACHE_ID_RTL_R3P0 + strcs r8, [r1, #L310_POWER_CTRL] + + @ Don't setup the L2 cache if it is already enabled + ldr r0, [r1, #L2X0_CTRL] + tst r0, #L2X0_CTRL_EN + movne pc, lr + + str r3, [r1, #L310_TAG_LATENCY_CTRL] + str r4, [r1, #L310_DATA_LATENCY_CTRL] + str r6, [r1, #L310_ADDR_FILTER_END] + str r5, [r1, #L310_ADDR_FILTER_START] + + str r2, [r1, #L2X0_AUX_CTRL] + mov r9, #L2X0_CTRL_EN + str r9, [r1, #L2X0_CTRL] + mov pc, lr +ENDPROC(l2c310_early_resume) + + .align +1: .long l2x0_saved_regs - . diff --git a/arch/arm/mm/mm.h b/arch/arm/mm/mm.h index 6630620380a..ce727d47275 100644 --- a/arch/arm/mm/mm.h +++ b/arch/arm/mm/mm.h @@ -1,24 +1,48 @@ #ifdef CONFIG_MMU +#include <linux/list.h> +#include <linux/vmalloc.h> + +#include <asm/pgtable.h> /* the upper-most page table pointer */ extern pmd_t *top_pmd; -#define TOP_PTE(x) pte_offset_kernel(top_pmd, x) +/* + * 0xffff8000 to 0xffffffff is reserved for any ARM architecture + * specific hacks for copying pages efficiently, while 0xffff4000 + * is reserved for VIPT aliasing flushing by generic code. + * + * Note that we don't allow VIPT aliasing caches with SMP. + */ +#define COPYPAGE_MINICACHE 0xffff8000 +#define COPYPAGE_V6_FROM 0xffff8000 +#define COPYPAGE_V6_TO 0xffffc000 +/* PFN alias flushing, for VIPT caches */ +#define FLUSH_ALIAS_START 0xffff4000 -static inline pmd_t *pmd_off(pgd_t *pgd, unsigned long virt) +static inline void set_top_pte(unsigned long va, pte_t pte) { - return pmd_offset(pgd, virt); + pte_t *ptep = pte_offset_kernel(top_pmd, va); + set_pte_ext(ptep, pte, 0); + local_flush_tlb_kernel_page(va); +} + +static inline pte_t get_top_pte(unsigned long va) +{ + pte_t *ptep = pte_offset_kernel(top_pmd, va); + return *ptep; } static inline pmd_t *pmd_off_k(unsigned long virt) { - return pmd_off(pgd_offset_k(virt), virt); + return pmd_offset(pud_offset(pgd_offset_k(virt), virt), virt); } struct mem_type { - unsigned int prot_pte; - unsigned int prot_l1; - unsigned int prot_sect; + pteval_t prot_pte; + pteval_t prot_pte_s2; + pmdval_t prot_l1; + pmdval_t prot_sect; unsigned int domain; }; @@ -26,7 +50,50 @@ const struct mem_type *get_mem_type(unsigned int type); extern void __flush_dcache_page(struct address_space *mapping, struct page *page); +/* + * ARM specific vm_struct->flags bits. 
+ */ + +/* (super)section-mapped I/O regions used by ioremap()/iounmap() */ +#define VM_ARM_SECTION_MAPPING 0x80000000 + +/* permanent static mappings from iotable_init() */ +#define VM_ARM_STATIC_MAPPING 0x40000000 + +/* empty mapping */ +#define VM_ARM_EMPTY_MAPPING 0x20000000 + +/* mapping type (attributes) for permanent static mappings */ +#define VM_ARM_MTYPE(mt) ((mt) << 20) +#define VM_ARM_MTYPE_MASK (0x1f << 20) + +/* consistent regions used by dma_alloc_attrs() */ +#define VM_ARM_DMA_CONSISTENT 0x20000000 + + +struct static_vm { + struct vm_struct vm; + struct list_head list; +}; + +extern struct list_head static_vmlist; +extern struct static_vm *find_static_vm_vaddr(void *vaddr); +extern __init void add_static_vm_early(struct static_vm *svm); + #endif +#ifdef CONFIG_ZONE_DMA +extern phys_addr_t arm_dma_limit; +extern unsigned long arm_dma_pfn_limit; +#else +#define arm_dma_limit ((phys_addr_t)~0) +#define arm_dma_pfn_limit (~0ul >> PAGE_SHIFT) +#endif + +extern phys_addr_t arm_lowmem_limit; + void __init bootmem_init(void); void arm_mm_memblock_reserve(void); +void dma_contiguous_remap(void); + +unsigned long __clear_cr(unsigned long mask); diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c index b0a98305055..5e85ed37136 100644 --- a/arch/arm/mm/mmap.c +++ b/arch/arm/mm/mmap.c @@ -7,14 +7,41 @@ #include <linux/shm.h> #include <linux/sched.h> #include <linux/io.h> +#include <linux/personality.h> #include <linux/random.h> -#include <asm/cputype.h> -#include <asm/system.h> +#include <asm/cachetype.h> #define COLOUR_ALIGN(addr,pgoff) \ ((((addr)+SHMLBA-1)&~(SHMLBA-1)) + \ (((pgoff)<<PAGE_SHIFT) & (SHMLBA-1))) +/* gap between mmap and stack */ +#define MIN_GAP (128*1024*1024UL) +#define MAX_GAP ((TASK_SIZE)/6*5) + +static int mmap_is_legacy(void) +{ + if (current->personality & ADDR_COMPAT_LAYOUT) + return 1; + + if (rlimit(RLIMIT_STACK) == RLIM_INFINITY) + return 1; + + return sysctl_legacy_va_layout; +} + +static unsigned long mmap_base(unsigned long rnd) +{ + unsigned long gap = rlimit(RLIMIT_STACK); + + if (gap < MIN_GAP) + gap = MIN_GAP; + else if (gap > MAX_GAP) + gap = MAX_GAP; + + return PAGE_ALIGN(TASK_SIZE - gap - rnd); +} + /* * We need to ensure that shared mappings are correctly aligned to * avoid aliasing issues with VIPT caches. We need to ensure that @@ -30,26 +57,16 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, { struct mm_struct *mm = current->mm; struct vm_area_struct *vma; - unsigned long start_addr; -#ifdef CONFIG_CPU_V6 - unsigned int cache_type; - int do_align = 0, aliasing = 0; + int do_align = 0; + int aliasing = cache_is_vipt_aliasing(); + struct vm_unmapped_area_info info; /* * We only need to do colour alignment if either the I or D - * caches alias. This is indicated by bits 9 and 21 of the - * cache type register. + * caches alias. */ - cache_type = read_cpuid_cachetype(); - if (cache_type != read_cpuid_id()) { - aliasing = (cache_type | cache_type >> 12) & (1 << 11); - if (aliasing) - do_align = filp || flags & MAP_SHARED; - } -#else -#define do_align 0 -#define aliasing 0 -#endif + if (aliasing) + do_align = filp || (flags & MAP_SHARED); /* * We enforce the MAP_FIXED case. 
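/*
 * Editorial aside (not part of the patch): the top-down mmap base added in
 * mmap.c is simply the stack rlimit clamped to [MIN_GAP, MAX_GAP] and
 * subtracted, together with the random offset, from TASK_SIZE.  The
 * user-space model below reproduces that arithmetic; TASK_SIZE is an assumed
 * value for a 3G/1G split and PAGE_ALIGN is the usual round-up to a page.
 */
#include <stdio.h>

#define PAGE_SIZE	4096UL
#define PAGE_ALIGN(x)	(((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))
#define TASK_SIZE	0xbf000000UL		/* assumption for the demo */
#define MIN_GAP		(128 * 1024 * 1024UL)
#define MAX_GAP		((TASK_SIZE) / 6 * 5)

static unsigned long mmap_base_model(unsigned long stack_rlimit,
				     unsigned long rnd)
{
	unsigned long gap = stack_rlimit;

	if (gap < MIN_GAP)
		gap = MIN_GAP;
	else if (gap > MAX_GAP)
		gap = MAX_GAP;

	return PAGE_ALIGN(TASK_SIZE - gap - rnd);
}

int main(void)
{
	/* An 8 MB stack limit is clamped up to the 128 MB minimum gap. */
	printf("0x%08lx\n", mmap_base_model(8 * 1024 * 1024UL, 0));
	/* A very large stack limit is clamped down to MAX_GAP. */
	printf("0x%08lx\n", mmap_base_model(~0UL, 0));
	return 0;
}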
@@ -75,57 +92,106 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, (!vma || addr + len <= vma->vm_start)) return addr; } - if (len > mm->cached_hole_size) { - start_addr = addr = mm->free_area_cache; - } else { - start_addr = addr = TASK_UNMAPPED_BASE; - mm->cached_hole_size = 0; + + info.flags = 0; + info.length = len; + info.low_limit = mm->mmap_base; + info.high_limit = TASK_SIZE; + info.align_mask = do_align ? (PAGE_MASK & (SHMLBA - 1)) : 0; + info.align_offset = pgoff << PAGE_SHIFT; + return vm_unmapped_area(&info); +} + +unsigned long +arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, + const unsigned long len, const unsigned long pgoff, + const unsigned long flags) +{ + struct vm_area_struct *vma; + struct mm_struct *mm = current->mm; + unsigned long addr = addr0; + int do_align = 0; + int aliasing = cache_is_vipt_aliasing(); + struct vm_unmapped_area_info info; + + /* + * We only need to do colour alignment if either the I or D + * caches alias. + */ + if (aliasing) + do_align = filp || (flags & MAP_SHARED); + + /* requested length too big for entire address space */ + if (len > TASK_SIZE) + return -ENOMEM; + + if (flags & MAP_FIXED) { + if (aliasing && flags & MAP_SHARED && + (addr - (pgoff << PAGE_SHIFT)) & (SHMLBA - 1)) + return -EINVAL; + return addr; } - /* 8 bits of randomness in 20 address space bits */ - if (current->flags & PF_RANDOMIZE) - addr += (get_random_int() % (1 << 8)) << PAGE_SHIFT; - -full_search: - if (do_align) - addr = COLOUR_ALIGN(addr, pgoff); - else - addr = PAGE_ALIGN(addr); - - for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { - /* At this point: (!vma || addr < vma->vm_end). */ - if (TASK_SIZE - len < addr) { - /* - * Start a new search - just in case we missed - * some holes. - */ - if (start_addr != TASK_UNMAPPED_BASE) { - start_addr = addr = TASK_UNMAPPED_BASE; - mm->cached_hole_size = 0; - goto full_search; - } - return -ENOMEM; - } - if (!vma || addr + len <= vma->vm_start) { - /* - * Remember the place where we stopped the search: - */ - mm->free_area_cache = addr + len; - return addr; - } - if (addr + mm->cached_hole_size < vma->vm_start) - mm->cached_hole_size = vma->vm_start - addr; - addr = vma->vm_end; + + /* requesting a specific address */ + if (addr) { if (do_align) addr = COLOUR_ALIGN(addr, pgoff); + else + addr = PAGE_ALIGN(addr); + vma = find_vma(mm, addr); + if (TASK_SIZE - len >= addr && + (!vma || addr + len <= vma->vm_start)) + return addr; } + + info.flags = VM_UNMAPPED_AREA_TOPDOWN; + info.length = len; + info.low_limit = FIRST_USER_ADDRESS; + info.high_limit = mm->mmap_base; + info.align_mask = do_align ? (PAGE_MASK & (SHMLBA - 1)) : 0; + info.align_offset = pgoff << PAGE_SHIFT; + addr = vm_unmapped_area(&info); + + /* + * A failed mmap() very likely causes application failure, + * so fall back to the bottom-up function here. This scenario + * can happen with large stack limits and large mmap() + * allocations. 
+ */ + if (addr & ~PAGE_MASK) { + VM_BUG_ON(addr != -ENOMEM); + info.flags = 0; + info.low_limit = mm->mmap_base; + info.high_limit = TASK_SIZE; + addr = vm_unmapped_area(&info); + } + + return addr; } +void arch_pick_mmap_layout(struct mm_struct *mm) +{ + unsigned long random_factor = 0UL; + + /* 8 bits of randomness in 20 address space bits */ + if ((current->flags & PF_RANDOMIZE) && + !(current->personality & ADDR_NO_RANDOMIZE)) + random_factor = (get_random_int() % (1 << 8)) << PAGE_SHIFT; + + if (mmap_is_legacy()) { + mm->mmap_base = TASK_UNMAPPED_BASE + random_factor; + mm->get_unmapped_area = arch_get_unmapped_area; + } else { + mm->mmap_base = mmap_base(random_factor); + mm->get_unmapped_area = arch_get_unmapped_area_topdown; + } +} /* * You really shouldn't be using read() or write() on /dev/mem. This * might go away in the future. */ -int valid_phys_addr_range(unsigned long addr, size_t size) +int valid_phys_addr_range(phys_addr_t addr, size_t size) { if (addr < PHYS_OFFSET) return 0; @@ -136,13 +202,11 @@ int valid_phys_addr_range(unsigned long addr, size_t size) } /* - * We don't use supersection mappings for mmap() on /dev/mem, which - * means that we can't map the memory area above the 4G barrier into - * userspace. + * Do not allow /dev/mem mappings beyond the supported physical range. */ int valid_mmap_phys_addr_range(unsigned long pfn, size_t size) { - return !(pfn + (size >> PAGE_SHIFT) > 0x00100000); + return (pfn + (size >> PAGE_SHIFT)) <= (1 + (PHYS_MASK >> PAGE_SHIFT)); } #ifdef CONFIG_STRICT_DEVMEM diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 72ad3e1f56c..6e3ba8d112a 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -15,22 +15,30 @@ #include <linux/nodemask.h> #include <linux/memblock.h> #include <linux/fs.h> +#include <linux/vmalloc.h> +#include <linux/sizes.h> +#include <asm/cp15.h> #include <asm/cputype.h> #include <asm/sections.h> #include <asm/cachetype.h> +#include <asm/sections.h> #include <asm/setup.h> -#include <asm/sizes.h> #include <asm/smp_plat.h> #include <asm/tlb.h> #include <asm/highmem.h> +#include <asm/system_info.h> +#include <asm/traps.h> +#include <asm/procinfo.h> +#include <asm/memory.h> #include <asm/mach/arch.h> #include <asm/mach/map.h> +#include <asm/mach/pci.h> +#include <asm/fixmap.h> #include "mm.h" - -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); +#include "tcm.h" /* * empty_zero_page is a special page that is used for @@ -54,6 +62,9 @@ static unsigned int cachepolicy __initdata = CPOLICY_WRITEBACK; static unsigned int ecc_mask __initdata = 0; pgprot_t pgprot_user; pgprot_t pgprot_kernel; +pgprot_t pgprot_hyp_device; +pgprot_t pgprot_s2; +pgprot_t pgprot_s2_device; EXPORT_SYMBOL(pgprot_user); EXPORT_SYMBOL(pgprot_kernel); @@ -61,61 +72,100 @@ EXPORT_SYMBOL(pgprot_kernel); struct cachepolicy { const char policy[16]; unsigned int cr_mask; - unsigned int pmd; - unsigned int pte; + pmdval_t pmd; + pteval_t pte; + pteval_t pte_s2; }; +#ifdef CONFIG_ARM_LPAE +#define s2_policy(policy) policy +#else +#define s2_policy(policy) 0 +#endif + static struct cachepolicy cache_policies[] __initdata = { { .policy = "uncached", .cr_mask = CR_W|CR_C, .pmd = PMD_SECT_UNCACHED, .pte = L_PTE_MT_UNCACHED, + .pte_s2 = s2_policy(L_PTE_S2_MT_UNCACHED), }, { .policy = "buffered", .cr_mask = CR_C, .pmd = PMD_SECT_BUFFERED, .pte = L_PTE_MT_BUFFERABLE, + .pte_s2 = s2_policy(L_PTE_S2_MT_UNCACHED), }, { .policy = "writethrough", .cr_mask = 0, .pmd = PMD_SECT_WT, .pte = L_PTE_MT_WRITETHROUGH, + .pte_s2 = s2_policy(L_PTE_S2_MT_WRITETHROUGH), }, { 
.policy = "writeback", .cr_mask = 0, .pmd = PMD_SECT_WB, .pte = L_PTE_MT_WRITEBACK, + .pte_s2 = s2_policy(L_PTE_S2_MT_WRITEBACK), }, { .policy = "writealloc", .cr_mask = 0, .pmd = PMD_SECT_WBWA, .pte = L_PTE_MT_WRITEALLOC, + .pte_s2 = s2_policy(L_PTE_S2_MT_WRITEBACK), } }; +#ifdef CONFIG_CPU_CP15 +static unsigned long initial_pmd_value __initdata = 0; + /* - * These are useful for identifying cache coherency - * problems by allowing the cache or the cache and - * writebuffer to be turned off. (Note: the write - * buffer should not be on and the cache off). + * Initialise the cache_policy variable with the initial state specified + * via the "pmd" value. This is used to ensure that on ARMv6 and later, + * the C code sets the page tables up with the same policy as the head + * assembly code, which avoids an illegal state where the TLBs can get + * confused. See comments in early_cachepolicy() for more information. */ -static int __init early_cachepolicy(char *p) +void __init init_default_cache_policy(unsigned long pmd) { int i; + initial_pmd_value = pmd; + + pmd &= PMD_SECT_TEX(1) | PMD_SECT_BUFFERABLE | PMD_SECT_CACHEABLE; + + for (i = 0; i < ARRAY_SIZE(cache_policies); i++) + if (cache_policies[i].pmd == pmd) { + cachepolicy = i; + break; + } + + if (i == ARRAY_SIZE(cache_policies)) + pr_err("ERROR: could not find cache policy\n"); +} + +/* + * These are useful for identifying cache coherency problems by allowing + * the cache or the cache and writebuffer to be turned off. (Note: the + * write buffer should not be on and the cache off). + */ +static int __init early_cachepolicy(char *p) +{ + int i, selected = -1; + for (i = 0; i < ARRAY_SIZE(cache_policies); i++) { int len = strlen(cache_policies[i].policy); if (memcmp(p, cache_policies[i].policy, len) == 0) { - cachepolicy = i; - cr_alignment &= ~cache_policies[i].cr_mask; - cr_no_alignment &= ~cache_policies[i].cr_mask; + selected = i; break; } } - if (i == ARRAY_SIZE(cache_policies)) - printk(KERN_ERR "ERROR: unknown or unsupported cache policy\n"); + + if (selected == -1) + pr_err("ERROR: unknown or unsupported cache policy\n"); + /* * This restriction is partly to do with the way we boot; it is * unpredictable to have memory mapped using two different sets of @@ -123,12 +173,18 @@ static int __init early_cachepolicy(char *p) * change these attributes once the initial assembly has setup the * page tables. 
*/ - if (cpu_architecture() >= CPU_ARCH_ARMv6) { - printk(KERN_WARNING "Only cachepolicy=writeback supported on ARMv6 and later\n"); - cachepolicy = CPOLICY_WRITEBACK; + if (cpu_architecture() >= CPU_ARCH_ARMv6 && selected != cachepolicy) { + pr_warn("Only cachepolicy=%s supported on ARMv6 and later\n", + cache_policies[cachepolicy].policy); + return 0; + } + + if (selected != cachepolicy) { + unsigned long cr = __clear_cr(cache_policies[selected].cr_mask); + cachepolicy = selected; + flush_cache_all(); + set_cr(cr); } - flush_cache_all(); - set_cr(cr_alignment); return 0; } early_param("cachepolicy", early_cachepolicy); @@ -151,6 +207,7 @@ static int __init early_nowrite(char *__unused) } early_param("nowb", early_nowrite); +#ifndef CONFIG_ARM_LPAE static int __init early_ecc(char *p) { if (memcmp(p, "on", 2) == 0) @@ -160,43 +217,35 @@ static int __init early_ecc(char *p) return 0; } early_param("ecc", early_ecc); +#endif -static int __init noalign_setup(char *__unused) +#else /* ifdef CONFIG_CPU_CP15 */ + +static int __init early_cachepolicy(char *p) { - cr_alignment &= ~CR_A; - cr_no_alignment &= ~CR_A; - set_cr(cr_alignment); - return 1; + pr_warning("cachepolicy kernel parameter not supported without cp15\n"); } -__setup("noalign", noalign_setup); +early_param("cachepolicy", early_cachepolicy); -#ifndef CONFIG_SMP -void adjust_cr(unsigned long mask, unsigned long set) +static int __init noalign_setup(char *__unused) { - unsigned long flags; - - mask &= ~CR_A; - - set &= mask; - - local_irq_save(flags); - - cr_no_alignment = (cr_no_alignment & ~mask) | set; - cr_alignment = (cr_alignment & ~mask) | set; - - set_cr((get_cr() & ~mask) | set); - - local_irq_restore(flags); + pr_warning("noalign kernel parameter not supported without cp15\n"); } -#endif +__setup("noalign", noalign_setup); + +#endif /* ifdef CONFIG_CPU_CP15 / else */ -#define PROT_PTE_DEVICE L_PTE_PRESENT|L_PTE_YOUNG|L_PTE_DIRTY|L_PTE_WRITE +#define PROT_PTE_DEVICE L_PTE_PRESENT|L_PTE_YOUNG|L_PTE_DIRTY|L_PTE_XN +#define PROT_PTE_S2_DEVICE PROT_PTE_DEVICE #define PROT_SECT_DEVICE PMD_TYPE_SECT|PMD_SECT_AP_WRITE static struct mem_type mem_types[] = { [MT_DEVICE] = { /* Strongly ordered / ARMv6 shared device */ .prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_SHARED | L_PTE_SHARED, + .prot_pte_s2 = s2_policy(PROT_PTE_S2_DEVICE) | + s2_policy(L_PTE_S2_MT_DEV_SHARED) | + L_PTE_SHARED, .prot_l1 = PMD_TYPE_TABLE, .prot_sect = PROT_SECT_DEVICE | PMD_SECT_S, .domain = DOMAIN_IO, @@ -212,7 +261,7 @@ static struct mem_type mem_types[] = { .prot_l1 = PMD_TYPE_TABLE, .prot_sect = PROT_SECT_DEVICE | PMD_SECT_WB, .domain = DOMAIN_IO, - }, + }, [MT_DEVICE_WC] = { /* ioremap_wc */ .prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_WC, .prot_l1 = PMD_TYPE_TABLE, @@ -229,25 +278,33 @@ static struct mem_type mem_types[] = { .prot_sect = PMD_TYPE_SECT | PMD_SECT_XN, .domain = DOMAIN_KERNEL, }, +#ifndef CONFIG_ARM_LPAE [MT_MINICLEAN] = { .prot_sect = PMD_TYPE_SECT | PMD_SECT_XN | PMD_SECT_MINICACHE, .domain = DOMAIN_KERNEL, }, +#endif [MT_LOW_VECTORS] = { .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | - L_PTE_EXEC, + L_PTE_RDONLY, .prot_l1 = PMD_TYPE_TABLE, .domain = DOMAIN_USER, }, [MT_HIGH_VECTORS] = { .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | - L_PTE_USER | L_PTE_EXEC, + L_PTE_USER | L_PTE_RDONLY, .prot_l1 = PMD_TYPE_TABLE, .domain = DOMAIN_USER, }, - [MT_MEMORY] = { + [MT_MEMORY_RWX] = { + .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY, + .prot_l1 = PMD_TYPE_TABLE, + .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE, + .domain 
= DOMAIN_KERNEL, + }, + [MT_MEMORY_RW] = { .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | - L_PTE_WRITE | L_PTE_EXEC, + L_PTE_XN, .prot_l1 = PMD_TYPE_TABLE, .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE, .domain = DOMAIN_KERNEL, @@ -256,23 +313,36 @@ static struct mem_type mem_types[] = { .prot_sect = PMD_TYPE_SECT, .domain = DOMAIN_KERNEL, }, - [MT_MEMORY_NONCACHED] = { + [MT_MEMORY_RWX_NONCACHED] = { .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | - L_PTE_WRITE | L_PTE_EXEC | L_PTE_MT_BUFFERABLE, + L_PTE_MT_BUFFERABLE, .prot_l1 = PMD_TYPE_TABLE, .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE, .domain = DOMAIN_KERNEL, }, - [MT_MEMORY_DTCM] = { + [MT_MEMORY_RW_DTCM] = { .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | - L_PTE_WRITE, + L_PTE_XN, .prot_l1 = PMD_TYPE_TABLE, .prot_sect = PMD_TYPE_SECT | PMD_SECT_XN, .domain = DOMAIN_KERNEL, }, - [MT_MEMORY_ITCM] = { + [MT_MEMORY_RWX_ITCM] = { + .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY, + .prot_l1 = PMD_TYPE_TABLE, + .domain = DOMAIN_KERNEL, + }, + [MT_MEMORY_RW_SO] = { + .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | + L_PTE_MT_UNCACHED | L_PTE_XN, + .prot_l1 = PMD_TYPE_TABLE, + .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_S | + PMD_SECT_UNCACHED | PMD_SECT_XN, + .domain = DOMAIN_KERNEL, + }, + [MT_MEMORY_DMA_READY] = { .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | - L_PTE_WRITE | L_PTE_EXEC, + L_PTE_XN, .prot_l1 = PMD_TYPE_TABLE, .domain = DOMAIN_KERNEL, }, @@ -284,6 +354,44 @@ const struct mem_type *get_mem_type(unsigned int type) } EXPORT_SYMBOL(get_mem_type); +#define PTE_SET_FN(_name, pteop) \ +static int pte_set_##_name(pte_t *ptep, pgtable_t token, unsigned long addr, \ + void *data) \ +{ \ + pte_t pte = pteop(*ptep); \ +\ + set_pte_ext(ptep, pte, 0); \ + return 0; \ +} \ + +#define SET_MEMORY_FN(_name, callback) \ +int set_memory_##_name(unsigned long addr, int numpages) \ +{ \ + unsigned long start = addr; \ + unsigned long size = PAGE_SIZE*numpages; \ + unsigned end = start + size; \ +\ + if (start < MODULES_VADDR || start >= MODULES_END) \ + return -EINVAL;\ +\ + if (end < MODULES_VADDR || end >= MODULES_END) \ + return -EINVAL; \ +\ + apply_to_page_range(&init_mm, start, size, callback, NULL); \ + flush_tlb_kernel_range(start, end); \ + return 0;\ +} + +PTE_SET_FN(ro, pte_wrprotect) +PTE_SET_FN(rw, pte_mkwrite) +PTE_SET_FN(x, pte_mkexec) +PTE_SET_FN(nx, pte_mknexec) + +SET_MEMORY_FN(ro, pte_set_ro) +SET_MEMORY_FN(rw, pte_set_rw) +SET_MEMORY_FN(x, pte_set_x) +SET_MEMORY_FN(nx, pte_set_nx) + /* * Adjust the PMD section entries according to the CPU in use. */ @@ -291,7 +399,8 @@ static void __init build_mem_type_table(void) { struct cachepolicy *cp; unsigned int cr = get_cr(); - unsigned int user_pgprot, kern_pgprot, vecs_pgprot; + pteval_t user_pgprot, kern_pgprot, vecs_pgprot; + pteval_t hyp_device_pgprot, s2_pgprot, s2_device_pgprot; int cpu_arch = cpu_architecture(); int i; @@ -309,8 +418,17 @@ static void __init build_mem_type_table(void) cachepolicy = CPOLICY_WRITEBACK; ecc_mask = 0; } - if (is_smp()) - cachepolicy = CPOLICY_WRITEALLOC; + + if (is_smp()) { + if (cachepolicy != CPOLICY_WRITEALLOC) { + pr_warn("Forcing write-allocate cache policy for SMP\n"); + cachepolicy = CPOLICY_WRITEALLOC; + } + if (!(initial_pmd_value & PMD_SECT_S)) { + pr_warn("Forcing shared mappings for SMP\n"); + initial_pmd_value |= PMD_SECT_S; + } + } /* * Strip out features not present on earlier architectures. 
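/*
 * Editorial aside (not part of the patch): the PTE_SET_FN()/SET_MEMORY_FN()
 * macros added above stamp out one page-walker callback and one wrapper per
 * permission change.  Roughly, the "ro" pair expands to the following
 * (kernel context, shown for illustration only):
 */
static int pte_set_ro(pte_t *ptep, pgtable_t token, unsigned long addr,
		      void *data)
{
	pte_t pte = pte_wrprotect(*ptep);	/* drop the write permission */

	set_pte_ext(ptep, pte, 0);
	return 0;
}

int set_memory_ro(unsigned long addr, int numpages)
{
	unsigned long start = addr;
	unsigned long size = PAGE_SIZE * numpages;
	unsigned end = start + size;

	/* Only the module area may be changed this way. */
	if (start < MODULES_VADDR || start >= MODULES_END)
		return -EINVAL;

	if (end < MODULES_VADDR || end >= MODULES_END)
		return -EINVAL;

	/* Rewrite every PTE in the range, then drop stale TLB entries. */
	apply_to_page_range(&init_mm, start, size, pte_set_ro, NULL);
	flush_tlb_kernel_range(start, end);
	return 0;
}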
@@ -356,6 +474,9 @@ static void __init build_mem_type_table(void) mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_XN; mem_types[MT_DEVICE_CACHED].prot_sect |= PMD_SECT_XN; mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_XN; + + /* Also setup NX memory mapping */ + mem_types[MT_MEMORY_RW].prot_sect |= PMD_SECT_XN; } if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) { /* @@ -403,27 +524,25 @@ static void __init build_mem_type_table(void) */ cp = &cache_policies[cachepolicy]; vecs_pgprot = kern_pgprot = user_pgprot = cp->pte; + s2_pgprot = cp->pte_s2; + hyp_device_pgprot = mem_types[MT_DEVICE].prot_pte; + s2_device_pgprot = mem_types[MT_DEVICE].prot_pte_s2; /* - * Only use write-through for non-SMP systems + * We don't use domains on ARMv6 (since this causes problems with + * v6/v7 kernels), so we must use a separate memory type for user + * r/o, kernel r/w to map the vectors page. */ - if (!is_smp() && cpu_arch >= CPU_ARCH_ARMv5 && cachepolicy > CPOLICY_WRITETHROUGH) - vecs_pgprot = cache_policies[CPOLICY_WRITETHROUGH].pte; +#ifndef CONFIG_ARM_LPAE + if (cpu_arch == CPU_ARCH_ARMv6) + vecs_pgprot |= L_PTE_MT_VECTORS; +#endif /* - * Enable CPU-specific coherency if supported. - * (Only available on XSC3 at the moment.) - */ - if (arch_is_coherent() && cpu_is_xsc3()) { - mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S; - mem_types[MT_MEMORY].prot_pte |= L_PTE_SHARED; - mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S; - mem_types[MT_MEMORY_NONCACHED].prot_pte |= L_PTE_SHARED; - } - /* * ARMv6 and above have extended page tables. */ if (cpu_arch >= CPU_ARCH_ARMv6 && (cr & CR_XP)) { +#ifndef CONFIG_ARM_LPAE /* * Mark cache clean areas and XIP ROM read only * from SVC mode and no access from userspace. @@ -431,23 +550,29 @@ static void __init build_mem_type_table(void) mem_types[MT_ROM].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; mem_types[MT_MINICLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; +#endif - if (is_smp()) { - /* - * Mark memory with the "shared" attribute - * for SMP systems - */ + /* + * If the initial page tables were created with the S bit + * set, then we need to do the same here for the same + * reasons given in early_cachepolicy(). 
+ */ + if (initial_pmd_value & PMD_SECT_S) { user_pgprot |= L_PTE_SHARED; kern_pgprot |= L_PTE_SHARED; vecs_pgprot |= L_PTE_SHARED; + s2_pgprot |= L_PTE_SHARED; mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_S; mem_types[MT_DEVICE_WC].prot_pte |= L_PTE_SHARED; mem_types[MT_DEVICE_CACHED].prot_sect |= PMD_SECT_S; mem_types[MT_DEVICE_CACHED].prot_pte |= L_PTE_SHARED; - mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S; - mem_types[MT_MEMORY].prot_pte |= L_PTE_SHARED; - mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S; - mem_types[MT_MEMORY_NONCACHED].prot_pte |= L_PTE_SHARED; + mem_types[MT_MEMORY_RWX].prot_sect |= PMD_SECT_S; + mem_types[MT_MEMORY_RWX].prot_pte |= L_PTE_SHARED; + mem_types[MT_MEMORY_RW].prot_sect |= PMD_SECT_S; + mem_types[MT_MEMORY_RW].prot_pte |= L_PTE_SHARED; + mem_types[MT_MEMORY_DMA_READY].prot_pte |= L_PTE_SHARED; + mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= PMD_SECT_S; + mem_types[MT_MEMORY_RWX_NONCACHED].prot_pte |= L_PTE_SHARED; } } @@ -458,19 +583,32 @@ static void __init build_mem_type_table(void) if (cpu_arch >= CPU_ARCH_ARMv6) { if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) { /* Non-cacheable Normal is XCB = 001 */ - mem_types[MT_MEMORY_NONCACHED].prot_sect |= + mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= PMD_SECT_BUFFERED; } else { /* For both ARMv6 and non-TEX-remapping ARMv7 */ - mem_types[MT_MEMORY_NONCACHED].prot_sect |= + mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= PMD_SECT_TEX(1); } } else { - mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_BUFFERABLE; + mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= PMD_SECT_BUFFERABLE; + } + +#ifdef CONFIG_ARM_LPAE + /* + * Do not generate access flag faults for the kernel mappings. + */ + for (i = 0; i < ARRAY_SIZE(mem_types); i++) { + mem_types[i].prot_pte |= PTE_EXT_AF; + if (mem_types[i].prot_sect) + mem_types[i].prot_sect |= PMD_SECT_AF; } + kern_pgprot |= PTE_EXT_AF; + vecs_pgprot |= PTE_EXT_AF; +#endif for (i = 0; i < 16; i++) { - unsigned long v = pgprot_val(protection_map[i]); + pteval_t v = pgprot_val(protection_map[i]); protection_map[i] = __pgprot(v | user_pgprot); } @@ -479,13 +617,19 @@ static void __init build_mem_type_table(void) pgprot_user = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | user_pgprot); pgprot_kernel = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | - L_PTE_DIRTY | L_PTE_WRITE | kern_pgprot); + L_PTE_DIRTY | kern_pgprot); + pgprot_s2 = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | s2_pgprot); + pgprot_s2_device = __pgprot(s2_device_pgprot); + pgprot_hyp_device = __pgprot(hyp_device_pgprot); mem_types[MT_LOW_VECTORS].prot_l1 |= ecc_mask; mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask; - mem_types[MT_MEMORY].prot_sect |= ecc_mask | cp->pmd; - mem_types[MT_MEMORY].prot_pte |= kern_pgprot; - mem_types[MT_MEMORY_NONCACHED].prot_sect |= ecc_mask; + mem_types[MT_MEMORY_RWX].prot_sect |= ecc_mask | cp->pmd; + mem_types[MT_MEMORY_RWX].prot_pte |= kern_pgprot; + mem_types[MT_MEMORY_RW].prot_sect |= ecc_mask | cp->pmd; + mem_types[MT_MEMORY_RW].prot_pte |= kern_pgprot; + mem_types[MT_MEMORY_DMA_READY].prot_pte |= kern_pgprot; + mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= ecc_mask; mem_types[MT_ROM].prot_sect |= cp->pmd; switch (cp->pmd) { @@ -497,8 +641,8 @@ static void __init build_mem_type_table(void) mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WB; break; } - printk("Memory policy: ECC %sabled, Data cache %s\n", - ecc_mask ? "en" : "dis", cp->policy); + pr_info("Memory policy: %sData cache %s\n", + ecc_mask ? 
"ECC enabled, " : "", cp->policy); for (i = 0; i < ARRAY_SIZE(mem_types); i++) { struct mem_type *t = &mem_types[i]; @@ -524,18 +668,23 @@ EXPORT_SYMBOL(phys_mem_access_prot); #define vectors_base() (vectors_high() ? 0xffff0000 : 0) -static void __init *early_alloc(unsigned long sz) +static void __init *early_alloc_aligned(unsigned long sz, unsigned long align) { - void *ptr = __va(memblock_alloc(sz, sz)); + void *ptr = __va(memblock_alloc(sz, align)); memset(ptr, 0, sz); return ptr; } +static void __init *early_alloc(unsigned long sz) +{ + return early_alloc_aligned(sz, sz); +} + static pte_t * __init early_pte_alloc(pmd_t *pmd, unsigned long addr, unsigned long prot) { if (pmd_none(*pmd)) { - pte_t *pte = early_alloc(2 * PTRS_PER_PTE * sizeof(pte_t)); - __pmd_populate(pmd, __pa(pte) | prot); + pte_t *pte = early_alloc(PTE_HWTABLE_OFF + PTE_HWTABLE_SIZE); + __pmd_populate(pmd, __pa(pte), prot); } BUG_ON(pmd_bad(*pmd)); return pte_offset_kernel(pmd, addr); @@ -552,53 +701,94 @@ static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr, } while (pte++, addr += PAGE_SIZE, addr != end); } -static void __init alloc_init_section(pgd_t *pgd, unsigned long addr, - unsigned long end, unsigned long phys, - const struct mem_type *type) +static void __init __map_init_section(pmd_t *pmd, unsigned long addr, + unsigned long end, phys_addr_t phys, + const struct mem_type *type) { - pmd_t *pmd = pmd_offset(pgd, addr); + pmd_t *p = pmd; +#ifndef CONFIG_ARM_LPAE /* - * Try a section mapping - end, addr and phys must all be aligned - * to a section boundary. Note that PMDs refer to the individual - * L1 entries, whereas PGDs refer to a group of L1 entries making - * up one logical pointer to an L2 table. + * In classic MMU format, puds and pmds are folded in to + * the pgds. pmd_offset gives the PGD entry. PGDs refer to a + * group of L1 entries making up one logical pointer to + * an L2 table (2MB), where as PMDs refer to the individual + * L1 entries (1MB). Hence increment to get the correct + * offset for odd 1MB sections. + * (See arch/arm/include/asm/pgtable-2level.h) */ - if (((addr | end | phys) & ~SECTION_MASK) == 0) { - pmd_t *p = pmd; + if (addr & SECTION_SIZE) + pmd++; +#endif + do { + *pmd = __pmd(phys | type->prot_sect); + phys += SECTION_SIZE; + } while (pmd++, addr += SECTION_SIZE, addr != end); + + flush_pmd_entry(p); +} - if (addr & SECTION_SIZE) - pmd++; +static void __init alloc_init_pmd(pud_t *pud, unsigned long addr, + unsigned long end, phys_addr_t phys, + const struct mem_type *type) +{ + pmd_t *pmd = pmd_offset(pud, addr); + unsigned long next; - do { - *pmd = __pmd(phys | type->prot_sect); - phys += SECTION_SIZE; - } while (pmd++, addr += SECTION_SIZE, addr != end); + do { + /* + * With LPAE, we must loop over to map + * all the pmds for the given range. + */ + next = pmd_addr_end(addr, end); - flush_pmd_entry(p); - } else { /* - * No need to loop; pte's aren't interested in the - * individual L1 entries. + * Try a section mapping - addr, next and phys must all be + * aligned to a section boundary. 
*/ - alloc_init_pte(pmd, addr, end, __phys_to_pfn(phys), type); - } + if (type->prot_sect && + ((addr | next | phys) & ~SECTION_MASK) == 0) { + __map_init_section(pmd, addr, next, phys, type); + } else { + alloc_init_pte(pmd, addr, next, + __phys_to_pfn(phys), type); + } + + phys += next - addr; + + } while (pmd++, addr = next, addr != end); +} + +static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr, + unsigned long end, phys_addr_t phys, + const struct mem_type *type) +{ + pud_t *pud = pud_offset(pgd, addr); + unsigned long next; + + do { + next = pud_addr_end(addr, end); + alloc_init_pmd(pud, addr, next, phys, type); + phys += next - addr; + } while (pud++, addr = next, addr != end); } +#ifndef CONFIG_ARM_LPAE static void __init create_36bit_mapping(struct map_desc *md, const struct mem_type *type) { - unsigned long phys, addr, length, end; + unsigned long addr, length, end; + phys_addr_t phys; pgd_t *pgd; addr = md->virtual; - phys = (unsigned long)__pfn_to_phys(md->pfn); + phys = __pfn_to_phys(md->pfn); length = PAGE_ALIGN(md->length); if (!(cpu_architecture() >= CPU_ARCH_ARMv6 || cpu_is_xsc3())) { printk(KERN_ERR "MM: CPU does not support supersection " "mapping for 0x%08llx at 0x%08lx\n", - __pfn_to_phys((u64)md->pfn), addr); + (long long)__pfn_to_phys((u64)md->pfn), addr); return; } @@ -611,14 +801,14 @@ static void __init create_36bit_mapping(struct map_desc *md, if (type->domain) { printk(KERN_ERR "MM: invalid domain in supersection " "mapping for 0x%08llx at 0x%08lx\n", - __pfn_to_phys((u64)md->pfn), addr); + (long long)__pfn_to_phys((u64)md->pfn), addr); return; } if ((addr | length | __pfn_to_phys(md->pfn)) & ~SUPERSECTION_MASK) { - printk(KERN_ERR "MM: cannot create mapping for " - "0x%08llx at 0x%08lx invalid alignment\n", - __pfn_to_phys((u64)md->pfn), addr); + printk(KERN_ERR "MM: cannot create mapping for 0x%08llx" + " at 0x%08lx invalid alignment\n", + (long long)__pfn_to_phys((u64)md->pfn), addr); return; } @@ -631,7 +821,8 @@ static void __init create_36bit_mapping(struct map_desc *md, pgd = pgd_offset_k(addr); end = addr + length; do { - pmd_t *pmd = pmd_offset(pgd, addr); + pud_t *pud = pud_offset(pgd, addr); + pmd_t *pmd = pmd_offset(pud, addr); int i; for (i = 0; i < 16; i++) @@ -642,6 +833,7 @@ static void __init create_36bit_mapping(struct map_desc *md, pgd += SUPERSECTION_SIZE >> PGDIR_SHIFT; } while (addr != end); } +#endif /* !CONFIG_ARM_LPAE */ /* * Create the page directory entries and any necessary @@ -652,26 +844,29 @@ static void __init create_36bit_mapping(struct map_desc *md, */ static void __init create_mapping(struct map_desc *md) { - unsigned long phys, addr, length, end; + unsigned long addr, length, end; + phys_addr_t phys; const struct mem_type *type; pgd_t *pgd; if (md->virtual != vectors_base() && md->virtual < TASK_SIZE) { - printk(KERN_WARNING "BUG: not creating mapping for " - "0x%08llx at 0x%08lx in user region\n", - __pfn_to_phys((u64)md->pfn), md->virtual); + printk(KERN_WARNING "BUG: not creating mapping for 0x%08llx" + " at 0x%08lx in user region\n", + (long long)__pfn_to_phys((u64)md->pfn), md->virtual); return; } if ((md->type == MT_DEVICE || md->type == MT_ROM) && - md->virtual >= PAGE_OFFSET && md->virtual < VMALLOC_END) { - printk(KERN_WARNING "BUG: mapping for 0x%08llx at 0x%08lx " - "overlaps vmalloc space\n", - __pfn_to_phys((u64)md->pfn), md->virtual); + md->virtual >= PAGE_OFFSET && + (md->virtual < VMALLOC_START || md->virtual >= VMALLOC_END)) { + printk(KERN_WARNING "BUG: mapping for 0x%08llx" + " at 0x%08lx out of 
vmalloc space\n", + (long long)__pfn_to_phys((u64)md->pfn), md->virtual); } type = &mem_types[md->type]; +#ifndef CONFIG_ARM_LPAE /* * Catch 36-bit addresses */ @@ -679,15 +874,16 @@ static void __init create_mapping(struct map_desc *md) create_36bit_mapping(md, type); return; } +#endif addr = md->virtual & PAGE_MASK; - phys = (unsigned long)__pfn_to_phys(md->pfn); + phys = __pfn_to_phys(md->pfn); length = PAGE_ALIGN(md->length + (md->virtual & ~PAGE_MASK)); if (type->prot_l1 == 0 && ((addr | phys | length) & ~SECTION_MASK)) { - printk(KERN_WARNING "BUG: map for 0x%08lx at 0x%08lx can not " + printk(KERN_WARNING "BUG: map for 0x%08llx at 0x%08lx can not " "be mapped using pages, ignoring.\n", - __pfn_to_phys(md->pfn), addr); + (long long)__pfn_to_phys(md->pfn), addr); return; } @@ -696,7 +892,7 @@ static void __init create_mapping(struct map_desc *md) do { unsigned long next = pgd_addr_end(addr, end); - alloc_init_section(pgd, addr, next, phys, type); + alloc_init_pud(pgd, addr, next, phys, type); phys += next - addr; addr = next; @@ -708,18 +904,148 @@ static void __init create_mapping(struct map_desc *md) */ void __init iotable_init(struct map_desc *io_desc, int nr) { - int i; + struct map_desc *md; + struct vm_struct *vm; + struct static_vm *svm; + + if (!nr) + return; - for (i = 0; i < nr; i++) - create_mapping(io_desc + i); + svm = early_alloc_aligned(sizeof(*svm) * nr, __alignof__(*svm)); + + for (md = io_desc; nr; md++, nr--) { + create_mapping(md); + + vm = &svm->vm; + vm->addr = (void *)(md->virtual & PAGE_MASK); + vm->size = PAGE_ALIGN(md->length + (md->virtual & ~PAGE_MASK)); + vm->phys_addr = __pfn_to_phys(md->pfn); + vm->flags = VM_IOREMAP | VM_ARM_STATIC_MAPPING; + vm->flags |= VM_ARM_MTYPE(md->type); + vm->caller = iotable_init; + add_static_vm_early(svm++); + } +} + +void __init vm_reserve_area_early(unsigned long addr, unsigned long size, + void *caller) +{ + struct vm_struct *vm; + struct static_vm *svm; + + svm = early_alloc_aligned(sizeof(*svm), __alignof__(*svm)); + + vm = &svm->vm; + vm->addr = (void *)addr; + vm->size = size; + vm->flags = VM_IOREMAP | VM_ARM_EMPTY_MAPPING; + vm->caller = caller; + add_static_vm_early(svm); } -static void * __initdata vmalloc_min = (void *)(VMALLOC_END - SZ_128M); +#ifndef CONFIG_ARM_LPAE + +/* + * The Linux PMD is made of two consecutive section entries covering 2MB + * (see definition in include/asm/pgtable-2level.h). However a call to + * create_mapping() may optimize static mappings by using individual + * 1MB section mappings. This leaves the actual PMD potentially half + * initialized if the top or bottom section entry isn't used, leaving it + * open to problems if a subsequent ioremap() or vmalloc() tries to use + * the virtual space left free by that unused section entry. + * + * Let's avoid the issue by inserting dummy vm entries covering the unused + * PMD halves once the static mappings are in place. + */ + +static void __init pmd_empty_section_gap(unsigned long addr) +{ + vm_reserve_area_early(addr, SECTION_SIZE, pmd_empty_section_gap); +} + +static void __init fill_pmd_gaps(void) +{ + struct static_vm *svm; + struct vm_struct *vm; + unsigned long addr, next = 0; + pmd_t *pmd; + + list_for_each_entry(svm, &static_vmlist, list) { + vm = &svm->vm; + addr = (unsigned long)vm->addr; + if (addr < next) + continue; + + /* + * Check if this vm starts on an odd section boundary. + * If so and the first section entry for this PMD is free + * then we block the corresponding virtual address. 
+ */ + if ((addr & ~PMD_MASK) == SECTION_SIZE) { + pmd = pmd_off_k(addr); + if (pmd_none(*pmd)) + pmd_empty_section_gap(addr & PMD_MASK); + } + + /* + * Then check if this vm ends on an odd section boundary. + * If so and the second section entry for this PMD is empty + * then we block the corresponding virtual address. + */ + addr += vm->size; + if ((addr & ~PMD_MASK) == SECTION_SIZE) { + pmd = pmd_off_k(addr) + 1; + if (pmd_none(*pmd)) + pmd_empty_section_gap(addr); + } + + /* no need to look at any vm entry until we hit the next PMD */ + next = (addr + PMD_SIZE - 1) & PMD_MASK; + } +} + +#else +#define fill_pmd_gaps() do { } while (0) +#endif + +#if defined(CONFIG_PCI) && !defined(CONFIG_NEED_MACH_IO_H) +static void __init pci_reserve_io(void) +{ + struct static_vm *svm; + + svm = find_static_vm_vaddr((void *)PCI_IO_VIRT_BASE); + if (svm) + return; + + vm_reserve_area_early(PCI_IO_VIRT_BASE, SZ_2M, pci_reserve_io); +} +#else +#define pci_reserve_io() do { } while (0) +#endif + +#ifdef CONFIG_DEBUG_LL +void __init debug_ll_io_init(void) +{ + struct map_desc map; + + debug_ll_addr(&map.pfn, &map.virtual); + if (!map.pfn || !map.virtual) + return; + map.pfn = __phys_to_pfn(map.pfn); + map.virtual &= PAGE_MASK; + map.length = PAGE_SIZE; + map.type = MT_DEVICE; + iotable_init(&map, 1); +} +#endif + +static void * __initdata vmalloc_min = + (void *)(VMALLOC_END - (240 << 20) - VMALLOC_OFFSET); /* * vmalloc=size forces the vmalloc area to be exactly 'size' * bytes. This can be used to increase (or decrease) the vmalloc - * area - the default is 128m. + * area - the default is 240m. */ static int __init early_vmalloc(char *arg) { @@ -744,109 +1070,89 @@ static int __init early_vmalloc(char *arg) } early_param("vmalloc", early_vmalloc); -static phys_addr_t lowmem_limit __initdata = 0; +phys_addr_t arm_lowmem_limit __initdata = 0; -static void __init sanity_check_meminfo(void) +void __init sanity_check_meminfo(void) { - int i, j, highmem = 0; - - lowmem_limit = __pa(vmalloc_min - 1) + 1; - memblock_set_current_limit(lowmem_limit); + phys_addr_t memblock_limit = 0; + int highmem = 0; + phys_addr_t vmalloc_limit = __pa(vmalloc_min - 1) + 1; + struct memblock_region *reg; - for (i = 0, j = 0; i < meminfo.nr_banks; i++) { - struct membank *bank = &meminfo.bank[j]; - *bank = meminfo.bank[i]; + for_each_memblock(memory, reg) { + phys_addr_t block_start = reg->base; + phys_addr_t block_end = reg->base + reg->size; + phys_addr_t size_limit = reg->size; -#ifdef CONFIG_HIGHMEM - if (__va(bank->start) > vmalloc_min || - __va(bank->start) < (void *)PAGE_OFFSET) + if (reg->base >= vmalloc_limit) highmem = 1; + else + size_limit = vmalloc_limit - reg->base; - bank->highmem = highmem; - /* - * Split those memory banks which are partially overlapping - * the vmalloc area greatly simplifying things later. 
- */ - if (__va(bank->start) < vmalloc_min && - bank->size > vmalloc_min - __va(bank->start)) { - if (meminfo.nr_banks >= NR_BANKS) { - printk(KERN_CRIT "NR_BANKS too low, " - "ignoring high memory\n"); - } else { - memmove(bank + 1, bank, - (meminfo.nr_banks - i) * sizeof(*bank)); - meminfo.nr_banks++; - i++; - bank[1].size -= vmalloc_min - __va(bank->start); - bank[1].start = __pa(vmalloc_min - 1) + 1; - bank[1].highmem = highmem = 1; - j++; + if (!IS_ENABLED(CONFIG_HIGHMEM) || cache_is_vipt_aliasing()) { + + if (highmem) { + pr_notice("Ignoring RAM at %pa-%pa (!CONFIG_HIGHMEM)\n", + &block_start, &block_end); + memblock_remove(reg->base, reg->size); + continue; } - bank->size = vmalloc_min - __va(bank->start); - } -#else - bank->highmem = highmem; - /* - * Check whether this memory bank would entirely overlap - * the vmalloc area. - */ - if (__va(bank->start) >= vmalloc_min || - __va(bank->start) < (void *)PAGE_OFFSET) { - printk(KERN_NOTICE "Ignoring RAM at %.8lx-%.8lx " - "(vmalloc region overlap).\n", - bank->start, bank->start + bank->size - 1); - continue; - } + if (reg->size > size_limit) { + phys_addr_t overlap_size = reg->size - size_limit; - /* - * Check whether this memory bank would partially overlap - * the vmalloc area. - */ - if (__va(bank->start + bank->size) > vmalloc_min || - __va(bank->start + bank->size) < __va(bank->start)) { - unsigned long newsize = vmalloc_min - __va(bank->start); - printk(KERN_NOTICE "Truncating RAM at %.8lx-%.8lx " - "to -%.8lx (vmalloc region overlap).\n", - bank->start, bank->start + bank->size - 1, - bank->start + newsize - 1); - bank->size = newsize; + pr_notice("Truncating RAM at %pa-%pa to -%pa", + &block_start, &block_end, &vmalloc_limit); + memblock_remove(vmalloc_limit, overlap_size); + block_end = vmalloc_limit; + } } -#endif - j++; - } -#ifdef CONFIG_HIGHMEM - if (highmem) { - const char *reason = NULL; - if (cache_is_vipt_aliasing()) { - /* - * Interactions between kmap and other mappings - * make highmem support with aliasing VIPT caches - * rather difficult. - */ - reason = "with VIPT aliasing cache"; - } else if (is_smp() && tlb_ops_need_broadcast()) { + if (!highmem) { + if (block_end > arm_lowmem_limit) { + if (reg->size > size_limit) + arm_lowmem_limit = vmalloc_limit; + else + arm_lowmem_limit = block_end; + } + /* - * kmap_high needs to occasionally flush TLB entries, - * however, if the TLB entries need to be broadcast - * we may deadlock: - * kmap_high(irqs off)->flush_all_zero_pkmaps-> - * flush_tlb_kernel_range->smp_call_function_many - * (must not be called with irqs off) + * Find the first non-section-aligned page, and point + * memblock_limit at it. This relies on rounding the + * limit down to be section-aligned, which happens at + * the end of this function. + * + * With this algorithm, the start or end of almost any + * bank can be non-section-aligned. The only exception + * is that the start of the bank 0 must be section- + * aligned, since otherwise memory would need to be + * allocated when mapping the start of bank 0, which + * occurs before any free memory is mapped. 
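+			 *
+			 * (Illustrative example: a bank spanning
+			 * 0x80000000-0x87f80000 that sits entirely below the
+			 * vmalloc limit has a section-aligned start but an
+			 * unaligned end, so memblock_limit is pointed at
+			 * arm_lowmem_limit and then rounded down to a
+			 * section boundary below.)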
*/ - reason = "without hardware TLB ops broadcasting"; - } - if (reason) { - printk(KERN_CRIT "HIGHMEM is not supported %s, ignoring high memory\n", - reason); - while (j > 0 && meminfo.bank[j - 1].highmem) - j--; + if (!memblock_limit) { + if (!IS_ALIGNED(block_start, SECTION_SIZE)) + memblock_limit = block_start; + else if (!IS_ALIGNED(block_end, SECTION_SIZE)) + memblock_limit = arm_lowmem_limit; + } + } } -#endif - meminfo.nr_banks = j; + + high_memory = __va(arm_lowmem_limit - 1) + 1; + + /* + * Round the memblock limit down to a section size. This + * helps to ensure that we will allocate memory from the + * last full section, which should be mapped. + */ + if (memblock_limit) + memblock_limit = round_down(memblock_limit, SECTION_SIZE); + if (!memblock_limit) + memblock_limit = arm_lowmem_limit; + + memblock_set_current_limit(memblock_limit); } static inline void prepare_page_table(void) @@ -857,32 +1163,40 @@ static inline void prepare_page_table(void) /* * Clear out all the mappings below the kernel image. */ - for (addr = 0; addr < MODULES_VADDR; addr += PGDIR_SIZE) + for (addr = 0; addr < MODULES_VADDR; addr += PMD_SIZE) pmd_clear(pmd_off_k(addr)); #ifdef CONFIG_XIP_KERNEL /* The XIP kernel is mapped in the module area -- skip over it */ - addr = ((unsigned long)_etext + PGDIR_SIZE - 1) & PGDIR_MASK; + addr = ((unsigned long)_etext + PMD_SIZE - 1) & PMD_MASK; #endif - for ( ; addr < PAGE_OFFSET; addr += PGDIR_SIZE) + for ( ; addr < PAGE_OFFSET; addr += PMD_SIZE) pmd_clear(pmd_off_k(addr)); /* * Find the end of the first block of lowmem. */ end = memblock.memory.regions[0].base + memblock.memory.regions[0].size; - if (end >= lowmem_limit) - end = lowmem_limit; + if (end >= arm_lowmem_limit) + end = arm_lowmem_limit; /* * Clear out all the kernel space mappings, except for the first - * memory bank, up to the end of the vmalloc region. + * memory bank, up to the vmalloc region. */ for (addr = __phys_to_virt(end); - addr < VMALLOC_END; addr += PGDIR_SIZE) + addr < VMALLOC_START; addr += PMD_SIZE) pmd_clear(pmd_off_k(addr)); } +#ifdef CONFIG_ARM_LPAE +/* the first page is reserved for pgd */ +#define SWAPPER_PG_DIR_SIZE (PAGE_SIZE + \ + PTRS_PER_PGD * PTRS_PER_PMD * sizeof(pmd_t)) +#else +#define SWAPPER_PG_DIR_SIZE (PTRS_PER_PGD * sizeof(pgd_t)) +#endif + /* * Reserve the special regions of memory */ @@ -892,7 +1206,7 @@ void __init arm_mm_memblock_reserve(void) * Reserve the page tables. These are already in use, * and can only be in node 0. */ - memblock_reserve(__pa(swapper_pg_dir), PTRS_PER_PGD * sizeof(pgd_t)); + memblock_reserve(__pa(swapper_pg_dir), SWAPPER_PG_DIR_SIZE); #ifdef CONFIG_SA1111 /* @@ -904,13 +1218,13 @@ void __init arm_mm_memblock_reserve(void) } /* - * Set up device the mappings. Since we clear out the page tables for all - * mappings above VMALLOC_END, we will remove any debug device mappings. + * Set up the device mappings. Since we clear out the page tables for all + * mappings above VMALLOC_START, we will remove any debug device mappings. * This means you have to be careful how you debug this function, or any * called function. This means you can't use any function or debugging * method which may touch any device, otherwise the kernel _will_ crash. */ -static void __init devicemaps_init(struct machine_desc *mdesc) +static void __init devicemaps_init(const struct machine_desc *mdesc) { struct map_desc map; unsigned long addr; @@ -919,9 +1233,11 @@ static void __init devicemaps_init(struct machine_desc *mdesc) /* * Allocate the vector page early. 
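+	 * Note that two pages are set up below: the vectors page itself,
+	 * mapped at 0xffff0000, and a second page mapped read-only just
+	 * above it at 0xffff0000 + PAGE_SIZE (see the extra mapping
+	 * created further down).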
*/ - vectors = early_alloc(PAGE_SIZE); + vectors = early_alloc(PAGE_SIZE * 2); - for (addr = VMALLOC_END; addr; addr += PGDIR_SIZE) + early_trap_init(vectors); + + for (addr = VMALLOC_START; addr; addr += PMD_SIZE) pmd_clear(pmd_off_k(addr)); /* @@ -962,20 +1278,38 @@ static void __init devicemaps_init(struct machine_desc *mdesc) map.pfn = __phys_to_pfn(virt_to_phys(vectors)); map.virtual = 0xffff0000; map.length = PAGE_SIZE; +#ifdef CONFIG_KUSER_HELPERS map.type = MT_HIGH_VECTORS; +#else + map.type = MT_LOW_VECTORS; +#endif create_mapping(&map); if (!vectors_high()) { map.virtual = 0; + map.length = PAGE_SIZE * 2; map.type = MT_LOW_VECTORS; create_mapping(&map); } + /* Now create a kernel read-only mapping */ + map.pfn += 1; + map.virtual = 0xffff0000 + PAGE_SIZE; + map.length = PAGE_SIZE; + map.type = MT_LOW_VECTORS; + create_mapping(&map); + /* * Ask the machine support to map in the statically mapped devices. */ if (mdesc->map_io) mdesc->map_io(); + else + debug_ll_io_init(); + fill_pmd_gaps(); + + /* Reserve fixed i/o space in VMALLOC region */ + pci_reserve_io(); /* * Finally flush the caches and tlb to ensure that we're in a @@ -992,12 +1326,17 @@ static void __init kmap_init(void) #ifdef CONFIG_HIGHMEM pkmap_page_table = early_pte_alloc(pmd_off_k(PKMAP_BASE), PKMAP_BASE, _PAGE_KERNEL_TABLE); + + fixmap_page_table = early_pte_alloc(pmd_off_k(FIXADDR_START), + FIXADDR_START, _PAGE_KERNEL_TABLE); #endif } static void __init map_lowmem(void) { struct memblock_region *reg; + unsigned long kernel_x_start = round_down(__pa(_stext), SECTION_SIZE); + unsigned long kernel_x_end = round_up(__pa(__init_end), SECTION_SIZE); /* Map all the lowmem memory banks. */ for_each_memblock(memory, reg) { @@ -1005,34 +1344,143 @@ static void __init map_lowmem(void) phys_addr_t end = start + reg->size; struct map_desc map; - if (end > lowmem_limit) - end = lowmem_limit; + if (end > arm_lowmem_limit) + end = arm_lowmem_limit; if (start >= end) break; - map.pfn = __phys_to_pfn(start); - map.virtual = __phys_to_virt(start); - map.length = end - start; - map.type = MT_MEMORY; + if (end < kernel_x_start || start >= kernel_x_end) { + map.pfn = __phys_to_pfn(start); + map.virtual = __phys_to_virt(start); + map.length = end - start; + map.type = MT_MEMORY_RWX; - create_mapping(&map); + create_mapping(&map); + } else { + /* This better cover the entire kernel */ + if (start < kernel_x_start) { + map.pfn = __phys_to_pfn(start); + map.virtual = __phys_to_virt(start); + map.length = kernel_x_start - start; + map.type = MT_MEMORY_RW; + + create_mapping(&map); + } + + map.pfn = __phys_to_pfn(kernel_x_start); + map.virtual = __phys_to_virt(kernel_x_start); + map.length = kernel_x_end - kernel_x_start; + map.type = MT_MEMORY_RWX; + + create_mapping(&map); + + if (kernel_x_end < end) { + map.pfn = __phys_to_pfn(kernel_x_end); + map.virtual = __phys_to_virt(kernel_x_end); + map.length = end - kernel_x_end; + map.type = MT_MEMORY_RW; + + create_mapping(&map); + } + } } } +#ifdef CONFIG_ARM_LPAE +/* + * early_paging_init() recreates boot time page table setup, allowing machines + * to switch over to a high (>4G) address space on LPAE systems + */ +void __init early_paging_init(const struct machine_desc *mdesc, + struct proc_info_list *procinfo) +{ + pmdval_t pmdprot = procinfo->__cpu_mm_mmu_flags; + unsigned long map_start, map_end; + pgd_t *pgd0, *pgdk; + pud_t *pud0, *pudk, *pud_start; + pmd_t *pmd0, *pmdk; + phys_addr_t phys; + int i; + + if (!(mdesc->init_meminfo)) + return; + + /* remap kernel code and data */ + 
map_start = init_mm.start_code & PMD_MASK; + map_end = ALIGN(init_mm.brk, PMD_SIZE); + + /* get a handle on things... */ + pgd0 = pgd_offset_k(0); + pud_start = pud0 = pud_offset(pgd0, 0); + pmd0 = pmd_offset(pud0, 0); + + pgdk = pgd_offset_k(map_start); + pudk = pud_offset(pgdk, map_start); + pmdk = pmd_offset(pudk, map_start); + + mdesc->init_meminfo(); + + /* Run the patch stub to update the constants */ + fixup_pv_table(&__pv_table_begin, + (&__pv_table_end - &__pv_table_begin) << 2); + + /* + * Cache cleaning operations for self-modifying code + * We should clean the entries by MVA but running a + * for loop over every pv_table entry pointer would + * just complicate the code. + */ + flush_cache_louis(); + dsb(ishst); + isb(); + + /* remap level 1 table */ + for (i = 0; i < PTRS_PER_PGD; pud0++, i++) { + set_pud(pud0, + __pud(__pa(pmd0) | PMD_TYPE_TABLE | L_PGD_SWAPPER)); + pmd0 += PTRS_PER_PMD; + } + + /* remap pmds for kernel mapping */ + phys = __pa(map_start); + do { + *pmdk++ = __pmd(phys | pmdprot); + phys += PMD_SIZE; + } while (phys < map_end); + + flush_cache_all(); + cpu_switch_mm(pgd0, &init_mm); + cpu_set_ttbr(1, __pa(pgd0) + TTBR1_OFFSET); + local_flush_bp_all(); + local_flush_tlb_all(); +} + +#else + +void __init early_paging_init(const struct machine_desc *mdesc, + struct proc_info_list *procinfo) +{ + if (mdesc->init_meminfo) + mdesc->init_meminfo(); +} + +#endif + /* * paging_init() sets up the page tables, initialises the zone memory * maps, and sets up the zero page, bad page and bad page tables. */ -void __init paging_init(struct machine_desc *mdesc) +void __init paging_init(const struct machine_desc *mdesc) { void *zero_page; build_mem_type_table(); - sanity_check_meminfo(); prepare_page_table(); map_lowmem(); + dma_contiguous_remap(); devicemaps_init(mdesc); kmap_init(); + tcm_init(); top_pmd = pmd_off_k(0xffff0000); @@ -1044,38 +1492,3 @@ void __init paging_init(struct machine_desc *mdesc) empty_zero_page = virt_to_page(zero_page); __flush_dcache_page(NULL, empty_zero_page); } - -/* - * In order to soft-boot, we need to insert a 1:1 mapping in place of - * the user-mode pages. This will then ensure that we have predictable - * results when turning the mmu off - */ -void setup_mm_for_reboot(char mode) -{ - unsigned long base_pmdval; - pgd_t *pgd; - int i; - - /* - * We need to access to user-mode page tables here. For kernel threads - * we don't have any user-mode mappings so we use the context that we - * "borrowed". 
- */ - pgd = current->active_mm->pgd; - - base_pmdval = PMD_SECT_AP_WRITE | PMD_SECT_AP_READ | PMD_TYPE_SECT; - if (cpu_architecture() <= CPU_ARCH_ARMv5TEJ && !cpu_is_xscale()) - base_pmdval |= PMD_BIT4; - - for (i = 0; i < FIRST_USER_PGD_NR + USER_PTRS_PER_PGD; i++, pgd++) { - unsigned long pmdval = (i << PGDIR_SHIFT) | base_pmdval; - pmd_t *pmd; - - pmd = pmd_off(pgd, i << PGDIR_SHIFT); - pmd[0] = __pmd(pmdval); - pmd[1] = __pmd(pmdval + (1 << (PGDIR_SHIFT - 1))); - flush_pmd_entry(pmd); - } - - local_flush_tlb_all(); -} diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c index 687d02319a4..a014dfacd5c 100644 --- a/arch/arm/mm/nommu.c +++ b/arch/arm/mm/nommu.c @@ -8,38 +8,325 @@ #include <linux/pagemap.h> #include <linux/io.h> #include <linux/memblock.h> +#include <linux/kernel.h> #include <asm/cacheflush.h> #include <asm/sections.h> #include <asm/page.h> #include <asm/setup.h> +#include <asm/traps.h> #include <asm/mach/arch.h> +#include <asm/cputype.h> +#include <asm/mpu.h> +#include <asm/procinfo.h> #include "mm.h" +#ifdef CONFIG_ARM_MPU +struct mpu_rgn_info mpu_rgn_info; + +/* Region number */ +static void rgnr_write(u32 v) +{ + asm("mcr p15, 0, %0, c6, c2, 0" : : "r" (v)); +} + +/* Data-side / unified region attributes */ + +/* Region access control register */ +static void dracr_write(u32 v) +{ + asm("mcr p15, 0, %0, c6, c1, 4" : : "r" (v)); +} + +/* Region size register */ +static void drsr_write(u32 v) +{ + asm("mcr p15, 0, %0, c6, c1, 2" : : "r" (v)); +} + +/* Region base address register */ +static void drbar_write(u32 v) +{ + asm("mcr p15, 0, %0, c6, c1, 0" : : "r" (v)); +} + +static u32 drbar_read(void) +{ + u32 v; + asm("mrc p15, 0, %0, c6, c1, 0" : "=r" (v)); + return v; +} +/* Optional instruction-side region attributes */ + +/* I-side Region access control register */ +static void iracr_write(u32 v) +{ + asm("mcr p15, 0, %0, c6, c1, 5" : : "r" (v)); +} + +/* I-side Region size register */ +static void irsr_write(u32 v) +{ + asm("mcr p15, 0, %0, c6, c1, 3" : : "r" (v)); +} + +/* I-side Region base address register */ +static void irbar_write(u32 v) +{ + asm("mcr p15, 0, %0, c6, c1, 1" : : "r" (v)); +} + +static unsigned long irbar_read(void) +{ + unsigned long v; + asm("mrc p15, 0, %0, c6, c1, 1" : "=r" (v)); + return v; +} + +/* MPU initialisation functions */ +void __init sanity_check_meminfo_mpu(void) +{ + int i; + phys_addr_t phys_offset = PHYS_OFFSET; + phys_addr_t aligned_region_size, specified_mem_size, rounded_mem_size; + struct memblock_region *reg; + bool first = true; + phys_addr_t mem_start; + phys_addr_t mem_end; + + for_each_memblock(memory, reg) { + if (first) { + /* + * Initially only use memory continuous from + * PHYS_OFFSET */ + if (reg->base != phys_offset) + panic("First memory bank must be contiguous from PHYS_OFFSET"); + + mem_start = reg->base; + mem_end = reg->base + reg->size; + specified_mem_size = reg->size; + first = false; + } else { + /* + * memblock auto merges contiguous blocks, remove + * all blocks afterwards + */ + pr_notice("Ignoring RAM after %pa, memory at %pa ignored\n", + &mem_start, ®->base); + memblock_remove(reg->base, reg->size); + } + } + + /* + * MPU has curious alignment requirements: Size must be power of 2, and + * region start must be aligned to the region size + */ + if (phys_offset != 0) + pr_info("PHYS_OFFSET != 0 => MPU Region size constrained by alignment requirements\n"); + + /* + * Maximum aligned region might overflow phys_addr_t if phys_offset is + * 0. 
Hence we keep everything below 4G until we take the smaller of
+	 * the aligned_region_size and rounded_mem_size, one of which is
+	 * guaranteed to be smaller than the maximum physical address.
+	 */
+	aligned_region_size = (phys_offset - 1) ^ (phys_offset);
+	/* Find the max power-of-two sized region that fits inside our bank */
+	rounded_mem_size = (1 << __fls(specified_mem_size)) - 1;
+
+	/* The actual region size is the smaller of the two */
+	aligned_region_size = aligned_region_size < rounded_mem_size
+				? aligned_region_size + 1
+				: rounded_mem_size + 1;
+
+	if (aligned_region_size != specified_mem_size) {
+		pr_warn("Truncating memory from %pa to %pa (MPU region constraints)",
+				&specified_mem_size, &aligned_region_size);
+		memblock_remove(mem_start + aligned_region_size,
+				specified_mem_size - aligned_region_size);
+
+		mem_end = mem_start + aligned_region_size;
+	}
+
+	pr_debug("MPU Region from %pa size %pa (end %pa)\n",
+		&phys_offset, &aligned_region_size, &mem_end);
+
+}
+
+static int mpu_present(void)
+{
+	return ((read_cpuid_ext(CPUID_EXT_MMFR0) & MMFR0_PMSA) == MMFR0_PMSAv7);
+}
+
+static int mpu_max_regions(void)
+{
+	/*
+	 * We don't support a different number of I/D side regions so if we
+	 * have separate instruction and data memory maps then return
+	 * whichever side has a smaller number of supported regions.
+	 */
+	u32 dregions, iregions, mpuir;
+	mpuir = read_cpuid(CPUID_MPUIR);
+
+	dregions = iregions = (mpuir & MPUIR_DREGION_SZMASK) >> MPUIR_DREGION;
+
+	/* Check for separate d-side and i-side memory maps */
+	if (mpuir & MPUIR_nU)
+		iregions = (mpuir & MPUIR_IREGION_SZMASK) >> MPUIR_IREGION;
+
+	/* Use the smallest of the two maxima */
+	return min(dregions, iregions);
+}
+
+static int mpu_iside_independent(void)
+{
+	/* MPUIR.nU specifies whether there is *not* a unified memory map */
+	return read_cpuid(CPUID_MPUIR) & MPUIR_nU;
+}
+
+static int mpu_min_region_order(void)
+{
+	u32 drbar_result, irbar_result;
+	/* We've kept a region free for this probing */
+	rgnr_write(MPU_PROBE_REGION);
+	isb();
+	/*
+	 * As per ARM ARM, write 0xFFFFFFFC to DRBAR to find the minimum
+	 * region order
+	 */
+	drbar_write(0xFFFFFFFC);
+	drbar_result = irbar_result = drbar_read();
+	drbar_write(0x0);
+	/* If the MPU is non-unified, we use the larger of the two minima*/
+	if (mpu_iside_independent()) {
+		irbar_write(0xFFFFFFFC);
+		irbar_result = irbar_read();
+		irbar_write(0x0);
+	}
+	isb(); /* Ensure that MPU region operations have completed */
+	/* Return whichever result is larger */
+	return __ffs(max(drbar_result, irbar_result));
+}
+
+static int mpu_setup_region(unsigned int number, phys_addr_t start,
+			unsigned int size_order, unsigned int properties)
+{
+	u32 size_data;
+
+	/* We kept a region free for probing resolution of MPU regions*/
+	if (number > mpu_max_regions() || number == MPU_PROBE_REGION)
+		return -ENOENT;
+
+	if (size_order > 32)
+		return -ENOMEM;
+
+	if (size_order < mpu_min_region_order())
+		return -ENOMEM;
+
+	/* Writing N to bits 5:1 (RSR_SZ) specifies region size 2^N+1 */
+	size_data = ((size_order - 1) << MPU_RSR_SZ) | 1 << MPU_RSR_EN;
+
+	dsb(); /* Ensure all previous data accesses occur with old mappings */
+	rgnr_write(number);
+	isb();
+	drbar_write(start);
+	dracr_write(properties);
+	isb(); /* Propagate properties before enabling region */
+	drsr_write(size_data);
+
+	/* Check for independent I-side registers */
+	if (mpu_iside_independent()) {
+		irbar_write(start);
+		iracr_write(properties);
+		isb();
+		irsr_write(size_data);
+	}
+	isb();
+
+	/* Store region 
info (we treat i/d side the same, so only store d) */ + mpu_rgn_info.rgns[number].dracr = properties; + mpu_rgn_info.rgns[number].drbar = start; + mpu_rgn_info.rgns[number].drsr = size_data; + return 0; +} + +/* +* Set up default MPU regions, doing nothing if there is no MPU +*/ +void __init mpu_setup(void) +{ + int region_err; + if (!mpu_present()) + return; + + region_err = mpu_setup_region(MPU_RAM_REGION, PHYS_OFFSET, + ilog2(meminfo.bank[0].size), + MPU_AP_PL1RW_PL0RW | MPU_RGN_NORMAL); + if (region_err) { + panic("MPU region initialization failure! %d", region_err); + } else { + pr_info("Using ARMv7 PMSA Compliant MPU. " + "Region independence: %s, Max regions: %d\n", + mpu_iside_independent() ? "Yes" : "No", + mpu_max_regions()); + } +} +#else +static void sanity_check_meminfo_mpu(void) {} +static void __init mpu_setup(void) {} +#endif /* CONFIG_ARM_MPU */ + void __init arm_mm_memblock_reserve(void) { +#ifndef CONFIG_CPU_V7M /* * Register the exception vector page. * some architectures which the DRAM is the exception vector to trap, * alloc_page breaks with error, although it is not NULL, but "0." */ memblock_reserve(CONFIG_VECTORS_BASE, PAGE_SIZE); +#else /* ifndef CONFIG_CPU_V7M */ + /* + * There is no dedicated vector page on V7-M. So nothing needs to be + * reserved here. + */ +#endif +} + +void __init sanity_check_meminfo(void) +{ + phys_addr_t end; + sanity_check_meminfo_mpu(); + end = memblock_end_of_DRAM(); + high_memory = __va(end - 1) + 1; + memblock_set_current_limit(end); +} + +/* + * early_paging_init() recreates boot time page table setup, allowing machines + * to switch over to a high (>4G) address space on LPAE systems + */ +void __init early_paging_init(const struct machine_desc *mdesc, + struct proc_info_list *procinfo) +{ } /* * paging_init() sets up the page tables, initialises the zone memory * maps, and sets up the zero page, bad page and bad page tables. */ -void __init paging_init(struct machine_desc *mdesc) +void __init paging_init(const struct machine_desc *mdesc) { + early_trap_init((void *)CONFIG_VECTORS_BASE); + mpu_setup(); bootmem_init(); } /* * We don't need to do anything here for nommu machines. 
*/ -void setup_mm_for_reboot(char mode) +void setup_mm_for_reboot(void) { } @@ -49,6 +336,12 @@ void flush_dcache_page(struct page *page) } EXPORT_SYMBOL(flush_dcache_page); +void flush_kernel_dcache_page(struct page *page) +{ + __cpuc_flush_dcache_area(page_address(page), PAGE_SIZE); +} +EXPORT_SYMBOL(flush_kernel_dcache_page); + void copy_to_user_page(struct vm_area_struct *vma, struct page *page, unsigned long uaddr, void *dst, const void *src, unsigned long len) @@ -73,20 +366,24 @@ void __iomem *__arm_ioremap_pfn_caller(unsigned long pfn, unsigned long offset, return __arm_ioremap_pfn(pfn, offset, size, mtype); } -void __iomem *__arm_ioremap(unsigned long phys_addr, size_t size, +void __iomem *__arm_ioremap(phys_addr_t phys_addr, size_t size, unsigned int mtype) { return (void __iomem *)phys_addr; } EXPORT_SYMBOL(__arm_ioremap); -void __iomem *__arm_ioremap_caller(unsigned long phys_addr, size_t size, +void __iomem * (*arch_ioremap_caller)(phys_addr_t, size_t, unsigned int, void *); + +void __iomem *__arm_ioremap_caller(phys_addr_t phys_addr, size_t size, unsigned int mtype, void *caller) { return __arm_ioremap(phys_addr, size, mtype); } -void __iounmap(volatile void __iomem *addr) +void (*arch_iounmap)(volatile void __iomem *); + +void __arm_iounmap(volatile void __iomem *addr) { } -EXPORT_SYMBOL(__iounmap); +EXPORT_SYMBOL(__arm_iounmap); diff --git a/arch/arm/mm/pabort-legacy.S b/arch/arm/mm/pabort-legacy.S index 87970eba88e..8bbff025269 100644 --- a/arch/arm/mm/pabort-legacy.S +++ b/arch/arm/mm/pabort-legacy.S @@ -4,16 +4,18 @@ /* * Function: legacy_pabort * - * Params : r0 = address of aborted instruction + * Params : r2 = pt_regs + * : r4 = address of aborted instruction + * : r5 = psr for parent context * - * Returns : r0 = address of abort - * : r1 = Simulated IFSR with section translation fault status + * Returns : r4 - r11, r13 preserved * * Purpose : obtain information about current prefetch abort. */ .align 5 ENTRY(legacy_pabort) + mov r0, r4 mov r1, #5 - mov pc, lr + b do_PrefetchAbort ENDPROC(legacy_pabort) diff --git a/arch/arm/mm/pabort-v6.S b/arch/arm/mm/pabort-v6.S index 06e3d1ef211..9627646ce78 100644 --- a/arch/arm/mm/pabort-v6.S +++ b/arch/arm/mm/pabort-v6.S @@ -4,16 +4,18 @@ /* * Function: v6_pabort * - * Params : r0 = address of aborted instruction + * Params : r2 = pt_regs + * : r4 = address of aborted instruction + * : r5 = psr for parent context * - * Returns : r0 = address of abort - * : r1 = IFSR + * Returns : r4 - r11, r13 preserved * * Purpose : obtain information about current prefetch abort. */ .align 5 ENTRY(v6_pabort) + mov r0, r4 mrc p15, 0, r1, c5, c0, 1 @ get IFSR - mov pc, lr + b do_PrefetchAbort ENDPROC(v6_pabort) diff --git a/arch/arm/mm/pabort-v7.S b/arch/arm/mm/pabort-v7.S index a8b3b300a18..875761f44f3 100644 --- a/arch/arm/mm/pabort-v7.S +++ b/arch/arm/mm/pabort-v7.S @@ -2,12 +2,13 @@ #include <asm/assembler.h> /* - * Function: v6_pabort + * Function: v7_pabort * - * Params : r0 = address of aborted instruction + * Params : r2 = pt_regs + * : r4 = address of aborted instruction + * : r5 = psr for parent context * - * Returns : r0 = address of abort - * : r1 = IFSR + * Returns : r4 - r11, r13 preserved * * Purpose : obtain information about current prefetch abort. 
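+ * Note    : the handler no longer returns to its caller; it loads the
+ *           faulting address into r0 and the IFSR into r1 and branches
+ *           straight to do_PrefetchAbort.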
*/ @@ -16,5 +17,5 @@ ENTRY(v7_pabort) mrc p15, 0, r0, c6, c0, 2 @ get IFAR mrc p15, 0, r1, c5, c0, 1 @ get IFSR - mov pc, lr + b do_PrefetchAbort ENDPROC(v7_pabort) diff --git a/arch/arm/mm/pgd.c b/arch/arm/mm/pgd.c index 69bbfc6645a..249379535be 100644 --- a/arch/arm/mm/pgd.c +++ b/arch/arm/mm/pgd.c @@ -10,55 +10,85 @@ #include <linux/mm.h> #include <linux/gfp.h> #include <linux/highmem.h> +#include <linux/slab.h> +#include <asm/cp15.h> #include <asm/pgalloc.h> #include <asm/page.h> #include <asm/tlbflush.h> #include "mm.h" -#define FIRST_KERNEL_PGD_NR (FIRST_USER_PGD_NR + USER_PTRS_PER_PGD) +#ifdef CONFIG_ARM_LPAE +#define __pgd_alloc() kmalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL) +#define __pgd_free(pgd) kfree(pgd) +#else +#define __pgd_alloc() (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_REPEAT, 2) +#define __pgd_free(pgd) free_pages((unsigned long)pgd, 2) +#endif /* * need to get a 16k page for level 1 */ -pgd_t *get_pgd_slow(struct mm_struct *mm) +pgd_t *pgd_alloc(struct mm_struct *mm) { pgd_t *new_pgd, *init_pgd; + pud_t *new_pud, *init_pud; pmd_t *new_pmd, *init_pmd; pte_t *new_pte, *init_pte; - new_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL, 2); + new_pgd = __pgd_alloc(); if (!new_pgd) goto no_pgd; - memset(new_pgd, 0, FIRST_KERNEL_PGD_NR * sizeof(pgd_t)); + memset(new_pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t)); /* * Copy over the kernel and IO PGD entries */ init_pgd = pgd_offset_k(0); - memcpy(new_pgd + FIRST_KERNEL_PGD_NR, init_pgd + FIRST_KERNEL_PGD_NR, - (PTRS_PER_PGD - FIRST_KERNEL_PGD_NR) * sizeof(pgd_t)); + memcpy(new_pgd + USER_PTRS_PER_PGD, init_pgd + USER_PTRS_PER_PGD, + (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); clean_dcache_area(new_pgd, PTRS_PER_PGD * sizeof(pgd_t)); +#ifdef CONFIG_ARM_LPAE + /* + * Allocate PMD table for modules and pkmap mappings. + */ + new_pud = pud_alloc(mm, new_pgd + pgd_index(MODULES_VADDR), + MODULES_VADDR); + if (!new_pud) + goto no_pud; + + new_pmd = pmd_alloc(mm, new_pud, 0); + if (!new_pmd) + goto no_pmd; +#endif + if (!vectors_high()) { /* * On ARM, first page must always be allocated since it - * contains the machine vectors. + * contains the machine vectors. The vectors are always high + * with LPAE. 
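+		 * Two PTEs are copied below because the vector region now
+		 * spans two pages (see the PAGE_SIZE * 2 allocation in
+		 * devicemaps_init()).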
*/ - new_pmd = pmd_alloc(mm, new_pgd, 0); + new_pud = pud_alloc(mm, new_pgd, 0); + if (!new_pud) + goto no_pud; + + new_pmd = pmd_alloc(mm, new_pud, 0); if (!new_pmd) goto no_pmd; - new_pte = pte_alloc_map(mm, new_pmd, 0); + new_pte = pte_alloc_map(mm, NULL, new_pmd, 0); if (!new_pte) goto no_pte; - init_pmd = pmd_offset(init_pgd, 0); + init_pud = pud_offset(init_pgd, 0); + init_pmd = pmd_offset(init_pud, 0); init_pte = pte_offset_map(init_pmd, 0); - set_pte_ext(new_pte, *init_pte, 0); + set_pte_ext(new_pte + 0, init_pte[0], 0); + set_pte_ext(new_pte + 1, init_pte[1], 0); pte_unmap(init_pte); pte_unmap(new_pte); } @@ -68,33 +98,63 @@ pgd_t *get_pgd_slow(struct mm_struct *mm) no_pte: pmd_free(mm, new_pmd); no_pmd: - free_pages((unsigned long)new_pgd, 2); + pud_free(mm, new_pud); +no_pud: + __pgd_free(new_pgd); no_pgd: return NULL; } -void free_pgd_slow(struct mm_struct *mm, pgd_t *pgd) +void pgd_free(struct mm_struct *mm, pgd_t *pgd_base) { + pgd_t *pgd; + pud_t *pud; pmd_t *pmd; pgtable_t pte; - if (!pgd) + if (!pgd_base) return; - /* pgd is always present and good */ - pmd = pmd_off(pgd, 0); - if (pmd_none(*pmd)) - goto free; - if (pmd_bad(*pmd)) { - pmd_ERROR(*pmd); - pmd_clear(pmd); - goto free; - } + pgd = pgd_base + pgd_index(0); + if (pgd_none_or_clear_bad(pgd)) + goto no_pgd; + + pud = pud_offset(pgd, 0); + if (pud_none_or_clear_bad(pud)) + goto no_pud; + + pmd = pmd_offset(pud, 0); + if (pmd_none_or_clear_bad(pmd)) + goto no_pmd; pte = pmd_pgtable(*pmd); pmd_clear(pmd); pte_free(mm, pte); +no_pmd: + pud_clear(pud); pmd_free(mm, pmd); -free: - free_pages((unsigned long) pgd, 2); +no_pud: + pgd_clear(pgd); + pud_free(mm, pud); +no_pgd: +#ifdef CONFIG_ARM_LPAE + /* + * Free modules/pkmap or identity pmd tables. + */ + for (pgd = pgd_base; pgd < pgd_base + PTRS_PER_PGD; pgd++) { + if (pgd_none_or_clear_bad(pgd)) + continue; + if (pgd_val(*pgd) & L_PGD_SWAPPER) + continue; + pud = pud_offset(pgd, 0); + if (pud_none_or_clear_bad(pud)) + continue; + pmd = pmd_offset(pud, 0); + pud_clear(pud); + pmd_free(mm, pmd); + pgd_clear(pgd); + pud_free(mm, pud); + } +#endif + __pgd_free(pgd_base); } diff --git a/arch/arm/mm/proc-arm1020.S b/arch/arm/mm/proc-arm1020.S index bcf748d9f4e..d1a2d05971e 100644 --- a/arch/arm/mm/proc-arm1020.S +++ b/arch/arm/mm/proc-arm1020.S @@ -64,7 +64,7 @@ /* * This is the size at which it becomes more efficient to * clean the whole cache, rather than using the individual - * cache line maintainence instructions. + * cache line maintenance instructions. */ #define CACHE_DLIMIT 32768 @@ -95,6 +95,7 @@ ENTRY(cpu_arm1020_proc_fin) * loc: location to jump to for soft reset */ .align 5 + .pushsection .idmap.text, "ax" ENTRY(cpu_arm1020_reset) mov ip, #0 mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches @@ -107,6 +108,8 @@ ENTRY(cpu_arm1020_reset) bic ip, ip, #0x1100 @ ...i...s........ 
mcr p15, 0, ip, c1, c0, 0 @ ctrl register mov pc, r0 +ENDPROC(cpu_arm1020_reset) + .popsection /* * cpu_arm1020_do_idle() @@ -238,6 +241,7 @@ ENTRY(arm1020_coherent_user_range) cmp r0, r1 blo 1b mcr p15, 0, ip, c7, c10, 4 @ drain WB + mov r0, #0 mov pc, lr /* @@ -364,17 +368,11 @@ ENTRY(arm1020_dma_unmap_area) mov pc, lr ENDPROC(arm1020_dma_unmap_area) -ENTRY(arm1020_cache_fns) - .long arm1020_flush_icache_all - .long arm1020_flush_kern_cache_all - .long arm1020_flush_user_cache_all - .long arm1020_flush_user_cache_range - .long arm1020_coherent_kern_range - .long arm1020_coherent_user_range - .long arm1020_flush_kern_dcache_area - .long arm1020_dma_map_area - .long arm1020_dma_unmap_area - .long arm1020_dma_flush_range + .globl arm1020_flush_kern_cache_louis + .equ arm1020_flush_kern_cache_louis, arm1020_flush_kern_cache_all + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions arm1020 .align 5 ENTRY(cpu_arm1020_dcache_clean_area) @@ -445,8 +443,6 @@ ENTRY(cpu_arm1020_set_pte_ext) #endif /* CONFIG_MMU */ mov pc, lr - __CPUINIT - .type __arm1020_setup, #function __arm1020_setup: mov r0, #0 @@ -477,35 +473,14 @@ arm1020_crval: crval clear=0x0000593f, mmuset=0x00003935, ucset=0x00001930 __INITDATA + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions arm1020, dabort=v4t_early_abort, pabort=legacy_pabort -/* - * Purpose : Function pointers used to access above functions - all calls - * come through these - */ - .type arm1020_processor_functions, #object -arm1020_processor_functions: - .word v4t_early_abort - .word legacy_pabort - .word cpu_arm1020_proc_init - .word cpu_arm1020_proc_fin - .word cpu_arm1020_reset - .word cpu_arm1020_do_idle - .word cpu_arm1020_dcache_clean_area - .word cpu_arm1020_switch_mm - .word cpu_arm1020_set_pte_ext - .size arm1020_processor_functions, . - arm1020_processor_functions .section ".rodata" - .type cpu_arch_name, #object -cpu_arch_name: - .asciz "armv5t" - .size cpu_arch_name, . - cpu_arch_name - - .type cpu_elf_name, #object -cpu_elf_name: - .asciz "v5" - .size cpu_elf_name, . - cpu_elf_name + string cpu_arch_name, "armv5t" + string cpu_elf_name, "v5" .type cpu_arm1020_name, #object cpu_arm1020_name: diff --git a/arch/arm/mm/proc-arm1020e.S b/arch/arm/mm/proc-arm1020e.S index ab7ec26657e..9d89405c3d0 100644 --- a/arch/arm/mm/proc-arm1020e.S +++ b/arch/arm/mm/proc-arm1020e.S @@ -64,7 +64,7 @@ /* * This is the size at which it becomes more efficient to * clean the whole cache, rather than using the individual - * cache line maintainence instructions. + * cache line maintenance instructions. */ #define CACHE_DLIMIT 32768 @@ -95,6 +95,7 @@ ENTRY(cpu_arm1020e_proc_fin) * loc: location to jump to for soft reset */ .align 5 + .pushsection .idmap.text, "ax" ENTRY(cpu_arm1020e_reset) mov ip, #0 mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches @@ -107,6 +108,8 @@ ENTRY(cpu_arm1020e_reset) bic ip, ip, #0x1100 @ ...i...s........ 
mcr p15, 0, ip, c1, c0, 0 @ ctrl register mov pc, r0 +ENDPROC(cpu_arm1020e_reset) + .popsection /* * cpu_arm1020e_do_idle() @@ -232,6 +235,7 @@ ENTRY(arm1020e_coherent_user_range) cmp r0, r1 blo 1b mcr p15, 0, ip, c7, c10, 4 @ drain WB + mov r0, #0 mov pc, lr /* @@ -350,17 +354,11 @@ ENTRY(arm1020e_dma_unmap_area) mov pc, lr ENDPROC(arm1020e_dma_unmap_area) -ENTRY(arm1020e_cache_fns) - .long arm1020e_flush_icache_all - .long arm1020e_flush_kern_cache_all - .long arm1020e_flush_user_cache_all - .long arm1020e_flush_user_cache_range - .long arm1020e_coherent_kern_range - .long arm1020e_coherent_user_range - .long arm1020e_flush_kern_dcache_area - .long arm1020e_dma_map_area - .long arm1020e_dma_unmap_area - .long arm1020e_dma_flush_range + .globl arm1020e_flush_kern_cache_louis + .equ arm1020e_flush_kern_cache_louis, arm1020e_flush_kern_cache_all + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions arm1020e .align 5 ENTRY(cpu_arm1020e_dcache_clean_area) @@ -427,8 +425,6 @@ ENTRY(cpu_arm1020e_set_pte_ext) #endif /* CONFIG_MMU */ mov pc, lr - __CPUINIT - .type __arm1020e_setup, #function __arm1020e_setup: mov r0, #0 @@ -458,40 +454,14 @@ arm1020e_crval: crval clear=0x00007f3f, mmuset=0x00003935, ucset=0x00001930 __INITDATA - -/* - * Purpose : Function pointers used to access above functions - all calls - * come through these - */ - .type arm1020e_processor_functions, #object -arm1020e_processor_functions: - .word v4t_early_abort - .word legacy_pabort - .word cpu_arm1020e_proc_init - .word cpu_arm1020e_proc_fin - .word cpu_arm1020e_reset - .word cpu_arm1020e_do_idle - .word cpu_arm1020e_dcache_clean_area - .word cpu_arm1020e_switch_mm - .word cpu_arm1020e_set_pte_ext - .size arm1020e_processor_functions, . - arm1020e_processor_functions + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions arm1020e, dabort=v4t_early_abort, pabort=legacy_pabort .section ".rodata" - .type cpu_arch_name, #object -cpu_arch_name: - .asciz "armv5te" - .size cpu_arch_name, . - cpu_arch_name - - .type cpu_elf_name, #object -cpu_elf_name: - .asciz "v5" - .size cpu_elf_name, . - cpu_elf_name - - .type cpu_arm1020e_name, #object -cpu_arm1020e_name: - .asciz "ARM1020E" - .size cpu_arm1020e_name, . - cpu_arm1020e_name + string cpu_arch_name, "armv5te" + string cpu_elf_name, "v5" + string cpu_arm1020e_name, "ARM1020E" .align diff --git a/arch/arm/mm/proc-arm1022.S b/arch/arm/mm/proc-arm1022.S index 831c5e54e22..6f01a0ae3b3 100644 --- a/arch/arm/mm/proc-arm1022.S +++ b/arch/arm/mm/proc-arm1022.S @@ -53,7 +53,7 @@ /* * This is the size at which it becomes more efficient to * clean the whole cache, rather than using the individual - * cache line maintainence instructions. + * cache line maintenance instructions. */ #define CACHE_DLIMIT 32768 @@ -84,6 +84,7 @@ ENTRY(cpu_arm1022_proc_fin) * loc: location to jump to for soft reset */ .align 5 + .pushsection .idmap.text, "ax" ENTRY(cpu_arm1022_reset) mov ip, #0 mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches @@ -96,6 +97,8 @@ ENTRY(cpu_arm1022_reset) bic ip, ip, #0x1100 @ ...i...s........ 
mcr p15, 0, ip, c1, c0, 0 @ ctrl register mov pc, r0 +ENDPROC(cpu_arm1022_reset) + .popsection /* * cpu_arm1022_do_idle() @@ -221,6 +224,7 @@ ENTRY(arm1022_coherent_user_range) cmp r0, r1 blo 1b mcr p15, 0, ip, c7, c10, 4 @ drain WB + mov r0, #0 mov pc, lr /* @@ -339,17 +343,11 @@ ENTRY(arm1022_dma_unmap_area) mov pc, lr ENDPROC(arm1022_dma_unmap_area) -ENTRY(arm1022_cache_fns) - .long arm1022_flush_icache_all - .long arm1022_flush_kern_cache_all - .long arm1022_flush_user_cache_all - .long arm1022_flush_user_cache_range - .long arm1022_coherent_kern_range - .long arm1022_coherent_user_range - .long arm1022_flush_kern_dcache_area - .long arm1022_dma_map_area - .long arm1022_dma_unmap_area - .long arm1022_dma_flush_range + .globl arm1022_flush_kern_cache_louis + .equ arm1022_flush_kern_cache_louis, arm1022_flush_kern_cache_all + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions arm1022 .align 5 ENTRY(cpu_arm1022_dcache_clean_area) @@ -409,8 +407,6 @@ ENTRY(cpu_arm1022_set_pte_ext) #endif /* CONFIG_MMU */ mov pc, lr - __CPUINIT - .type __arm1022_setup, #function __arm1022_setup: mov r0, #0 @@ -441,40 +437,14 @@ arm1022_crval: crval clear=0x00007f3f, mmuset=0x00003935, ucset=0x00001930 __INITDATA - -/* - * Purpose : Function pointers used to access above functions - all calls - * come through these - */ - .type arm1022_processor_functions, #object -arm1022_processor_functions: - .word v4t_early_abort - .word legacy_pabort - .word cpu_arm1022_proc_init - .word cpu_arm1022_proc_fin - .word cpu_arm1022_reset - .word cpu_arm1022_do_idle - .word cpu_arm1022_dcache_clean_area - .word cpu_arm1022_switch_mm - .word cpu_arm1022_set_pte_ext - .size arm1022_processor_functions, . - arm1022_processor_functions + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions arm1022, dabort=v4t_early_abort, pabort=legacy_pabort .section ".rodata" - .type cpu_arch_name, #object -cpu_arch_name: - .asciz "armv5te" - .size cpu_arch_name, . - cpu_arch_name - - .type cpu_elf_name, #object -cpu_elf_name: - .asciz "v5" - .size cpu_elf_name, . - cpu_elf_name - - .type cpu_arm1022_name, #object -cpu_arm1022_name: - .asciz "ARM1022" - .size cpu_arm1022_name, . - cpu_arm1022_name + string cpu_arch_name, "armv5te" + string cpu_elf_name, "v5" + string cpu_arm1022_name, "ARM1022" .align diff --git a/arch/arm/mm/proc-arm1026.S b/arch/arm/mm/proc-arm1026.S index e3f7e9a166b..4799a24b43e 100644 --- a/arch/arm/mm/proc-arm1026.S +++ b/arch/arm/mm/proc-arm1026.S @@ -53,7 +53,7 @@ /* * This is the size at which it becomes more efficient to * clean the whole cache, rather than using the individual - * cache line maintainence instructions. + * cache line maintenance instructions. */ #define CACHE_DLIMIT 32768 @@ -84,6 +84,7 @@ ENTRY(cpu_arm1026_proc_fin) * loc: location to jump to for soft reset */ .align 5 + .pushsection .idmap.text, "ax" ENTRY(cpu_arm1026_reset) mov ip, #0 mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches @@ -96,6 +97,8 @@ ENTRY(cpu_arm1026_reset) bic ip, ip, #0x1100 @ ...i...s........ 
mcr p15, 0, ip, c1, c0, 0 @ ctrl register mov pc, r0 +ENDPROC(cpu_arm1026_reset) + .popsection /* * cpu_arm1026_do_idle() @@ -215,6 +218,7 @@ ENTRY(arm1026_coherent_user_range) cmp r0, r1 blo 1b mcr p15, 0, ip, c7, c10, 4 @ drain WB + mov r0, #0 mov pc, lr /* @@ -333,17 +337,11 @@ ENTRY(arm1026_dma_unmap_area) mov pc, lr ENDPROC(arm1026_dma_unmap_area) -ENTRY(arm1026_cache_fns) - .long arm1026_flush_icache_all - .long arm1026_flush_kern_cache_all - .long arm1026_flush_user_cache_all - .long arm1026_flush_user_cache_range - .long arm1026_coherent_kern_range - .long arm1026_coherent_user_range - .long arm1026_flush_kern_dcache_area - .long arm1026_dma_map_area - .long arm1026_dma_unmap_area - .long arm1026_dma_flush_range + .globl arm1026_flush_kern_cache_louis + .equ arm1026_flush_kern_cache_louis, arm1026_flush_kern_cache_all + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions arm1026 .align 5 ENTRY(cpu_arm1026_dcache_clean_area) @@ -398,9 +396,6 @@ ENTRY(cpu_arm1026_set_pte_ext) #endif /* CONFIG_MMU */ mov pc, lr - - __CPUINIT - .type __arm1026_setup, #function __arm1026_setup: mov r0, #0 @@ -436,42 +431,15 @@ arm1026_crval: crval clear=0x00007f3f, mmuset=0x00003935, ucset=0x00001934 __INITDATA - -/* - * Purpose : Function pointers used to access above functions - all calls - * come through these - */ - .type arm1026_processor_functions, #object -arm1026_processor_functions: - .word v5t_early_abort - .word legacy_pabort - .word cpu_arm1026_proc_init - .word cpu_arm1026_proc_fin - .word cpu_arm1026_reset - .word cpu_arm1026_do_idle - .word cpu_arm1026_dcache_clean_area - .word cpu_arm1026_switch_mm - .word cpu_arm1026_set_pte_ext - .size arm1026_processor_functions, . - arm1026_processor_functions + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions arm1026, dabort=v5t_early_abort, pabort=legacy_pabort .section .rodata - .type cpu_arch_name, #object -cpu_arch_name: - .asciz "armv5tej" - .size cpu_arch_name, . - cpu_arch_name - - .type cpu_elf_name, #object -cpu_elf_name: - .asciz "v5" - .size cpu_elf_name, . - cpu_elf_name + string cpu_arch_name, "armv5tej" + string cpu_elf_name, "v5" .align - - .type cpu_arm1026_name, #object -cpu_arm1026_name: - .asciz "ARM1026EJ-S" - .size cpu_arm1026_name, . - cpu_arm1026_name - + string cpu_arm1026_name, "ARM1026EJ-S" .align .section ".proc.info.init", #alloc, #execinstr diff --git a/arch/arm/mm/proc-arm6_7.S b/arch/arm/mm/proc-arm6_7.S deleted file mode 100644 index 6a7be1863ed..00000000000 --- a/arch/arm/mm/proc-arm6_7.S +++ /dev/null @@ -1,421 +0,0 @@ -/* - * linux/arch/arm/mm/proc-arm6,7.S - * - * Copyright (C) 1997-2000 Russell King - * hacked for non-paged-MM by Hyok S. Choi, 2003. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * These are the low level assembler for performing cache and TLB - * functions on the ARM610 & ARM710. 
- */ -#include <linux/linkage.h> -#include <linux/init.h> -#include <asm/assembler.h> -#include <asm/asm-offsets.h> -#include <asm/hwcap.h> -#include <asm/pgtable-hwdef.h> -#include <asm/pgtable.h> -#include <asm/ptrace.h> - -#include "proc-macros.S" - -ENTRY(cpu_arm6_dcache_clean_area) -ENTRY(cpu_arm7_dcache_clean_area) - mov pc, lr - -/* - * Function: arm6_7_data_abort () - * - * Params : r2 = address of aborted instruction - * : sp = pointer to registers - * - * Purpose : obtain information about current aborted instruction - * - * Returns : r0 = address of abort - * : r1 = FSR - */ - -ENTRY(cpu_arm7_data_abort) - mrc p15, 0, r1, c5, c0, 0 @ get FSR - mrc p15, 0, r0, c6, c0, 0 @ get FAR - ldr r8, [r2] @ read arm instruction - tst r8, #1 << 20 @ L = 0 -> write? - orreq r1, r1, #1 << 11 @ yes. - and r7, r8, #15 << 24 - add pc, pc, r7, lsr #22 @ Now branch to the relevant processing routine - nop - -/* 0 */ b .data_unknown -/* 1 */ mov pc, lr @ swp -/* 2 */ b .data_unknown -/* 3 */ b .data_unknown -/* 4 */ b .data_arm_lateldrpostconst @ ldr rd, [rn], #m -/* 5 */ b .data_arm_lateldrpreconst @ ldr rd, [rn, #m] -/* 6 */ b .data_arm_lateldrpostreg @ ldr rd, [rn], rm -/* 7 */ b .data_arm_lateldrprereg @ ldr rd, [rn, rm] -/* 8 */ b .data_arm_ldmstm @ ldm*a rn, <rlist> -/* 9 */ b .data_arm_ldmstm @ ldm*b rn, <rlist> -/* a */ b .data_unknown -/* b */ b .data_unknown -/* c */ mov pc, lr @ ldc rd, [rn], #m @ Same as ldr rd, [rn], #m -/* d */ mov pc, lr @ ldc rd, [rn, #m] -/* e */ b .data_unknown -/* f */ -.data_unknown: @ Part of jumptable - mov r0, r2 - mov r1, r8 - mov r2, sp - bl baddataabort - b ret_from_exception - -ENTRY(cpu_arm6_data_abort) - mrc p15, 0, r1, c5, c0, 0 @ get FSR - mrc p15, 0, r0, c6, c0, 0 @ get FAR - ldr r8, [r2] @ read arm instruction - tst r8, #1 << 20 @ L = 0 -> write? - orreq r1, r1, #1 << 11 @ yes. - and r7, r8, #14 << 24 - teq r7, #8 << 24 @ was it ldm/stm - movne pc, lr - -.data_arm_ldmstm: - tst r8, #1 << 21 @ check writeback bit - moveq pc, lr @ no writeback -> no fixup - mov r7, #0x11 - orr r7, r7, #0x1100 - and r6, r8, r7 - and r2, r8, r7, lsl #1 - add r6, r6, r2, lsr #1 - and r2, r8, r7, lsl #2 - add r6, r6, r2, lsr #2 - and r2, r8, r7, lsl #3 - add r6, r6, r2, lsr #3 - add r6, r6, r6, lsr #8 - add r6, r6, r6, lsr #4 - and r6, r6, #15 @ r6 = no. of registers to transfer. 
- and r5, r8, #15 << 16 @ Extract 'n' from instruction - ldr r7, [sp, r5, lsr #14] @ Get register 'Rn' - tst r8, #1 << 23 @ Check U bit - subne r7, r7, r6, lsl #2 @ Undo increment - addeq r7, r7, r6, lsl #2 @ Undo decrement - str r7, [sp, r5, lsr #14] @ Put register 'Rn' - mov pc, lr - -.data_arm_apply_r6_and_rn: - and r5, r8, #15 << 16 @ Extract 'n' from instruction - ldr r7, [sp, r5, lsr #14] @ Get register 'Rn' - tst r8, #1 << 23 @ Check U bit - subne r7, r7, r6 @ Undo incrmenet - addeq r7, r7, r6 @ Undo decrement - str r7, [sp, r5, lsr #14] @ Put register 'Rn' - mov pc, lr - -.data_arm_lateldrpreconst: - tst r8, #1 << 21 @ check writeback bit - moveq pc, lr @ no writeback -> no fixup -.data_arm_lateldrpostconst: - movs r2, r8, lsl #20 @ Get offset - moveq pc, lr @ zero -> no fixup - and r5, r8, #15 << 16 @ Extract 'n' from instruction - ldr r7, [sp, r5, lsr #14] @ Get register 'Rn' - tst r8, #1 << 23 @ Check U bit - subne r7, r7, r2, lsr #20 @ Undo increment - addeq r7, r7, r2, lsr #20 @ Undo decrement - str r7, [sp, r5, lsr #14] @ Put register 'Rn' - mov pc, lr - -.data_arm_lateldrprereg: - tst r8, #1 << 21 @ check writeback bit - moveq pc, lr @ no writeback -> no fixup -.data_arm_lateldrpostreg: - and r7, r8, #15 @ Extract 'm' from instruction - ldr r6, [sp, r7, lsl #2] @ Get register 'Rm' - mov r5, r8, lsr #7 @ get shift count - ands r5, r5, #31 - and r7, r8, #0x70 @ get shift type - orreq r7, r7, #8 @ shift count = 0 - add pc, pc, r7 - nop - - mov r6, r6, lsl r5 @ 0: LSL #!0 - b .data_arm_apply_r6_and_rn - b .data_arm_apply_r6_and_rn @ 1: LSL #0 - nop - b .data_unknown @ 2: MUL? - nop - b .data_unknown @ 3: MUL? - nop - mov r6, r6, lsr r5 @ 4: LSR #!0 - b .data_arm_apply_r6_and_rn - mov r6, r6, lsr #32 @ 5: LSR #32 - b .data_arm_apply_r6_and_rn - b .data_unknown @ 6: MUL? - nop - b .data_unknown @ 7: MUL? - nop - mov r6, r6, asr r5 @ 8: ASR #!0 - b .data_arm_apply_r6_and_rn - mov r6, r6, asr #32 @ 9: ASR #32 - b .data_arm_apply_r6_and_rn - b .data_unknown @ A: MUL? - nop - b .data_unknown @ B: MUL? - nop - mov r6, r6, ror r5 @ C: ROR #!0 - b .data_arm_apply_r6_and_rn - mov r6, r6, rrx @ D: RRX - b .data_arm_apply_r6_and_rn - b .data_unknown @ E: MUL? - nop - b .data_unknown @ F: MUL? - -/* - * Function: arm6_7_proc_init (void) - * : arm6_7_proc_fin (void) - * - * Notes : This processor does not require these - */ -ENTRY(cpu_arm6_proc_init) -ENTRY(cpu_arm7_proc_init) - mov pc, lr - -ENTRY(cpu_arm6_proc_fin) -ENTRY(cpu_arm7_proc_fin) - mov r0, #0x31 @ ....S..DP...M - mcr p15, 0, r0, c1, c0, 0 @ disable caches - mov pc, lr - -ENTRY(cpu_arm6_do_idle) -ENTRY(cpu_arm7_do_idle) - mov pc, lr - -/* - * Function: arm6_7_switch_mm(unsigned long pgd_phys) - * Params : pgd_phys Physical address of page table - * Purpose : Perform a task switch, saving the old processes state, and restoring - * the new. 
- */ -ENTRY(cpu_arm6_switch_mm) -ENTRY(cpu_arm7_switch_mm) -#ifdef CONFIG_MMU - mov r1, #0 - mcr p15, 0, r1, c7, c0, 0 @ flush cache - mcr p15, 0, r0, c2, c0, 0 @ update page table ptr - mcr p15, 0, r1, c5, c0, 0 @ flush TLBs -#endif - mov pc, lr - -/* - * Function: arm6_7_set_pte_ext(pte_t *ptep, pte_t pte, unsigned int ext) - * Params : r0 = Address to set - * : r1 = value to set - * Purpose : Set a PTE and flush it out of any WB cache - */ - .align 5 -ENTRY(cpu_arm6_set_pte_ext) -ENTRY(cpu_arm7_set_pte_ext) -#ifdef CONFIG_MMU - armv3_set_pte_ext wc_disable=0 -#endif /* CONFIG_MMU */ - mov pc, lr - -/* - * Function: _arm6_7_reset - * Params : r0 = address to jump to - * Notes : This sets up everything for a reset - */ -ENTRY(cpu_arm6_reset) -ENTRY(cpu_arm7_reset) - mov r1, #0 - mcr p15, 0, r1, c7, c0, 0 @ flush cache -#ifdef CONFIG_MMU - mcr p15, 0, r1, c5, c0, 0 @ flush TLB -#endif - mov r1, #0x30 - mcr p15, 0, r1, c1, c0, 0 @ turn off MMU etc - mov pc, r0 - - __CPUINIT - - .type __arm6_setup, #function -__arm6_setup: mov r0, #0 - mcr p15, 0, r0, c7, c0 @ flush caches on v3 -#ifdef CONFIG_MMU - mcr p15, 0, r0, c5, c0 @ flush TLBs on v3 - mov r0, #0x3d @ . ..RS BLDP WCAM - orr r0, r0, #0x100 @ . ..01 0011 1101 -#else - mov r0, #0x3c @ . ..RS BLDP WCA. -#endif - mov pc, lr - .size __arm6_setup, . - __arm6_setup - - .type __arm7_setup, #function -__arm7_setup: mov r0, #0 - mcr p15, 0, r0, c7, c0 @ flush caches on v3 -#ifdef CONFIG_MMU - mcr p15, 0, r0, c5, c0 @ flush TLBs on v3 - mcr p15, 0, r0, c3, c0 @ load domain access register - mov r0, #0x7d @ . ..RS BLDP WCAM - orr r0, r0, #0x100 @ . ..01 0111 1101 -#else - mov r0, #0x7c @ . ..RS BLDP WCA. -#endif - mov pc, lr - .size __arm7_setup, . - __arm7_setup - - __INITDATA - -/* - * Purpose : Function pointers used to access above functions - all calls - * come through these - */ - .type arm6_processor_functions, #object -ENTRY(arm6_processor_functions) - .word cpu_arm6_data_abort - .word legacy_pabort - .word cpu_arm6_proc_init - .word cpu_arm6_proc_fin - .word cpu_arm6_reset - .word cpu_arm6_do_idle - .word cpu_arm6_dcache_clean_area - .word cpu_arm6_switch_mm - .word cpu_arm6_set_pte_ext - .size arm6_processor_functions, . - arm6_processor_functions - -/* - * Purpose : Function pointers used to access above functions - all calls - * come through these - */ - .type arm7_processor_functions, #object -ENTRY(arm7_processor_functions) - .word cpu_arm7_data_abort - .word legacy_pabort - .word cpu_arm7_proc_init - .word cpu_arm7_proc_fin - .word cpu_arm7_reset - .word cpu_arm7_do_idle - .word cpu_arm7_dcache_clean_area - .word cpu_arm7_switch_mm - .word cpu_arm7_set_pte_ext - .size arm7_processor_functions, . - arm7_processor_functions - - .section ".rodata" - - .type cpu_arch_name, #object -cpu_arch_name: .asciz "armv3" - .size cpu_arch_name, . - cpu_arch_name - - .type cpu_elf_name, #object -cpu_elf_name: .asciz "v3" - .size cpu_elf_name, . - cpu_elf_name - - .type cpu_arm6_name, #object -cpu_arm6_name: .asciz "ARM6" - .size cpu_arm6_name, . - cpu_arm6_name - - .type cpu_arm610_name, #object -cpu_arm610_name: - .asciz "ARM610" - .size cpu_arm610_name, . - cpu_arm610_name - - .type cpu_arm7_name, #object -cpu_arm7_name: .asciz "ARM7" - .size cpu_arm7_name, . - cpu_arm7_name - - .type cpu_arm710_name, #object -cpu_arm710_name: - .asciz "ARM710" - .size cpu_arm710_name, . 
- cpu_arm710_name - - .align - - .section ".proc.info.init", #alloc, #execinstr - - .type __arm6_proc_info, #object -__arm6_proc_info: - .long 0x41560600 - .long 0xfffffff0 - .long 0x00000c1e - .long PMD_TYPE_SECT | \ - PMD_BIT4 | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - b __arm6_setup - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP | HWCAP_26BIT - .long cpu_arm6_name - .long arm6_processor_functions - .long v3_tlb_fns - .long v3_user_fns - .long v3_cache_fns - .size __arm6_proc_info, . - __arm6_proc_info - - .type __arm610_proc_info, #object -__arm610_proc_info: - .long 0x41560610 - .long 0xfffffff0 - .long 0x00000c1e - .long PMD_TYPE_SECT | \ - PMD_BIT4 | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - b __arm6_setup - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP | HWCAP_26BIT - .long cpu_arm610_name - .long arm6_processor_functions - .long v3_tlb_fns - .long v3_user_fns - .long v3_cache_fns - .size __arm610_proc_info, . - __arm610_proc_info - - .type __arm7_proc_info, #object -__arm7_proc_info: - .long 0x41007000 - .long 0xffffff00 - .long 0x00000c1e - .long PMD_TYPE_SECT | \ - PMD_BIT4 | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - b __arm7_setup - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP | HWCAP_26BIT - .long cpu_arm7_name - .long arm7_processor_functions - .long v3_tlb_fns - .long v3_user_fns - .long v3_cache_fns - .size __arm7_proc_info, . - __arm7_proc_info - - .type __arm710_proc_info, #object -__arm710_proc_info: - .long 0x41007100 - .long 0xfff8ff00 - .long PMD_TYPE_SECT | \ - PMD_SECT_BUFFERABLE | \ - PMD_SECT_CACHEABLE | \ - PMD_BIT4 | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - .long PMD_TYPE_SECT | \ - PMD_BIT4 | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - b __arm7_setup - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP | HWCAP_26BIT - .long cpu_arm710_name - .long arm7_processor_functions - .long v3_tlb_fns - .long v3_user_fns - .long v3_cache_fns - .size __arm710_proc_info, . - __arm710_proc_info diff --git a/arch/arm/mm/proc-arm720.S b/arch/arm/mm/proc-arm720.S index c285395f44b..d42c37f9f5b 100644 --- a/arch/arm/mm/proc-arm720.S +++ b/arch/arm/mm/proc-arm720.S @@ -63,7 +63,7 @@ ENTRY(cpu_arm720_proc_fin) /* * Function: arm720_proc_do_idle(void) * Params : r0 = unused - * Purpose : put the processer in proper idle mode + * Purpose : put the processor in proper idle mode */ ENTRY(cpu_arm720_do_idle) mov pc, lr @@ -101,6 +101,7 @@ ENTRY(cpu_arm720_set_pte_ext) * Params : r0 = address to jump to * Notes : This sets up everything for a reset */ + .pushsection .idmap.text, "ax" ENTRY(cpu_arm720_reset) mov ip, #0 mcr p15, 0, ip, c7, c7, 0 @ invalidate cache @@ -112,8 +113,8 @@ ENTRY(cpu_arm720_reset) bic ip, ip, #0x2100 @ ..v....s........ mcr p15, 0, ip, c1, c0, 0 @ ctrl register mov pc, r0 - - __CPUINIT +ENDPROC(cpu_arm720_reset) + .popsection .type __arm710_setup, #function __arm710_setup: @@ -169,43 +170,15 @@ arm720_crval: crval clear=0x00002f3f, mmuset=0x0000213d, ucset=0x00000130 __INITDATA - -/* - * Purpose : Function pointers used to access above functions - all calls - * come through these - */ - .type arm720_processor_functions, #object -ENTRY(arm720_processor_functions) - .word v4t_late_abort - .word legacy_pabort - .word cpu_arm720_proc_init - .word cpu_arm720_proc_fin - .word cpu_arm720_reset - .word cpu_arm720_do_idle - .word cpu_arm720_dcache_clean_area - .word cpu_arm720_switch_mm - .word cpu_arm720_set_pte_ext - .size arm720_processor_functions, . 
- arm720_processor_functions + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions arm720, dabort=v4t_late_abort, pabort=legacy_pabort .section ".rodata" - .type cpu_arch_name, #object -cpu_arch_name: .asciz "armv4t" - .size cpu_arch_name, . - cpu_arch_name - - .type cpu_elf_name, #object -cpu_elf_name: .asciz "v4" - .size cpu_elf_name, . - cpu_elf_name - - .type cpu_arm710_name, #object -cpu_arm710_name: - .asciz "ARM710T" - .size cpu_arm710_name, . - cpu_arm710_name - - .type cpu_arm720_name, #object -cpu_arm720_name: - .asciz "ARM720T" - .size cpu_arm720_name, . - cpu_arm720_name + string cpu_arch_name, "armv4t" + string cpu_elf_name, "v4" + string cpu_arm710_name, "ARM710T" + string cpu_arm720_name, "ARM720T" .align @@ -215,10 +188,11 @@ cpu_arm720_name: .section ".proc.info.init", #alloc, #execinstr - .type __arm710_proc_info, #object -__arm710_proc_info: - .long 0x41807100 @ cpu_val - .long 0xffffff00 @ cpu_mask +.macro arm720_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cpu_flush:req + .type __\name\()_proc_info,#object +__\name\()_proc_info: + .long \cpu_val + .long \cpu_mask .long PMD_TYPE_SECT | \ PMD_SECT_BUFFERABLE | \ PMD_SECT_CACHEABLE | \ @@ -229,38 +203,17 @@ __arm710_proc_info: PMD_BIT4 | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ - b __arm710_setup @ cpu_flush + b \cpu_flush @ cpu_flush .long cpu_arch_name @ arch_name .long cpu_elf_name @ elf_name .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB @ elf_hwcap - .long cpu_arm710_name @ name + .long \cpu_name .long arm720_processor_functions .long v4_tlb_fns .long v4wt_user_fns .long v4_cache_fns - .size __arm710_proc_info, . - __arm710_proc_info + .size __\name\()_proc_info, . - __\name\()_proc_info +.endm - .type __arm720_proc_info, #object -__arm720_proc_info: - .long 0x41807200 @ cpu_val - .long 0xffffff00 @ cpu_mask - .long PMD_TYPE_SECT | \ - PMD_SECT_BUFFERABLE | \ - PMD_SECT_CACHEABLE | \ - PMD_BIT4 | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - .long PMD_TYPE_SECT | \ - PMD_BIT4 | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - b __arm720_setup @ cpu_flush - .long cpu_arch_name @ arch_name - .long cpu_elf_name @ elf_name - .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB @ elf_hwcap - .long cpu_arm720_name @ name - .long arm720_processor_functions - .long v4_tlb_fns - .long v4wt_user_fns - .long v4_cache_fns - .size __arm720_proc_info, . - __arm720_proc_info + arm720_proc_info arm710, 0x41807100, 0xffffff00, cpu_arm710_name, __arm710_setup + arm720_proc_info arm720, 0x41807200, 0xffffff00, cpu_arm720_name, __arm720_setup diff --git a/arch/arm/mm/proc-arm740.S b/arch/arm/mm/proc-arm740.S index 38b27dcba72..9b0ae90cbf1 100644 --- a/arch/arm/mm/proc-arm740.S +++ b/arch/arm/mm/proc-arm740.S @@ -17,6 +17,8 @@ #include <asm/pgtable.h> #include <asm/ptrace.h> +#include "proc-macros.S" + .text /* * cpu_arm740_proc_init() @@ -47,6 +49,7 @@ ENTRY(cpu_arm740_proc_fin) * Params : r0 = address to jump to * Notes : This sets up everything for a reset */ + .pushsection .idmap.text, "ax" ENTRY(cpu_arm740_reset) mov ip, #0 mcr p15, 0, ip, c7, c0, 0 @ invalidate cache @@ -54,8 +57,8 @@ ENTRY(cpu_arm740_reset) bic ip, ip, #0x0000000c @ ............wc.. 
mcr p15, 0, ip, c1, c0, 0 @ ctrl register mov pc, r0 - - __CPUINIT +ENDPROC(cpu_arm740_reset) + .popsection .type __arm740_setup, #function __arm740_setup: @@ -72,24 +75,27 @@ __arm740_setup: mcr p15, 0, r0, c6, c0 @ set area 0, default ldr r0, =(CONFIG_DRAM_BASE & 0xFFFFF000) @ base[31:12] of RAM - ldr r1, =(CONFIG_DRAM_SIZE >> 12) @ size of RAM (must be >= 4KB) - mov r2, #10 @ 11 is the minimum (4KB) -1: add r2, r2, #1 @ area size *= 2 - mov r1, r1, lsr #1 + ldr r3, =(CONFIG_DRAM_SIZE >> 12) @ size of RAM (must be >= 4KB) + mov r4, #10 @ 11 is the minimum (4KB) +1: add r4, r4, #1 @ area size *= 2 + movs r3, r3, lsr #1 bne 1b @ count not zero r-shift - orr r0, r0, r2, lsl #1 @ the area register value + orr r0, r0, r4, lsl #1 @ the area register value orr r0, r0, #1 @ set enable bit mcr p15, 0, r0, c6, c1 @ set area 1, RAM ldr r0, =(CONFIG_FLASH_MEM_BASE & 0xFFFFF000) @ base[31:12] of FLASH - ldr r1, =(CONFIG_FLASH_SIZE >> 12) @ size of FLASH (must be >= 4KB) - mov r2, #10 @ 11 is the minimum (4KB) -1: add r2, r2, #1 @ area size *= 2 - mov r1, r1, lsr #1 + ldr r3, =(CONFIG_FLASH_SIZE >> 12) @ size of FLASH (must be >= 4KB) + cmp r3, #0 + moveq r0, #0 + beq 2f + mov r4, #10 @ 11 is the minimum (4KB) +1: add r4, r4, #1 @ area size *= 2 + movs r3, r3, lsr #1 bne 1b @ count not zero r-shift - orr r0, r0, r2, lsl #1 @ the area register value + orr r0, r0, r4, lsl #1 @ the area register value orr r0, r0, #1 @ set enable bit - mcr p15, 0, r0, c6, c2 @ set area 2, ROM/FLASH +2: mcr p15, 0, r0, c6, c2 @ set area 2, ROM/FLASH mov r0, #0x06 mcr p15, 0, r0, c2, c0 @ Region 1&2 cacheable @@ -115,39 +121,14 @@ __arm740_setup: __INITDATA -/* - * Purpose : Function pointers used to access above functions - all calls - * come through these - */ - .type arm740_processor_functions, #object -ENTRY(arm740_processor_functions) - .word v4t_late_abort - .word legacy_pabort - .word cpu_arm740_proc_init - .word cpu_arm740_proc_fin - .word cpu_arm740_reset - .word cpu_arm740_do_idle - .word cpu_arm740_dcache_clean_area - .word cpu_arm740_switch_mm - .word 0 @ cpu_*_set_pte - .size arm740_processor_functions, . - arm740_processor_functions + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions arm740, dabort=v4t_late_abort, pabort=legacy_pabort, nommu=1 .section ".rodata" - .type cpu_arch_name, #object -cpu_arch_name: - .asciz "armv4" - .size cpu_arch_name, . - cpu_arch_name - - .type cpu_elf_name, #object -cpu_elf_name: - .asciz "v4" - .size cpu_elf_name, . - cpu_elf_name - - .type cpu_arm740_name, #object -cpu_arm740_name: - .ascii "ARM740T" - .size cpu_arm740_name, . - cpu_arm740_name + string cpu_arch_name, "armv4" + string cpu_elf_name, "v4" + string cpu_arm740_name, "ARM740T" .align @@ -157,15 +138,14 @@ __arm740_proc_info: .long 0x41807400 .long 0xfffffff0 .long 0 + .long 0 b __arm740_setup .long cpu_arch_name .long cpu_elf_name - .long HWCAP_SWP | HWCAP_HALF | HWCAP_26BIT + .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB | HWCAP_26BIT .long cpu_arm740_name .long arm740_processor_functions .long 0 .long 0 - .long v3_cache_fns @ cache model + .long v4_cache_fns @ cache model .size __arm740_proc_info, . 
- __arm740_proc_info - - diff --git a/arch/arm/mm/proc-arm7tdmi.S b/arch/arm/mm/proc-arm7tdmi.S index 0c9786de20a..f6cc3f63ce3 100644 --- a/arch/arm/mm/proc-arm7tdmi.S +++ b/arch/arm/mm/proc-arm7tdmi.S @@ -17,6 +17,8 @@ #include <asm/pgtable.h> #include <asm/ptrace.h> +#include "proc-macros.S" + .text /* * cpu_arm7tdmi_proc_init() @@ -43,10 +45,11 @@ ENTRY(cpu_arm7tdmi_proc_fin) * Params : loc(r0) address to jump to * Purpose : Sets up everything for a reset and jump to the location for soft reset. */ + .pushsection .idmap.text, "ax" ENTRY(cpu_arm7tdmi_reset) mov pc, r0 - - __CPUINIT +ENDPROC(cpu_arm7tdmi_reset) + .popsection .type __arm7tdmi_setup, #function __arm7tdmi_setup: @@ -55,194 +58,57 @@ __arm7tdmi_setup: __INITDATA -/* - * Purpose : Function pointers used to access above functions - all calls - * come through these - */ - .type arm7tdmi_processor_functions, #object -ENTRY(arm7tdmi_processor_functions) - .word v4t_late_abort - .word legacy_pabort - .word cpu_arm7tdmi_proc_init - .word cpu_arm7tdmi_proc_fin - .word cpu_arm7tdmi_reset - .word cpu_arm7tdmi_do_idle - .word cpu_arm7tdmi_dcache_clean_area - .word cpu_arm7tdmi_switch_mm - .word 0 @ cpu_*_set_pte - .size arm7tdmi_processor_functions, . - arm7tdmi_processor_functions + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions arm7tdmi, dabort=v4t_late_abort, pabort=legacy_pabort, nommu=1 .section ".rodata" - .type cpu_arch_name, #object -cpu_arch_name: - .asciz "armv4t" - .size cpu_arch_name, . - cpu_arch_name - - .type cpu_elf_name, #object -cpu_elf_name: - .asciz "v4" - .size cpu_elf_name, . - cpu_elf_name - - .type cpu_arm7tdmi_name, #object -cpu_arm7tdmi_name: - .asciz "ARM7TDMI" - .size cpu_arm7tdmi_name, . - cpu_arm7tdmi_name - - .type cpu_triscenda7_name, #object -cpu_triscenda7_name: - .asciz "Triscend-A7x" - .size cpu_triscenda7_name, . - cpu_triscenda7_name - - .type cpu_at91_name, #object -cpu_at91_name: - .asciz "Atmel-AT91M40xxx" - .size cpu_at91_name, . - cpu_at91_name - - .type cpu_s3c3410_name, #object -cpu_s3c3410_name: - .asciz "Samsung-S3C3410" - .size cpu_s3c3410_name, . - cpu_s3c3410_name - - .type cpu_s3c44b0x_name, #object -cpu_s3c44b0x_name: - .asciz "Samsung-S3C44B0x" - .size cpu_s3c44b0x_name, . - cpu_s3c44b0x_name - - .type cpu_s3c4510b, #object -cpu_s3c4510b_name: - .asciz "Samsung-S3C4510B" - .size cpu_s3c4510b_name, . - cpu_s3c4510b_name - - .type cpu_s3c4530_name, #object -cpu_s3c4530_name: - .asciz "Samsung-S3C4530" - .size cpu_s3c4530_name, . - cpu_s3c4530_name - - .type cpu_netarm_name, #object -cpu_netarm_name: - .asciz "NETARM" - .size cpu_netarm_name, . - cpu_netarm_name + string cpu_arch_name, "armv4t" + string cpu_elf_name, "v4" + string cpu_arm7tdmi_name, "ARM7TDMI" + string cpu_triscenda7_name, "Triscend-A7x" + string cpu_at91_name, "Atmel-AT91M40xxx" + string cpu_s3c3410_name, "Samsung-S3C3410" + string cpu_s3c44b0x_name, "Samsung-S3C44B0x" + string cpu_s3c4510b_name, "Samsung-S3C4510B" + string cpu_s3c4530_name, "Samsung-S3C4530" + string cpu_netarm_name, "NETARM" .align .section ".proc.info.init", #alloc, #execinstr - .type __arm7tdmi_proc_info, #object -__arm7tdmi_proc_info: - .long 0x41007700 - .long 0xfff8ff00 - .long 0 - .long 0 - b __arm7tdmi_setup - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP | HWCAP_26BIT - .long cpu_arm7tdmi_name - .long arm7tdmi_processor_functions - .long 0 - .long 0 - .long v4_cache_fns - .size __arm7tdmi_proc_info, . 
- __arm7dmi_proc_info - - .type __triscenda7_proc_info, #object -__triscenda7_proc_info: - .long 0x0001d2ff - .long 0x0001ffff - .long 0 - .long 0 - b __arm7tdmi_setup - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP | HWCAP_THUMB | HWCAP_26BIT - .long cpu_triscenda7_name - .long arm7tdmi_processor_functions - .long 0 - .long 0 - .long v4_cache_fns - .size __triscenda7_proc_info, . - __triscenda7_proc_info - - .type __at91_proc_info, #object -__at91_proc_info: - .long 0x14000040 - .long 0xfff000e0 - .long 0 - .long 0 - b __arm7tdmi_setup - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP | HWCAP_THUMB | HWCAP_26BIT - .long cpu_at91_name - .long arm7tdmi_processor_functions - .long 0 - .long 0 - .long v4_cache_fns - .size __at91_proc_info, . - __at91_proc_info - - .type __s3c4510b_proc_info, #object -__s3c4510b_proc_info: - .long 0x36365000 - .long 0xfffff000 - .long 0 - .long 0 - b __arm7tdmi_setup - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP | HWCAP_THUMB | HWCAP_26BIT - .long cpu_s3c4510b_name - .long arm7tdmi_processor_functions - .long 0 - .long 0 - .long v4_cache_fns - .size __s3c4510b_proc_info, . - __s3c4510b_proc_info - - .type __s3c4530_proc_info, #object -__s3c4530_proc_info: - .long 0x4c000000 - .long 0xfff000e0 - .long 0 - .long 0 - b __arm7tdmi_setup - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP | HWCAP_THUMB | HWCAP_26BIT - .long cpu_s3c4530_name - .long arm7tdmi_processor_functions - .long 0 - .long 0 - .long v4_cache_fns - .size __s3c4530_proc_info, . - __s3c4530_proc_info - - .type __s3c3410_proc_info, #object -__s3c3410_proc_info: - .long 0x34100000 - .long 0xffff0000 - .long 0 - .long 0 - b __arm7tdmi_setup - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP | HWCAP_THUMB | HWCAP_26BIT - .long cpu_s3c3410_name - .long arm7tdmi_processor_functions - .long 0 - .long 0 - .long v4_cache_fns - .size __s3c3410_proc_info, . - __s3c3410_proc_info - - .type __s3c44b0x_proc_info, #object -__s3c44b0x_proc_info: - .long 0x44b00000 - .long 0xffff0000 +.macro arm7tdmi_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, \ + extra_hwcaps=0 + .type __\name\()_proc_info, #object +__\name\()_proc_info: + .long \cpu_val + .long \cpu_mask .long 0 .long 0 b __arm7tdmi_setup .long cpu_arch_name .long cpu_elf_name - .long HWCAP_SWP | HWCAP_THUMB | HWCAP_26BIT - .long cpu_s3c44b0x_name + .long HWCAP_SWP | HWCAP_26BIT | ( \extra_hwcaps ) + .long \cpu_name .long arm7tdmi_processor_functions .long 0 .long 0 .long v4_cache_fns - .size __s3c44b0x_proc_info, . - __s3c44b0x_proc_info + .size __\name\()_proc_info, . 
- __\name\()_proc_info +.endm + + arm7tdmi_proc_info arm7tdmi, 0x41007700, 0xfff8ff00, \ + cpu_arm7tdmi_name + arm7tdmi_proc_info triscenda7, 0x0001d2ff, 0x0001ffff, \ + cpu_triscenda7_name, extra_hwcaps=HWCAP_THUMB + arm7tdmi_proc_info at91, 0x14000040, 0xfff000e0, \ + cpu_at91_name, extra_hwcaps=HWCAP_THUMB + arm7tdmi_proc_info s3c4510b, 0x36365000, 0xfffff000, \ + cpu_s3c4510b_name, extra_hwcaps=HWCAP_THUMB + arm7tdmi_proc_info s3c4530, 0x4c000000, 0xfff000e0, \ + cpu_s3c4530_name, extra_hwcaps=HWCAP_THUMB + arm7tdmi_proc_info s3c3410, 0x34100000, 0xffff0000, \ + cpu_s3c3410_name, extra_hwcaps=HWCAP_THUMB + arm7tdmi_proc_info s3c44b0x, 0x44b00000, 0xffff0000, \ + cpu_s3c44b0x_name, extra_hwcaps=HWCAP_THUMB diff --git a/arch/arm/mm/proc-arm920.S b/arch/arm/mm/proc-arm920.S index 6109f278a90..549557df6d5 100644 --- a/arch/arm/mm/proc-arm920.S +++ b/arch/arm/mm/proc-arm920.S @@ -53,7 +53,7 @@ /* * This is the size at which it becomes more efficient to * clean the whole cache, rather than using the individual - * cache line maintainence instructions. + * cache line maintenance instructions. */ #define CACHE_DLIMIT 65536 @@ -85,6 +85,7 @@ ENTRY(cpu_arm920_proc_fin) * loc: location to jump to for soft reset */ .align 5 + .pushsection .idmap.text, "ax" ENTRY(cpu_arm920_reset) mov ip, #0 mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches @@ -97,6 +98,8 @@ ENTRY(cpu_arm920_reset) bic ip, ip, #0x1100 @ ...i...s........ mcr p15, 0, ip, c1, c0, 0 @ ctrl register mov pc, r0 +ENDPROC(cpu_arm920_reset) + .popsection /* * cpu_arm920_do_idle() @@ -207,6 +210,7 @@ ENTRY(arm920_coherent_user_range) cmp r0, r1 blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov r0, #0 mov pc, lr /* @@ -315,18 +319,11 @@ ENTRY(arm920_dma_unmap_area) mov pc, lr ENDPROC(arm920_dma_unmap_area) -ENTRY(arm920_cache_fns) - .long arm920_flush_icache_all - .long arm920_flush_kern_cache_all - .long arm920_flush_user_cache_all - .long arm920_flush_user_cache_range - .long arm920_coherent_kern_range - .long arm920_coherent_user_range - .long arm920_flush_kern_dcache_area - .long arm920_dma_map_area - .long arm920_dma_unmap_area - .long arm920_dma_flush_range + .globl arm920_flush_kern_cache_louis + .equ arm920_flush_kern_cache_louis, arm920_flush_kern_cache_all + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions arm920 #endif @@ -387,7 +384,31 @@ ENTRY(cpu_arm920_set_pte_ext) #endif mov pc, lr - __CPUINIT +/* Suspend/resume support: taken from arch/arm/plat-s3c24xx/sleep.S */ +.globl cpu_arm920_suspend_size +.equ cpu_arm920_suspend_size, 4 * 3 +#ifdef CONFIG_ARM_CPU_SUSPEND +ENTRY(cpu_arm920_do_suspend) + stmfd sp!, {r4 - r6, lr} + mrc p15, 0, r4, c13, c0, 0 @ PID + mrc p15, 0, r5, c3, c0, 0 @ Domain ID + mrc p15, 0, r6, c1, c0, 0 @ Control register + stmia r0, {r4 - r6} + ldmfd sp!, {r4 - r6, pc} +ENDPROC(cpu_arm920_do_suspend) + +ENTRY(cpu_arm920_do_resume) + mov ip, #0 + mcr p15, 0, ip, c8, c7, 0 @ invalidate I+D TLBs + mcr p15, 0, ip, c7, c7, 0 @ invalidate I+D caches + ldmia r0, {r4 - r6} + mcr p15, 0, r4, c13, c0, 0 @ PID + mcr p15, 0, r5, c3, c0, 0 @ Domain ID + mcr p15, 0, r1, c2, c0, 0 @ TTB address + mov r0, r6 @ control register + b cpu_resume_mmu +ENDPROC(cpu_arm920_do_resume) +#endif .type __arm920_setup, #function __arm920_setup: @@ -416,40 +437,14 @@ arm920_crval: crval clear=0x00003f3f, mmuset=0x00003135, ucset=0x00001130 __INITDATA - -/* - * Purpose : Function pointers used to access above functions - all calls - * come through these - */ - .type 
arm920_processor_functions, #object -arm920_processor_functions: - .word v4t_early_abort - .word legacy_pabort - .word cpu_arm920_proc_init - .word cpu_arm920_proc_fin - .word cpu_arm920_reset - .word cpu_arm920_do_idle - .word cpu_arm920_dcache_clean_area - .word cpu_arm920_switch_mm - .word cpu_arm920_set_pte_ext - .size arm920_processor_functions, . - arm920_processor_functions + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions arm920, dabort=v4t_early_abort, pabort=legacy_pabort, suspend=1 .section ".rodata" - .type cpu_arch_name, #object -cpu_arch_name: - .asciz "armv4t" - .size cpu_arch_name, . - cpu_arch_name - - .type cpu_elf_name, #object -cpu_elf_name: - .asciz "v4" - .size cpu_elf_name, . - cpu_elf_name - - .type cpu_arm920_name, #object -cpu_arm920_name: - .asciz "ARM920T" - .size cpu_arm920_name, . - cpu_arm920_name + string cpu_arch_name, "armv4t" + string cpu_elf_name, "v4" + string cpu_arm920_name, "ARM920T" .align diff --git a/arch/arm/mm/proc-arm922.S b/arch/arm/mm/proc-arm922.S index bb2f0f46a5e..2a758b06c6f 100644 --- a/arch/arm/mm/proc-arm922.S +++ b/arch/arm/mm/proc-arm922.S @@ -54,7 +54,7 @@ /* * This is the size at which it becomes more efficient to * clean the whole cache, rather than using the individual - * cache line maintainence instructions. (I think this should + * cache line maintenance instructions. (I think this should * be 32768). */ #define CACHE_DLIMIT 8192 @@ -87,6 +87,7 @@ ENTRY(cpu_arm922_proc_fin) * loc: location to jump to for soft reset */ .align 5 + .pushsection .idmap.text, "ax" ENTRY(cpu_arm922_reset) mov ip, #0 mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches @@ -99,6 +100,8 @@ ENTRY(cpu_arm922_reset) bic ip, ip, #0x1100 @ ...i...s........ mcr p15, 0, ip, c1, c0, 0 @ ctrl register mov pc, r0 +ENDPROC(cpu_arm922_reset) + .popsection /* * cpu_arm922_do_idle() @@ -209,6 +212,7 @@ ENTRY(arm922_coherent_user_range) cmp r0, r1 blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov r0, #0 mov pc, lr /* @@ -317,18 +321,11 @@ ENTRY(arm922_dma_unmap_area) mov pc, lr ENDPROC(arm922_dma_unmap_area) -ENTRY(arm922_cache_fns) - .long arm922_flush_icache_all - .long arm922_flush_kern_cache_all - .long arm922_flush_user_cache_all - .long arm922_flush_user_cache_range - .long arm922_coherent_kern_range - .long arm922_coherent_user_range - .long arm922_flush_kern_dcache_area - .long arm922_dma_map_area - .long arm922_dma_unmap_area - .long arm922_dma_flush_range + .globl arm922_flush_kern_cache_louis + .equ arm922_flush_kern_cache_louis, arm922_flush_kern_cache_all + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions arm922 #endif @@ -391,8 +388,6 @@ ENTRY(cpu_arm922_set_pte_ext) #endif /* CONFIG_MMU */ mov pc, lr - __CPUINIT - .type __arm922_setup, #function __arm922_setup: mov r0, #0 @@ -420,40 +415,14 @@ arm922_crval: crval clear=0x00003f3f, mmuset=0x00003135, ucset=0x00001130 __INITDATA - -/* - * Purpose : Function pointers used to access above functions - all calls - * come through these - */ - .type arm922_processor_functions, #object -arm922_processor_functions: - .word v4t_early_abort - .word legacy_pabort - .word cpu_arm922_proc_init - .word cpu_arm922_proc_fin - .word cpu_arm922_reset - .word cpu_arm922_do_idle - .word cpu_arm922_dcache_clean_area - .word cpu_arm922_switch_mm - .word cpu_arm922_set_pte_ext - .size arm922_processor_functions, . 
- arm922_processor_functions + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions arm922, dabort=v4t_early_abort, pabort=legacy_pabort .section ".rodata" - .type cpu_arch_name, #object -cpu_arch_name: - .asciz "armv4t" - .size cpu_arch_name, . - cpu_arch_name - - .type cpu_elf_name, #object -cpu_elf_name: - .asciz "v4" - .size cpu_elf_name, . - cpu_elf_name - - .type cpu_arm922_name, #object -cpu_arm922_name: - .asciz "ARM922T" - .size cpu_arm922_name, . - cpu_arm922_name + string cpu_arch_name, "armv4t" + string cpu_elf_name, "v4" + string cpu_arm922_name, "ARM922T" .align diff --git a/arch/arm/mm/proc-arm925.S b/arch/arm/mm/proc-arm925.S index c13e01accfe..ba0d58e1a2a 100644 --- a/arch/arm/mm/proc-arm925.S +++ b/arch/arm/mm/proc-arm925.S @@ -77,7 +77,7 @@ /* * This is the size at which it becomes more efficient to * clean the whole cache, rather than using the individual - * cache line maintainence instructions. + * cache line maintenance instructions. */ #define CACHE_DLIMIT 8192 @@ -108,6 +108,7 @@ ENTRY(cpu_arm925_proc_fin) * loc: location to jump to for soft reset */ .align 5 + .pushsection .idmap.text, "ax" ENTRY(cpu_arm925_reset) /* Send software reset to MPU and DSP */ mov ip, #0xff000000 @@ -115,6 +116,8 @@ ENTRY(cpu_arm925_reset) orr ip, ip, #0x0000ce00 mov r4, #1 strh r4, [ip, #0x10] +ENDPROC(cpu_arm925_reset) + .popsection mov ip, #0 mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches @@ -255,6 +258,7 @@ ENTRY(arm925_coherent_user_range) cmp r0, r1 blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov r0, #0 mov pc, lr /* @@ -372,17 +376,11 @@ ENTRY(arm925_dma_unmap_area) mov pc, lr ENDPROC(arm925_dma_unmap_area) -ENTRY(arm925_cache_fns) - .long arm925_flush_icache_all - .long arm925_flush_kern_cache_all - .long arm925_flush_user_cache_all - .long arm925_flush_user_cache_range - .long arm925_coherent_kern_range - .long arm925_coherent_user_range - .long arm925_flush_kern_dcache_area - .long arm925_dma_map_area - .long arm925_dma_unmap_area - .long arm925_dma_flush_range + .globl arm925_flush_kern_cache_louis + .equ arm925_flush_kern_cache_louis, arm925_flush_kern_cache_all + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions arm925 ENTRY(cpu_arm925_dcache_clean_area) #ifndef CONFIG_CPU_DCACHE_WRITETHROUGH @@ -440,8 +438,6 @@ ENTRY(cpu_arm925_set_pte_ext) #endif /* CONFIG_MMU */ mov pc, lr - __CPUINIT - .type __arm925_setup, #function __arm925_setup: mov r0, #0 @@ -487,50 +483,26 @@ arm925_crval: crval clear=0x00007f3f, mmuset=0x0000313d, ucset=0x00001130 __INITDATA - -/* - * Purpose : Function pointers used to access above functions - all calls - * come through these - */ - .type arm925_processor_functions, #object -arm925_processor_functions: - .word v4t_early_abort - .word legacy_pabort - .word cpu_arm925_proc_init - .word cpu_arm925_proc_fin - .word cpu_arm925_reset - .word cpu_arm925_do_idle - .word cpu_arm925_dcache_clean_area - .word cpu_arm925_switch_mm - .word cpu_arm925_set_pte_ext - .size arm925_processor_functions, . - arm925_processor_functions + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions arm925, dabort=v4t_early_abort, pabort=legacy_pabort .section ".rodata" - .type cpu_arch_name, #object -cpu_arch_name: - .asciz "armv4t" - .size cpu_arch_name, . - cpu_arch_name - - .type cpu_elf_name, #object -cpu_elf_name: - .asciz "v4" - .size cpu_elf_name, . 
- cpu_elf_name - - .type cpu_arm925_name, #object -cpu_arm925_name: - .asciz "ARM925T" - .size cpu_arm925_name, . - cpu_arm925_name + string cpu_arch_name, "armv4t" + string cpu_elf_name, "v4" + string cpu_arm925_name, "ARM925T" .align .section ".proc.info.init", #alloc, #execinstr - .type __arm925_proc_info,#object -__arm925_proc_info: - .long 0x54029250 - .long 0xfffffff0 +.macro arm925_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cache + .type __\name\()_proc_info,#object +__\name\()_proc_info: + .long \cpu_val + .long \cpu_mask .long PMD_TYPE_SECT | \ + PMD_SECT_CACHEABLE | \ PMD_BIT4 | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ @@ -547,27 +519,8 @@ __arm925_proc_info: .long v4wbi_tlb_fns .long v4wb_user_fns .long arm925_cache_fns - .size __arm925_proc_info, . - __arm925_proc_info + .size __\name\()_proc_info, . - __\name\()_proc_info +.endm - .type __arm915_proc_info,#object -__arm915_proc_info: - .long 0x54029150 - .long 0xfffffff0 - .long PMD_TYPE_SECT | \ - PMD_BIT4 | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - .long PMD_TYPE_SECT | \ - PMD_BIT4 | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - b __arm925_setup - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB - .long cpu_arm925_name - .long arm925_processor_functions - .long v4wbi_tlb_fns - .long v4wb_user_fns - .long arm925_cache_fns - .size __arm925_proc_info, . - __arm925_proc_info + arm925_proc_info arm925, 0x54029250, 0xfffffff0, cpu_arm925_name + arm925_proc_info arm915, 0x54029150, 0xfffffff0, cpu_arm925_name diff --git a/arch/arm/mm/proc-arm926.S b/arch/arm/mm/proc-arm926.S index 42eb4315740..0f098f407c9 100644 --- a/arch/arm/mm/proc-arm926.S +++ b/arch/arm/mm/proc-arm926.S @@ -77,6 +77,7 @@ ENTRY(cpu_arm926_proc_fin) * loc: location to jump to for soft reset */ .align 5 + .pushsection .idmap.text, "ax" ENTRY(cpu_arm926_reset) mov ip, #0 mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches @@ -89,6 +90,8 @@ ENTRY(cpu_arm926_reset) bic ip, ip, #0x1100 @ ...i...s........ 
mcr p15, 0, ip, c1, c0, 0 @ ctrl register mov pc, r0 +ENDPROC(cpu_arm926_reset) + .popsection /* * cpu_arm926_do_idle() @@ -218,6 +221,7 @@ ENTRY(arm926_coherent_user_range) cmp r0, r1 blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov r0, #0 mov pc, lr /* @@ -335,17 +339,11 @@ ENTRY(arm926_dma_unmap_area) mov pc, lr ENDPROC(arm926_dma_unmap_area) -ENTRY(arm926_cache_fns) - .long arm926_flush_icache_all - .long arm926_flush_kern_cache_all - .long arm926_flush_user_cache_all - .long arm926_flush_user_cache_range - .long arm926_coherent_kern_range - .long arm926_coherent_user_range - .long arm926_flush_kern_dcache_area - .long arm926_dma_map_area - .long arm926_dma_unmap_area - .long arm926_dma_flush_range + .globl arm926_flush_kern_cache_louis + .equ arm926_flush_kern_cache_louis, arm926_flush_kern_cache_all + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions arm926 ENTRY(cpu_arm926_dcache_clean_area) #ifndef CONFIG_CPU_DCACHE_WRITETHROUGH @@ -401,7 +399,31 @@ ENTRY(cpu_arm926_set_pte_ext) #endif mov pc, lr - __CPUINIT +/* Suspend/resume support: taken from arch/arm/plat-s3c24xx/sleep.S */ +.globl cpu_arm926_suspend_size +.equ cpu_arm926_suspend_size, 4 * 3 +#ifdef CONFIG_ARM_CPU_SUSPEND +ENTRY(cpu_arm926_do_suspend) + stmfd sp!, {r4 - r6, lr} + mrc p15, 0, r4, c13, c0, 0 @ PID + mrc p15, 0, r5, c3, c0, 0 @ Domain ID + mrc p15, 0, r6, c1, c0, 0 @ Control register + stmia r0, {r4 - r6} + ldmfd sp!, {r4 - r6, pc} +ENDPROC(cpu_arm926_do_suspend) + +ENTRY(cpu_arm926_do_resume) + mov ip, #0 + mcr p15, 0, ip, c8, c7, 0 @ invalidate I+D TLBs + mcr p15, 0, ip, c7, c7, 0 @ invalidate I+D caches + ldmia r0, {r4 - r6} + mcr p15, 0, r4, c13, c0, 0 @ PID + mcr p15, 0, r5, c3, c0, 0 @ Domain ID + mcr p15, 0, r1, c2, c0, 0 @ TTB address + mov r0, r6 @ control register + b cpu_resume_mmu +ENDPROC(cpu_arm926_do_resume) +#endif .type __arm926_setup, #function __arm926_setup: @@ -441,39 +463,14 @@ arm926_crval: __INITDATA -/* - * Purpose : Function pointers used to access above functions - all calls - * come through these - */ - .type arm926_processor_functions, #object -arm926_processor_functions: - .word v5tj_early_abort - .word legacy_pabort - .word cpu_arm926_proc_init - .word cpu_arm926_proc_fin - .word cpu_arm926_reset - .word cpu_arm926_do_idle - .word cpu_arm926_dcache_clean_area - .word cpu_arm926_switch_mm - .word cpu_arm926_set_pte_ext - .size arm926_processor_functions, . - arm926_processor_functions + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions arm926, dabort=v5tj_early_abort, pabort=legacy_pabort, suspend=1 .section ".rodata" - .type cpu_arch_name, #object -cpu_arch_name: - .asciz "armv5tej" - .size cpu_arch_name, . - cpu_arch_name - - .type cpu_elf_name, #object -cpu_elf_name: - .asciz "v5" - .size cpu_elf_name, . - cpu_elf_name - - .type cpu_arm926_name, #object -cpu_arm926_name: - .asciz "ARM926EJ-S" - .size cpu_arm926_name, . 
- cpu_arm926_name + string cpu_arch_name, "armv5tej" + string cpu_elf_name, "v5" + string cpu_arm926_name, "ARM926EJ-S" .align diff --git a/arch/arm/mm/proc-arm940.S b/arch/arm/mm/proc-arm940.S index 7b11cdb9935..1c39a704ff6 100644 --- a/arch/arm/mm/proc-arm940.S +++ b/arch/arm/mm/proc-arm940.S @@ -48,6 +48,7 @@ ENTRY(cpu_arm940_proc_fin) * Params : r0 = address to jump to * Notes : This sets up everything for a reset */ + .pushsection .idmap.text, "ax" ENTRY(cpu_arm940_reset) mov ip, #0 mcr p15, 0, ip, c7, c5, 0 @ flush I cache @@ -58,6 +59,8 @@ ENTRY(cpu_arm940_reset) bic ip, ip, #0x00001000 @ i-cache mcr p15, 0, ip, c1, c0, 0 @ ctrl register mov pc, r0 +ENDPROC(cpu_arm940_reset) + .popsection /* * cpu_arm940_do_idle() @@ -157,7 +160,7 @@ ENTRY(arm940_coherent_user_range) * - size - region size */ ENTRY(arm940_flush_kern_dcache_area) - mov ip, #0 + mov r0, #0 mov r1, #(CACHE_DSEGMENTS - 1) << 4 @ 4 segments 1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries 2: mcr p15, 0, r3, c7, c14, 2 @ clean/flush D index @@ -165,8 +168,8 @@ ENTRY(arm940_flush_kern_dcache_area) bcs 2b @ entries 63 to 0 subs r1, r1, #1 << 4 bcs 1b @ segments 7 to 0 - mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache - mcr p15, 0, ip, c7, c10, 4 @ drain WB + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mcr p15, 0, r0, c7, c10, 4 @ drain WB mov pc, lr /* @@ -264,19 +267,11 @@ ENTRY(arm940_dma_unmap_area) mov pc, lr ENDPROC(arm940_dma_unmap_area) -ENTRY(arm940_cache_fns) - .long arm940_flush_icache_all - .long arm940_flush_kern_cache_all - .long arm940_flush_user_cache_all - .long arm940_flush_user_cache_range - .long arm940_coherent_kern_range - .long arm940_coherent_user_range - .long arm940_flush_kern_dcache_area - .long arm940_dma_map_area - .long arm940_dma_unmap_area - .long arm940_dma_flush_range + .globl arm940_flush_kern_cache_louis + .equ arm940_flush_kern_cache_louis, arm940_flush_kern_cache_all - __CPUINIT + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions arm940 .type __arm940_setup, #function __arm940_setup: @@ -348,39 +343,14 @@ __arm940_setup: __INITDATA -/* - * Purpose : Function pointers used to access above functions - all calls - * come through these - */ - .type arm940_processor_functions, #object -ENTRY(arm940_processor_functions) - .word nommu_early_abort - .word legacy_pabort - .word cpu_arm940_proc_init - .word cpu_arm940_proc_fin - .word cpu_arm940_reset - .word cpu_arm940_do_idle - .word cpu_arm940_dcache_clean_area - .word cpu_arm940_switch_mm - .word 0 @ cpu_*_set_pte - .size arm940_processor_functions, . - arm940_processor_functions + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions arm940, dabort=nommu_early_abort, pabort=legacy_pabort, nommu=1 .section ".rodata" -.type cpu_arch_name, #object -cpu_arch_name: - .asciz "armv4t" - .size cpu_arch_name, . - cpu_arch_name - - .type cpu_elf_name, #object -cpu_elf_name: - .asciz "v4" - .size cpu_elf_name, . - cpu_elf_name - - .type cpu_arm940_name, #object -cpu_arm940_name: - .ascii "ARM940T" - .size cpu_arm940_name, . 
- cpu_arm940_name + string cpu_arch_name, "armv4t" + string cpu_elf_name, "v4" + string cpu_arm940_name, "ARM940T" .align diff --git a/arch/arm/mm/proc-arm946.S b/arch/arm/mm/proc-arm946.S index 1a5bbf08034..0289cd905e7 100644 --- a/arch/arm/mm/proc-arm946.S +++ b/arch/arm/mm/proc-arm946.S @@ -55,6 +55,7 @@ ENTRY(cpu_arm946_proc_fin) * Params : r0 = address to jump to * Notes : This sets up everything for a reset */ + .pushsection .idmap.text, "ax" ENTRY(cpu_arm946_reset) mov ip, #0 mcr p15, 0, ip, c7, c5, 0 @ flush I cache @@ -65,6 +66,8 @@ ENTRY(cpu_arm946_reset) bic ip, ip, #0x00001000 @ i-cache mcr p15, 0, ip, c1, c0, 0 @ ctrl register mov pc, r0 +ENDPROC(cpu_arm946_reset) + .popsection /* * cpu_arm946_do_idle() @@ -187,6 +190,7 @@ ENTRY(arm946_coherent_user_range) cmp r0, r1 blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov r0, #0 mov pc, lr /* @@ -306,18 +310,11 @@ ENTRY(arm946_dma_unmap_area) mov pc, lr ENDPROC(arm946_dma_unmap_area) -ENTRY(arm946_cache_fns) - .long arm946_flush_icache_all - .long arm946_flush_kern_cache_all - .long arm946_flush_user_cache_all - .long arm946_flush_user_cache_range - .long arm946_coherent_kern_range - .long arm946_coherent_user_range - .long arm946_flush_kern_dcache_area - .long arm946_dma_map_area - .long arm946_dma_unmap_area - .long arm946_dma_flush_range + .globl arm946_flush_kern_cache_louis + .equ arm946_flush_kern_cache_louis, arm946_flush_kern_cache_all + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions arm946 ENTRY(cpu_arm946_dcache_clean_area) #ifndef CONFIG_CPU_DCACHE_WRITETHROUGH @@ -329,8 +326,6 @@ ENTRY(cpu_arm946_dcache_clean_area) mcr p15, 0, r0, c7, c10, 4 @ drain WB mov pc, lr - __CPUINIT - .type __arm946_setup, #function __arm946_setup: mov r0, #0 @@ -403,40 +398,14 @@ __arm946_setup: __INITDATA -/* - * Purpose : Function pointers used to access above functions - all calls - * come through these - */ - .type arm946_processor_functions, #object -ENTRY(arm946_processor_functions) - .word nommu_early_abort - .word legacy_pabort - .word cpu_arm946_proc_init - .word cpu_arm946_proc_fin - .word cpu_arm946_reset - .word cpu_arm946_do_idle - - .word cpu_arm946_dcache_clean_area - .word cpu_arm946_switch_mm - .word 0 @ cpu_*_set_pte - .size arm946_processor_functions, . - arm946_processor_functions + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions arm946, dabort=nommu_early_abort, pabort=legacy_pabort, nommu=1 .section ".rodata" - .type cpu_arch_name, #object -cpu_arch_name: - .asciz "armv5te" - .size cpu_arch_name, . - cpu_arch_name - - .type cpu_elf_name, #object -cpu_elf_name: - .asciz "v5t" - .size cpu_elf_name, . - cpu_elf_name - - .type cpu_arm946_name, #object -cpu_arm946_name: - .ascii "ARM946E-S" - .size cpu_arm946_name, . - cpu_arm946_name + string cpu_arch_name, "armv5te" + string cpu_elf_name, "v5t" + string cpu_arm946_name, "ARM946E-S" .align @@ -446,6 +415,7 @@ __arm946_proc_info: .long 0x41009460 .long 0xff00fff0 .long 0 + .long 0 b __arm946_setup .long cpu_arch_name .long cpu_elf_name @@ -454,6 +424,6 @@ __arm946_proc_info: .long arm946_processor_functions .long 0 .long 0 - .long arm940_cache_fns + .long arm946_cache_fns .size __arm946_proc_info, . 
- __arm946_proc_info diff --git a/arch/arm/mm/proc-arm9tdmi.S b/arch/arm/mm/proc-arm9tdmi.S index db67e3134d7..f51197ba754 100644 --- a/arch/arm/mm/proc-arm9tdmi.S +++ b/arch/arm/mm/proc-arm9tdmi.S @@ -17,6 +17,8 @@ #include <asm/pgtable.h> #include <asm/ptrace.h> +#include "proc-macros.S" + .text /* * cpu_arm9tdmi_proc_init() @@ -43,10 +45,11 @@ ENTRY(cpu_arm9tdmi_proc_fin) * Params : loc(r0) address to jump to * Purpose : Sets up everything for a reset and jump to the location for soft reset. */ + .pushsection .idmap.text, "ax" ENTRY(cpu_arm9tdmi_reset) mov pc, r0 - - __CPUINIT +ENDPROC(cpu_arm9tdmi_reset) + .popsection .type __arm9tdmi_setup, #function __arm9tdmi_setup: @@ -55,79 +58,38 @@ __arm9tdmi_setup: __INITDATA -/* - * Purpose : Function pointers used to access above functions - all calls - * come through these - */ - .type arm9tdmi_processor_functions, #object -ENTRY(arm9tdmi_processor_functions) - .word nommu_early_abort - .word legacy_pabort - .word cpu_arm9tdmi_proc_init - .word cpu_arm9tdmi_proc_fin - .word cpu_arm9tdmi_reset - .word cpu_arm9tdmi_do_idle - .word cpu_arm9tdmi_dcache_clean_area - .word cpu_arm9tdmi_switch_mm - .word 0 @ cpu_*_set_pte - .size arm9tdmi_processor_functions, . - arm9tdmi_processor_functions + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions arm9tdmi, dabort=nommu_early_abort, pabort=legacy_pabort, nommu=1 .section ".rodata" - .type cpu_arch_name, #object -cpu_arch_name: - .asciz "armv4t" - .size cpu_arch_name, . - cpu_arch_name - - .type cpu_elf_name, #object -cpu_elf_name: - .asciz "v4" - .size cpu_elf_name, . - cpu_elf_name - - .type cpu_arm9tdmi_name, #object -cpu_arm9tdmi_name: - .asciz "ARM9TDMI" - .size cpu_arm9tdmi_name, . - cpu_arm9tdmi_name - - .type cpu_p2001_name, #object -cpu_p2001_name: - .asciz "P2001" - .size cpu_p2001_name, . - cpu_p2001_name + string cpu_arch_name, "armv4t" + string cpu_elf_name, "v4" + string cpu_arm9tdmi_name, "ARM9TDMI" + string cpu_p2001_name, "P2001" .align .section ".proc.info.init", #alloc, #execinstr - .type __arm9tdmi_proc_info, #object -__arm9tdmi_proc_info: - .long 0x41009900 - .long 0xfff8ff00 +.macro arm9tdmi_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req + .type __\name\()_proc_info, #object +__\name\()_proc_info: + .long \cpu_val + .long \cpu_mask .long 0 .long 0 b __arm9tdmi_setup .long cpu_arch_name .long cpu_elf_name .long HWCAP_SWP | HWCAP_THUMB | HWCAP_26BIT - .long cpu_arm9tdmi_name + .long \cpu_name .long arm9tdmi_processor_functions .long 0 .long 0 .long v4_cache_fns - .size __arm9tdmi_proc_info, . - __arm9dmi_proc_info + .size __\name\()_proc_info, . - __\name\()_proc_info +.endm - .type __p2001_proc_info, #object -__p2001_proc_info: - .long 0x41029000 - .long 0xffffffff - .long 0 - .long 0 - b __arm9tdmi_setup - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP | HWCAP_THUMB | HWCAP_26BIT - .long cpu_p2001_name - .long arm9tdmi_processor_functions - .long 0 - .long 0 - .long v4_cache_fns - .size __p2001_proc_info, . 
- __p2001_proc_info + arm9tdmi_proc_info arm9tdmi, 0x41009900, 0xfff8ff00, cpu_arm9tdmi_name + arm9tdmi_proc_info p2001, 0x41029000, 0xffffffff, cpu_p2001_name diff --git a/arch/arm/mm/proc-fa526.S b/arch/arm/mm/proc-fa526.S index 7c9ad621f0e..2dfc0f1d3bf 100644 --- a/arch/arm/mm/proc-fa526.S +++ b/arch/arm/mm/proc-fa526.S @@ -22,7 +22,6 @@ #include <asm/pgtable.h> #include <asm/page.h> #include <asm/ptrace.h> -#include <asm/system.h> #include "proc-macros.S" @@ -57,6 +56,7 @@ ENTRY(cpu_fa526_proc_fin) * loc: location to jump to for soft reset */ .align 4 + .pushsection .idmap.text, "ax" ENTRY(cpu_fa526_reset) /* TODO: Use CP8 if possible... */ mov ip, #0 @@ -73,13 +73,14 @@ ENTRY(cpu_fa526_reset) nop nop mov pc, r0 +ENDPROC(cpu_fa526_reset) + .popsection /* * cpu_fa526_do_idle() */ .align 4 ENTRY(cpu_fa526_do_idle) - mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt mov pc, lr @@ -134,8 +135,6 @@ ENTRY(cpu_fa526_set_pte_ext) #endif mov pc, lr - __CPUINIT - .type __fa526_setup, #function __fa526_setup: /* On return of this routine, r0 must carry correct flags for CFG register */ @@ -180,39 +179,14 @@ fa526_cr1_set: __INITDATA -/* - * Purpose : Function pointers used to access above functions - all calls - * come through these - */ - .type fa526_processor_functions, #object -fa526_processor_functions: - .word v4_early_abort - .word legacy_pabort - .word cpu_fa526_proc_init - .word cpu_fa526_proc_fin - .word cpu_fa526_reset - .word cpu_fa526_do_idle - .word cpu_fa526_dcache_clean_area - .word cpu_fa526_switch_mm - .word cpu_fa526_set_pte_ext - .size fa526_processor_functions, . - fa526_processor_functions + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions fa526, dabort=v4_early_abort, pabort=legacy_pabort .section ".rodata" - .type cpu_arch_name, #object -cpu_arch_name: - .asciz "armv4" - .size cpu_arch_name, . - cpu_arch_name - - .type cpu_elf_name, #object -cpu_elf_name: - .asciz "v4" - .size cpu_elf_name, . - cpu_elf_name - - .type cpu_fa526_name, #object -cpu_fa526_name: - .asciz "FA526" - .size cpu_fa526_name, . - cpu_fa526_name + string cpu_arch_name, "armv4" + string cpu_elf_name, "v4" + string cpu_fa526_name, "FA526" .align diff --git a/arch/arm/mm/proc-feroceon.S b/arch/arm/mm/proc-feroceon.S index b4597edbff9..db79b62c92f 100644 --- a/arch/arm/mm/proc-feroceon.S +++ b/arch/arm/mm/proc-feroceon.S @@ -98,6 +98,7 @@ ENTRY(cpu_feroceon_proc_fin) * loc: location to jump to for soft reset */ .align 5 + .pushsection .idmap.text, "ax" ENTRY(cpu_feroceon_reset) mov ip, #0 mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches @@ -110,6 +111,8 @@ ENTRY(cpu_feroceon_reset) bic ip, ip, #0x1100 @ ...i...s........ 
mcr p15, 0, ip, c1, c0, 0 @ ctrl register mov pc, r0 +ENDPROC(cpu_feroceon_reset) + .popsection /* * cpu_feroceon_do_idle() @@ -229,6 +232,7 @@ ENTRY(feroceon_coherent_user_range) cmp r0, r1 blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov r0, #0 mov pc, lr /* @@ -411,29 +415,32 @@ ENTRY(feroceon_dma_unmap_area) mov pc, lr ENDPROC(feroceon_dma_unmap_area) -ENTRY(feroceon_cache_fns) - .long feroceon_flush_icache_all - .long feroceon_flush_kern_cache_all - .long feroceon_flush_user_cache_all - .long feroceon_flush_user_cache_range - .long feroceon_coherent_kern_range - .long feroceon_coherent_user_range - .long feroceon_flush_kern_dcache_area - .long feroceon_dma_map_area - .long feroceon_dma_unmap_area - .long feroceon_dma_flush_range - -ENTRY(feroceon_range_cache_fns) - .long feroceon_flush_icache_all - .long feroceon_flush_kern_cache_all - .long feroceon_flush_user_cache_all - .long feroceon_flush_user_cache_range - .long feroceon_coherent_kern_range - .long feroceon_coherent_user_range - .long feroceon_range_flush_kern_dcache_area - .long feroceon_range_dma_map_area - .long feroceon_dma_unmap_area - .long feroceon_range_dma_flush_range + .globl feroceon_flush_kern_cache_louis + .equ feroceon_flush_kern_cache_louis, feroceon_flush_kern_cache_all + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions feroceon + +.macro range_alias basename + .globl feroceon_range_\basename + .type feroceon_range_\basename , %function + .equ feroceon_range_\basename , feroceon_\basename +.endm + +/* + * Most of the cache functions are unchanged for this case. + * Export suitable alias symbols for the unchanged functions: + */ + range_alias flush_icache_all + range_alias flush_user_cache_all + range_alias flush_kern_cache_all + range_alias flush_kern_cache_louis + range_alias flush_user_cache_range + range_alias coherent_kern_range + range_alias coherent_user_range + range_alias dma_unmap_area + + define_cache_functions feroceon_range .align 5 ENTRY(cpu_feroceon_dcache_clean_area) @@ -507,7 +514,31 @@ ENTRY(cpu_feroceon_set_pte_ext) #endif mov pc, lr - __CPUINIT +/* Suspend/resume support: taken from arch/arm/mm/proc-arm926.S */ +.globl cpu_feroceon_suspend_size +.equ cpu_feroceon_suspend_size, 4 * 3 +#ifdef CONFIG_ARM_CPU_SUSPEND +ENTRY(cpu_feroceon_do_suspend) + stmfd sp!, {r4 - r6, lr} + mrc p15, 0, r4, c13, c0, 0 @ PID + mrc p15, 0, r5, c3, c0, 0 @ Domain ID + mrc p15, 0, r6, c1, c0, 0 @ Control register + stmia r0, {r4 - r6} + ldmfd sp!, {r4 - r6, pc} +ENDPROC(cpu_feroceon_do_suspend) + +ENTRY(cpu_feroceon_do_resume) + mov ip, #0 + mcr p15, 0, ip, c8, c7, 0 @ invalidate I+D TLBs + mcr p15, 0, ip, c7, c7, 0 @ invalidate I+D caches + ldmia r0, {r4 - r6} + mcr p15, 0, r4, c13, c0, 0 @ PID + mcr p15, 0, r5, c3, c0, 0 @ Domain ID + mcr p15, 0, r1, c2, c0, 0 @ TTB address + mov r0, r6 @ control register + b cpu_resume_mmu +ENDPROC(cpu_feroceon_do_resume) +#endif .type __feroceon_setup, #function __feroceon_setup: @@ -539,90 +570,27 @@ feroceon_crval: __INITDATA -/* - * Purpose : Function pointers used to access above functions - all calls - * come through these - */ - .type feroceon_processor_functions, #object -feroceon_processor_functions: - .word v5t_early_abort - .word legacy_pabort - .word cpu_feroceon_proc_init - .word cpu_feroceon_proc_fin - .word cpu_feroceon_reset - .word cpu_feroceon_do_idle - .word cpu_feroceon_dcache_clean_area - .word cpu_feroceon_switch_mm - .word cpu_feroceon_set_pte_ext - .size feroceon_processor_functions, . 
- feroceon_processor_functions + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions feroceon, dabort=v5t_early_abort, pabort=legacy_pabort .section ".rodata" - .type cpu_arch_name, #object -cpu_arch_name: - .asciz "armv5te" - .size cpu_arch_name, . - cpu_arch_name - - .type cpu_elf_name, #object -cpu_elf_name: - .asciz "v5" - .size cpu_elf_name, . - cpu_elf_name - - .type cpu_feroceon_name, #object -cpu_feroceon_name: - .asciz "Feroceon" - .size cpu_feroceon_name, . - cpu_feroceon_name - - .type cpu_88fr531_name, #object -cpu_88fr531_name: - .asciz "Feroceon 88FR531-vd" - .size cpu_88fr531_name, . - cpu_88fr531_name - - .type cpu_88fr571_name, #object -cpu_88fr571_name: - .asciz "Feroceon 88FR571-vd" - .size cpu_88fr571_name, . - cpu_88fr571_name - - .type cpu_88fr131_name, #object -cpu_88fr131_name: - .asciz "Feroceon 88FR131" - .size cpu_88fr131_name, . - cpu_88fr131_name + string cpu_arch_name, "armv5te" + string cpu_elf_name, "v5" + string cpu_feroceon_name, "Feroceon" + string cpu_88fr531_name, "Feroceon 88FR531-vd" + string cpu_88fr571_name, "Feroceon 88FR571-vd" + string cpu_88fr131_name, "Feroceon 88FR131" .align .section ".proc.info.init", #alloc, #execinstr -#ifdef CONFIG_CPU_FEROCEON_OLD_ID - .type __feroceon_old_id_proc_info,#object -__feroceon_old_id_proc_info: - .long 0x41009260 - .long 0xff00fff0 - .long PMD_TYPE_SECT | \ - PMD_SECT_BUFFERABLE | \ - PMD_SECT_CACHEABLE | \ - PMD_BIT4 | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - .long PMD_TYPE_SECT | \ - PMD_BIT4 | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - b __feroceon_setup - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP - .long cpu_feroceon_name - .long feroceon_processor_functions - .long v4wbi_tlb_fns - .long feroceon_user_fns - .long feroceon_cache_fns - .size __feroceon_old_id_proc_info, . - __feroceon_old_id_proc_info -#endif - - .type __88fr531_proc_info,#object -__88fr531_proc_info: - .long 0x56055310 - .long 0xfffffff0 +.macro feroceon_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cache:req + .type __\name\()_proc_info,#object +__\name\()_proc_info: + .long \cpu_val + .long \cpu_mask .long PMD_TYPE_SECT | \ PMD_SECT_BUFFERABLE | \ PMD_SECT_CACHEABLE | \ @@ -637,59 +605,22 @@ __88fr531_proc_info: .long cpu_arch_name .long cpu_elf_name .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP - .long cpu_88fr531_name + .long \cpu_name .long feroceon_processor_functions .long v4wbi_tlb_fns .long feroceon_user_fns - .long feroceon_cache_fns - .size __88fr531_proc_info, . - __88fr531_proc_info + .long \cache + .size __\name\()_proc_info, . - __\name\()_proc_info +.endm - .type __88fr571_proc_info,#object -__88fr571_proc_info: - .long 0x56155710 - .long 0xfffffff0 - .long PMD_TYPE_SECT | \ - PMD_SECT_BUFFERABLE | \ - PMD_SECT_CACHEABLE | \ - PMD_BIT4 | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - .long PMD_TYPE_SECT | \ - PMD_BIT4 | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - b __feroceon_setup - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP - .long cpu_88fr571_name - .long feroceon_processor_functions - .long v4wbi_tlb_fns - .long feroceon_user_fns - .long feroceon_range_cache_fns - .size __88fr571_proc_info, . 
- __88fr571_proc_info +#ifdef CONFIG_CPU_FEROCEON_OLD_ID + feroceon_proc_info feroceon_old_id, 0x41009260, 0xff00fff0, \ + cpu_name=cpu_feroceon_name, cache=feroceon_cache_fns +#endif - .type __88fr131_proc_info,#object -__88fr131_proc_info: - .long 0x56251310 - .long 0xfffffff0 - .long PMD_TYPE_SECT | \ - PMD_SECT_BUFFERABLE | \ - PMD_SECT_CACHEABLE | \ - PMD_BIT4 | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - .long PMD_TYPE_SECT | \ - PMD_BIT4 | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - b __feroceon_setup - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP - .long cpu_88fr131_name - .long feroceon_processor_functions - .long v4wbi_tlb_fns - .long feroceon_user_fns - .long feroceon_range_cache_fns - .size __88fr131_proc_info, . - __88fr131_proc_info + feroceon_proc_info 88fr531, 0x56055310, 0xfffffff0, cpu_88fr531_name, \ + cache=feroceon_cache_fns + feroceon_proc_info 88fr571, 0x56155710, 0xfffffff0, cpu_88fr571_name, \ + cache=feroceon_range_cache_fns + feroceon_proc_info 88fr131, 0x56251310, 0xfffffff0, cpu_88fr131_name, \ + cache=feroceon_range_cache_fns diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S index 7d63beaf974..ee1d8059395 100644 --- a/arch/arm/mm/proc-macros.S +++ b/arch/arm/mm/proc-macros.S @@ -38,9 +38,14 @@ /* * mmid - get context id from mm pointer (mm->context.id) + * note, this field is 64bit, so in big-endian the two words are swapped too. */ .macro mmid, rd, rn +#ifdef __ARMEB__ + ldr \rd, [\rn, #MM_CONTEXT_ID + 4 ] +#else ldr \rd, [\rn, #MM_CONTEXT_ID] +#endif .endm /* @@ -61,28 +66,39 @@ .endm /* - * cache_line_size - get the cache line size from the CSIDR register - * (available on ARMv7+). It assumes that the CSSR register was configured - * to access the L1 data cache CSIDR. + * dcache_line_size - get the minimum D-cache line size from the CTR register + * on ARMv7. */ .macro dcache_line_size, reg, tmp - mrc p15, 1, \tmp, c0, c0, 0 @ read CSIDR - and \tmp, \tmp, #7 @ cache line size encoding - mov \reg, #16 @ size offset + mrc p15, 0, \tmp, c0, c0, 1 @ read ctr + lsr \tmp, \tmp, #16 + and \tmp, \tmp, #0xf @ cache line size encoding + mov \reg, #4 @ bytes per word mov \reg, \reg, lsl \tmp @ actual cache line size .endm +/* + * icache_line_size - get the minimum I-cache line size from the CTR register + * on ARMv7. + */ + .macro icache_line_size, reg, tmp + mrc p15, 0, \tmp, c0, c0, 1 @ read ctr + and \tmp, \tmp, #0xf @ cache line size encoding + mov \reg, #4 @ bytes per word + mov \reg, \reg, lsl \tmp @ actual cache line size + .endm /* * Sanity check the PTE configuration for the code below - which makes - * certain assumptions about how these bits are layed out. + * certain assumptions about how these bits are laid out. 
*/ #ifdef CONFIG_MMU #if L_PTE_SHARED != PTE_EXT_SHARED #error PTE shared bit mismatch #endif -#if (L_PTE_EXEC+L_PTE_USER+L_PTE_WRITE+L_PTE_DIRTY+L_PTE_YOUNG+\ - L_PTE_FILE+L_PTE_PRESENT) > L_PTE_SHARED +#if !defined (CONFIG_ARM_LPAE) && \ + (L_PTE_XN+L_PTE_USER+L_PTE_RDONLY+L_PTE_DIRTY+L_PTE_YOUNG+\ + L_PTE_FILE+L_PTE_PRESENT) > L_PTE_SHARED #error Invalid Linux PTE bit settings #endif #endif /* CONFIG_MMU */ @@ -96,8 +112,8 @@ * 100x 1 0 1 r/o no acc * 10x0 1 0 1 r/o no acc * 1011 0 0 1 r/w no acc - * 110x 0 1 0 r/w r/o - * 11x0 0 1 0 r/w r/o + * 110x 1 1 1 r/o r/o + * 11x0 1 1 1 r/o r/o * 1111 0 1 1 r/w r/w */ .macro armv6_mt_table pfx @@ -117,11 +133,11 @@ .long PTE_EXT_TEX(2) @ L_PTE_MT_DEV_NONSHARED .long 0x00 @ unused .long 0x00 @ unused - .long 0x00 @ unused + .long PTE_CACHEABLE | PTE_BUFFERABLE | PTE_EXT_APX @ L_PTE_MT_VECTORS .endm .macro armv6_set_pte_ext pfx - str r1, [r0], #-2048 @ linux version + str r1, [r0], #2048 @ linux version bic r3, r1, #0x000003fc bic r3, r3, #PTE_TYPE_MASK @@ -132,23 +148,27 @@ and r2, r1, #L_PTE_MT_MASK ldr r2, [ip, r2] - tst r1, #L_PTE_WRITE - tstne r1, #L_PTE_DIRTY - orreq r3, r3, #PTE_EXT_APX + eor r1, r1, #L_PTE_DIRTY + tst r1, #L_PTE_DIRTY|L_PTE_RDONLY + orrne r3, r3, #PTE_EXT_APX tst r1, #L_PTE_USER orrne r3, r3, #PTE_EXT_AP1 tstne r3, #PTE_EXT_APX - bicne r3, r3, #PTE_EXT_APX | PTE_EXT_AP0 - tst r1, #L_PTE_EXEC - orreq r3, r3, #PTE_EXT_XN + @ user read-only -> kernel read-only + bicne r3, r3, #PTE_EXT_AP0 - orr r3, r3, r2 + tst r1, #L_PTE_XN + orrne r3, r3, #PTE_EXT_XN + + eor r3, r3, r2 tst r1, #L_PTE_YOUNG tstne r1, #L_PTE_PRESENT moveq r3, #0 + tstne r1, #L_PTE_NONE + movne r3, #0 str r3, [r0] mcr p15, 0, r0, c7, c10, 1 @ flush_pte @@ -170,9 +190,9 @@ * 1111 0xff r/w r/w */ .macro armv3_set_pte_ext wc_disable=1 - str r1, [r0], #-2048 @ linux version + str r1, [r0], #2048 @ linux version - eor r3, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY + eor r3, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY bic r2, r1, #PTE_SMALL_AP_MASK @ keep C, B bits bic r2, r2, #PTE_TYPE_MASK @@ -181,7 +201,7 @@ tst r3, #L_PTE_USER @ user? orrne r2, r2, #PTE_SMALL_AP_URO_SRW - tst r3, #L_PTE_WRITE | L_PTE_DIRTY @ write and dirty? + tst r3, #L_PTE_RDONLY | L_PTE_DIRTY @ write and dirty? orreq r2, r2, #PTE_SMALL_AP_UNO_SRW tst r3, #L_PTE_PRESENT | L_PTE_YOUNG @ present and young? @@ -193,7 +213,7 @@ bicne r2, r2, #PTE_BUFFERABLE #endif .endif - str r2, [r0] @ hardware version + str r2, [r0] @ hardware version .endm @@ -213,9 +233,9 @@ * 1111 11 r/w r/w */ .macro xscale_set_pte_ext_prologue - str r1, [r0], #-2048 @ linux version + str r1, [r0] @ linux version - eor r3, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY + eor r3, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY bic r2, r1, #PTE_SMALL_AP_MASK @ keep C, B bits orr r2, r2, #PTE_TYPE_EXT @ extended page @@ -223,7 +243,7 @@ tst r3, #L_PTE_USER @ user? orrne r2, r2, #PTE_EXT_AP_URO_SRW @ yes -> user r/o, system r/w - tst r3, #L_PTE_WRITE | L_PTE_DIRTY @ write and dirty? + tst r3, #L_PTE_RDONLY | L_PTE_DIRTY @ write and dirty? orreq r2, r2, #PTE_EXT_AP_UNO_SRW @ yes -> user n/a, system r/w @ combined with user -> user r/w .endm @@ -232,8 +252,82 @@ tst r3, #L_PTE_PRESENT | L_PTE_YOUNG @ present and young? movne r2, #0 @ no -> fault - str r2, [r0] @ hardware version + str r2, [r0, #2048]! 
@ hardware version mov ip, #0 mcr p15, 0, r0, c7, c10, 1 @ clean L1 D line mcr p15, 0, ip, c7, c10, 4 @ data write barrier .endm + +.macro define_processor_functions name:req, dabort:req, pabort:req, nommu=0, suspend=0 + .type \name\()_processor_functions, #object + .align 2 +ENTRY(\name\()_processor_functions) + .word \dabort + .word \pabort + .word cpu_\name\()_proc_init + .word cpu_\name\()_proc_fin + .word cpu_\name\()_reset + .word cpu_\name\()_do_idle + .word cpu_\name\()_dcache_clean_area + .word cpu_\name\()_switch_mm + + .if \nommu + .word 0 + .else + .word cpu_\name\()_set_pte_ext + .endif + + .if \suspend + .word cpu_\name\()_suspend_size +#ifdef CONFIG_PM_SLEEP + .word cpu_\name\()_do_suspend + .word cpu_\name\()_do_resume +#else + .word 0 + .word 0 +#endif + .else + .word 0 + .word 0 + .word 0 + .endif + + .size \name\()_processor_functions, . - \name\()_processor_functions +.endm + +.macro define_cache_functions name:req + .align 2 + .type \name\()_cache_fns, #object +ENTRY(\name\()_cache_fns) + .long \name\()_flush_icache_all + .long \name\()_flush_kern_cache_all + .long \name\()_flush_kern_cache_louis + .long \name\()_flush_user_cache_all + .long \name\()_flush_user_cache_range + .long \name\()_coherent_kern_range + .long \name\()_coherent_user_range + .long \name\()_flush_kern_dcache_area + .long \name\()_dma_map_area + .long \name\()_dma_unmap_area + .long \name\()_dma_flush_range + .size \name\()_cache_fns, . - \name\()_cache_fns +.endm + +.macro define_tlb_functions name:req, flags_up:req, flags_smp + .type \name\()_tlb_fns, #object +ENTRY(\name\()_tlb_fns) + .long \name\()_flush_user_tlb_range + .long \name\()_flush_kern_tlb_range + .ifnb \flags_smp + ALT_SMP(.long \flags_smp ) + ALT_UP(.long \flags_up ) + .else + .long \flags_up + .endif + .size \name\()_tlb_fns, . - \name\()_tlb_fns +.endm + +.macro globl_equ x, y + .globl \x + .equ \x, \y +.endm diff --git a/arch/arm/mm/proc-mohawk.S b/arch/arm/mm/proc-mohawk.S index 4458ee6aa71..40acba59573 100644 --- a/arch/arm/mm/proc-mohawk.S +++ b/arch/arm/mm/proc-mohawk.S @@ -69,6 +69,7 @@ ENTRY(cpu_mohawk_proc_fin) * (same as arm926) */ .align 5 + .pushsection .idmap.text, "ax" ENTRY(cpu_mohawk_reset) mov ip, #0 mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches @@ -79,6 +80,8 @@ ENTRY(cpu_mohawk_reset) bic ip, ip, #0x1100 @ ...i...s........ mcr p15, 0, ip, c1, c0, 0 @ ctrl register mov pc, r0 +ENDPROC(cpu_mohawk_reset) + .popsection /* * cpu_mohawk_do_idle() @@ -93,6 +96,17 @@ ENTRY(cpu_mohawk_do_idle) mov pc, lr /* + * flush_icache_all() + * + * Unconditionally clean and invalidate the entire icache. 
+ */ +ENTRY(mohawk_flush_icache_all) + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mov pc, lr +ENDPROC(mohawk_flush_icache_all) + +/* * flush_user_cache_all() * * Clean and invalidate all cache entries in a particular @@ -179,6 +193,7 @@ ENTRY(mohawk_coherent_user_range) cmp r0, r1 blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov r0, #0 mov pc, lr /* @@ -288,16 +303,11 @@ ENTRY(mohawk_dma_unmap_area) mov pc, lr ENDPROC(mohawk_dma_unmap_area) -ENTRY(mohawk_cache_fns) - .long mohawk_flush_kern_cache_all - .long mohawk_flush_user_cache_all - .long mohawk_flush_user_cache_range - .long mohawk_coherent_kern_range - .long mohawk_coherent_user_range - .long mohawk_flush_kern_dcache_area - .long mohawk_dma_map_area - .long mohawk_dma_unmap_area - .long mohawk_dma_flush_range + .globl mohawk_flush_kern_cache_louis + .equ mohawk_flush_kern_cache_louis, mohawk_flush_kern_cache_all + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions mohawk ENTRY(cpu_mohawk_dcache_clean_area) 1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry @@ -338,7 +348,40 @@ ENTRY(cpu_mohawk_set_pte_ext) mcr p15, 0, r0, c7, c10, 4 @ drain WB mov pc, lr - __CPUINIT +.globl cpu_mohawk_suspend_size +.equ cpu_mohawk_suspend_size, 4 * 6 +#ifdef CONFIG_ARM_CPU_SUSPEND +ENTRY(cpu_mohawk_do_suspend) + stmfd sp!, {r4 - r9, lr} + mrc p14, 0, r4, c6, c0, 0 @ clock configuration, for turbo mode + mrc p15, 0, r5, c15, c1, 0 @ CP access reg + mrc p15, 0, r6, c13, c0, 0 @ PID + mrc p15, 0, r7, c3, c0, 0 @ domain ID + mrc p15, 0, r8, c1, c0, 1 @ auxiliary control reg + mrc p15, 0, r9, c1, c0, 0 @ control reg + bic r4, r4, #2 @ clear frequency change bit + stmia r0, {r4 - r9} @ store cp regs + ldmia sp!, {r4 - r9, pc} +ENDPROC(cpu_mohawk_do_suspend) + +ENTRY(cpu_mohawk_do_resume) + ldmia r0, {r4 - r9} @ load cp regs + mov ip, #0 + mcr p15, 0, ip, c7, c7, 0 @ invalidate I & D caches, BTB + mcr p15, 0, ip, c7, c10, 4 @ drain write (&fill) buffer + mcr p15, 0, ip, c7, c5, 4 @ flush prefetch buffer + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs + mcr p14, 0, r4, c6, c0, 0 @ clock configuration, turbo mode. + mcr p15, 0, r5, c15, c1, 0 @ CP access reg + mcr p15, 0, r6, c13, c0, 0 @ PID + mcr p15, 0, r7, c3, c0, 0 @ domain ID + orr r1, r1, #0x18 @ cache the page table in L2 + mcr p15, 0, r1, c2, c0, 0 @ translation table base addr + mcr p15, 0, r8, c1, c0, 1 @ auxiliary control reg + mov r0, r9 @ control register + b cpu_resume_mmu +ENDPROC(cpu_mohawk_do_resume) +#endif .type __mohawk_setup, #function __mohawk_setup: @@ -373,39 +416,14 @@ mohawk_crval: __INITDATA -/* - * Purpose : Function pointers used to access above functions - all calls - * come through these - */ - .type mohawk_processor_functions, #object -mohawk_processor_functions: - .word v5t_early_abort - .word legacy_pabort - .word cpu_mohawk_proc_init - .word cpu_mohawk_proc_fin - .word cpu_mohawk_reset - .word cpu_mohawk_do_idle - .word cpu_mohawk_dcache_clean_area - .word cpu_mohawk_switch_mm - .word cpu_mohawk_set_pte_ext - .size mohawk_processor_functions, . - mohawk_processor_functions + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions mohawk, dabort=v5t_early_abort, pabort=legacy_pabort .section ".rodata" - .type cpu_arch_name, #object -cpu_arch_name: - .asciz "armv5te" - .size cpu_arch_name, . - cpu_arch_name - - .type cpu_elf_name, #object -cpu_elf_name: - .asciz "v5" - .size cpu_elf_name, . 
- cpu_elf_name - - .type cpu_mohawk_name, #object -cpu_mohawk_name: - .asciz "Marvell 88SV331x" - .size cpu_mohawk_name, . - cpu_mohawk_name + string cpu_arch_name, "armv5te" + string cpu_elf_name, "v5" + string cpu_mohawk_name, "Marvell 88SV331x" .align diff --git a/arch/arm/mm/proc-sa110.S b/arch/arm/mm/proc-sa110.S index 5aa8d59c2e8..c45319c8f1d 100644 --- a/arch/arm/mm/proc-sa110.S +++ b/arch/arm/mm/proc-sa110.S @@ -62,6 +62,7 @@ ENTRY(cpu_sa110_proc_fin) * loc: location to jump to for soft reset */ .align 5 + .pushsection .idmap.text, "ax" ENTRY(cpu_sa110_reset) mov ip, #0 mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches @@ -74,6 +75,8 @@ ENTRY(cpu_sa110_reset) bic ip, ip, #0x1100 @ ...i...s........ mcr p15, 0, ip, c1, c0, 0 @ ctrl register mov pc, r0 +ENDPROC(cpu_sa110_reset) + .popsection /* * cpu_sa110_do_idle(type) @@ -156,8 +159,6 @@ ENTRY(cpu_sa110_set_pte_ext) #endif mov pc, lr - __CPUINIT - .type __sa110_setup, #function __sa110_setup: mov r10, #0 @@ -187,40 +188,14 @@ sa110_crval: __INITDATA -/* - * Purpose : Function pointers used to access above functions - all calls - * come through these - */ - - .type sa110_processor_functions, #object -ENTRY(sa110_processor_functions) - .word v4_early_abort - .word legacy_pabort - .word cpu_sa110_proc_init - .word cpu_sa110_proc_fin - .word cpu_sa110_reset - .word cpu_sa110_do_idle - .word cpu_sa110_dcache_clean_area - .word cpu_sa110_switch_mm - .word cpu_sa110_set_pte_ext - .size sa110_processor_functions, . - sa110_processor_functions + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions sa110, dabort=v4_early_abort, pabort=legacy_pabort .section ".rodata" - .type cpu_arch_name, #object -cpu_arch_name: - .asciz "armv4" - .size cpu_arch_name, . - cpu_arch_name - - .type cpu_elf_name, #object -cpu_elf_name: - .asciz "v4" - .size cpu_elf_name, . - cpu_elf_name - - .type cpu_sa110_name, #object -cpu_sa110_name: - .asciz "StrongARM-110" - .size cpu_sa110_name, . - cpu_sa110_name + string cpu_arch_name, "armv4" + string cpu_elf_name, "v4" + string cpu_sa110_name, "StrongARM-110" .align diff --git a/arch/arm/mm/proc-sa1100.S b/arch/arm/mm/proc-sa1100.S index 2ac4e6f1071..09d241ae2db 100644 --- a/arch/arm/mm/proc-sa1100.S +++ b/arch/arm/mm/proc-sa1100.S @@ -34,7 +34,7 @@ */ #define DCACHELINESIZE 32 - __INIT + .section .text /* * cpu_sa1100_proc_init() @@ -45,8 +45,6 @@ ENTRY(cpu_sa1100_proc_init) mcr p15, 0, r0, c9, c0, 5 @ Allow read-buffer operations from userland mov pc, lr - .section .text - /* * cpu_sa1100_proc_fin() * @@ -72,6 +70,7 @@ ENTRY(cpu_sa1100_proc_fin) * loc: location to jump to for soft reset */ .align 5 + .pushsection .idmap.text, "ax" ENTRY(cpu_sa1100_reset) mov ip, #0 mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches @@ -84,6 +83,8 @@ ENTRY(cpu_sa1100_reset) bic ip, ip, #0x1100 @ ...i...s........ 
mcr p15, 0, ip, c1, c0, 0 @ ctrl register mov pc, r0 +ENDPROC(cpu_sa1100_reset) + .popsection /* * cpu_sa1100_do_idle(type) @@ -169,7 +170,33 @@ ENTRY(cpu_sa1100_set_pte_ext) #endif mov pc, lr - __CPUINIT +.globl cpu_sa1100_suspend_size +.equ cpu_sa1100_suspend_size, 4 * 3 +#ifdef CONFIG_ARM_CPU_SUSPEND +ENTRY(cpu_sa1100_do_suspend) + stmfd sp!, {r4 - r6, lr} + mrc p15, 0, r4, c3, c0, 0 @ domain ID + mrc p15, 0, r5, c13, c0, 0 @ PID + mrc p15, 0, r6, c1, c0, 0 @ control reg + stmia r0, {r4 - r6} @ store cp regs + ldmfd sp!, {r4 - r6, pc} +ENDPROC(cpu_sa1100_do_suspend) + +ENTRY(cpu_sa1100_do_resume) + ldmia r0, {r4 - r6} @ load cp regs + mov ip, #0 + mcr p15, 0, ip, c8, c7, 0 @ flush I+D TLBs + mcr p15, 0, ip, c7, c7, 0 @ flush I&D cache + mcr p15, 0, ip, c9, c0, 0 @ invalidate RB + mcr p15, 0, ip, c9, c0, 5 @ allow user space to use RB + + mcr p15, 0, r4, c3, c0, 0 @ domain ID + mcr p15, 0, r1, c2, c0, 0 @ translation table base addr + mcr p15, 0, r5, c13, c0, 0 @ PID + mov r0, r6 @ control register + b cpu_resume_mmu +ENDPROC(cpu_sa1100_do_resume) +#endif .type __sa1100_setup, #function __sa1100_setup: @@ -200,56 +227,28 @@ sa1100_crval: __INITDATA /* - * Purpose : Function pointers used to access above functions - all calls - * come through these - */ - -/* * SA1100 and SA1110 share the same function calls */ - .type sa1100_processor_functions, #object -ENTRY(sa1100_processor_functions) - .word v4_early_abort - .word legacy_pabort - .word cpu_sa1100_proc_init - .word cpu_sa1100_proc_fin - .word cpu_sa1100_reset - .word cpu_sa1100_do_idle - .word cpu_sa1100_dcache_clean_area - .word cpu_sa1100_switch_mm - .word cpu_sa1100_set_pte_ext - .size sa1100_processor_functions, . - sa1100_processor_functions - - .section ".rodata" - - .type cpu_arch_name, #object -cpu_arch_name: - .asciz "armv4" - .size cpu_arch_name, . - cpu_arch_name - .type cpu_elf_name, #object -cpu_elf_name: - .asciz "v4" - .size cpu_elf_name, . - cpu_elf_name + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions sa1100, dabort=v4_early_abort, pabort=legacy_pabort, suspend=1 - .type cpu_sa1100_name, #object -cpu_sa1100_name: - .asciz "StrongARM-1100" - .size cpu_sa1100_name, . - cpu_sa1100_name + .section ".rodata" - .type cpu_sa1110_name, #object -cpu_sa1110_name: - .asciz "StrongARM-1110" - .size cpu_sa1110_name, . - cpu_sa1110_name + string cpu_arch_name, "armv4" + string cpu_elf_name, "v4" + string cpu_sa1100_name, "StrongARM-1100" + string cpu_sa1110_name, "StrongARM-1110" .align .section ".proc.info.init", #alloc, #execinstr - .type __sa1100_proc_info,#object -__sa1100_proc_info: - .long 0x4401a110 - .long 0xfffffff0 +.macro sa1100_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req + .type __\name\()_proc_info,#object +__\name\()_proc_info: + .long \cpu_val + .long \cpu_mask .long PMD_TYPE_SECT | \ PMD_SECT_BUFFERABLE | \ PMD_SECT_CACHEABLE | \ @@ -262,32 +261,13 @@ __sa1100_proc_info: .long cpu_arch_name .long cpu_elf_name .long HWCAP_SWP | HWCAP_HALF | HWCAP_26BIT | HWCAP_FAST_MULT - .long cpu_sa1100_name + .long \cpu_name .long sa1100_processor_functions .long v4wb_tlb_fns .long v4_mc_user_fns .long v4wb_cache_fns - .size __sa1100_proc_info, . - __sa1100_proc_info + .size __\name\()_proc_info, . 
- __\name\()_proc_info +.endm - .type __sa1110_proc_info,#object -__sa1110_proc_info: - .long 0x6901b110 - .long 0xfffffff0 - .long PMD_TYPE_SECT | \ - PMD_SECT_BUFFERABLE | \ - PMD_SECT_CACHEABLE | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - .long PMD_TYPE_SECT | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - b __sa1100_setup - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP | HWCAP_HALF | HWCAP_26BIT | HWCAP_FAST_MULT - .long cpu_sa1110_name - .long sa1100_processor_functions - .long v4wb_tlb_fns - .long v4_mc_user_fns - .long v4wb_cache_fns - .size __sa1110_proc_info, . - __sa1110_proc_info + sa1100_proc_info sa1100, 0x4401a110, 0xfffffff0, cpu_sa1100_name + sa1100_proc_info sa1110, 0x6901b110, 0xfffffff0, cpu_sa1110_name diff --git a/arch/arm/mm/proc-syms.c b/arch/arm/mm/proc-syms.c index 3e6210b4d6d..054b491ff76 100644 --- a/arch/arm/mm/proc-syms.c +++ b/arch/arm/mm/proc-syms.c @@ -17,7 +17,9 @@ #ifndef MULTI_CPU EXPORT_SYMBOL(cpu_dcache_clean_area); +#ifdef CONFIG_MMU EXPORT_SYMBOL(cpu_set_pte_ext); +#endif #else EXPORT_SYMBOL(processor); #endif diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S index 59a7e1ffe7b..32b3558321c 100644 --- a/arch/arm/mm/proc-v6.S +++ b/arch/arm/mm/proc-v6.S @@ -55,8 +55,16 @@ ENTRY(cpu_v6_proc_fin) * - loc - location to jump to for soft reset */ .align 5 + .pushsection .idmap.text, "ax" ENTRY(cpu_v6_reset) + mrc p15, 0, r1, c1, c0, 0 @ ctrl register + bic r1, r1, #0x1 @ ...............m + mcr p15, 0, r1, c1, c0, 0 @ disable MMU + mov r1, #0 + mcr p15, 0, r1, c7, c5, 4 @ ISB mov pc, r0 +ENDPROC(cpu_v6_reset) + .popsection /* * cpu_v6_do_idle() @@ -72,16 +80,14 @@ ENTRY(cpu_v6_do_idle) mov pc, lr ENTRY(cpu_v6_dcache_clean_area) -#ifndef TLB_CAN_READ_FROM_L1_CACHE 1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry add r0, r0, #D_CACHE_LINE_SIZE subs r1, r1, #D_CACHE_LINE_SIZE bhi 1b -#endif mov pc, lr /* - * cpu_arm926_switch_mm(pgd_phys, tsk) + * cpu_v6_switch_mm(pgd_phys, tsk) * * Set the translation table base pointer to be pgd_phys * @@ -93,12 +99,18 @@ ENTRY(cpu_v6_dcache_clean_area) ENTRY(cpu_v6_switch_mm) #ifdef CONFIG_MMU mov r2, #0 - ldr r1, [r1, #MM_CONTEXT_ID] @ get mm->context.id + mmid r1, r1 @ get mm->context.id ALT_SMP(orr r0, r0, #TTB_FLAGS_SMP) ALT_UP(orr r0, r0, #TTB_FLAGS_UP) mcr p15, 0, r2, c7, c5, 6 @ flush BTAC/BTB mcr p15, 0, r2, c7, c10, 4 @ drain write buffer mcr p15, 0, r0, c2, c0, 0 @ set TTB 0 +#ifdef CONFIG_PID_IN_CONTEXTIDR + mrc p15, 0, r2, c13, c0, 1 @ read current context ID + bic r2, r2, #0xff @ extract the PID + and r1, r1, #0xff + orr r1, r1, r2 @ insert into new context ID +#endif mcr p15, 0, r1, c13, c0, 1 @ set context ID #endif mov pc, lr @@ -121,22 +133,53 @@ ENTRY(cpu_v6_set_pte_ext) #endif mov pc, lr +/* Suspend/resume support: taken from arch/arm/mach-s3c64xx/sleep.S */ +.globl cpu_v6_suspend_size +.equ cpu_v6_suspend_size, 4 * 6 +#ifdef CONFIG_ARM_CPU_SUSPEND +ENTRY(cpu_v6_do_suspend) + stmfd sp!, {r4 - r9, lr} + mrc p15, 0, r4, c13, c0, 0 @ FCSE/PID +#ifdef CONFIG_MMU + mrc p15, 0, r5, c3, c0, 0 @ Domain ID + mrc p15, 0, r6, c2, c0, 1 @ Translation table base 1 +#endif + mrc p15, 0, r7, c1, c0, 1 @ auxiliary control register + mrc p15, 0, r8, c1, c0, 2 @ co-processor access control + mrc p15, 0, r9, c1, c0, 0 @ control register + stmia r0, {r4 - r9} + ldmfd sp!, {r4- r9, pc} +ENDPROC(cpu_v6_do_suspend) +ENTRY(cpu_v6_do_resume) + mov ip, #0 + mcr p15, 0, ip, c7, c14, 0 @ clean+invalidate D cache + mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcr p15, 0, ip, c7, c15, 0 @ clean+invalidate 
cache + mcr p15, 0, ip, c7, c10, 4 @ drain write buffer + mcr p15, 0, ip, c13, c0, 1 @ set reserved context ID + ldmia r0, {r4 - r9} + mcr p15, 0, r4, c13, c0, 0 @ FCSE/PID +#ifdef CONFIG_MMU + mcr p15, 0, r5, c3, c0, 0 @ Domain ID + ALT_SMP(orr r1, r1, #TTB_FLAGS_SMP) + ALT_UP(orr r1, r1, #TTB_FLAGS_UP) + mcr p15, 0, r1, c2, c0, 0 @ Translation table base 0 + mcr p15, 0, r6, c2, c0, 1 @ Translation table base 1 + mcr p15, 0, ip, c2, c0, 2 @ TTB control register +#endif + mcr p15, 0, r7, c1, c0, 1 @ auxiliary control register + mcr p15, 0, r8, c1, c0, 2 @ co-processor access control + mcr p15, 0, ip, c7, c5, 4 @ ISB + mov r0, r9 @ control register + b cpu_resume_mmu +ENDPROC(cpu_v6_do_resume) +#endif - .type cpu_v6_name, #object -cpu_v6_name: - .asciz "ARMv6-compatible processor" - .size cpu_v6_name, . - cpu_v6_name - - .type cpu_pj4_name, #object -cpu_pj4_name: - .asciz "Marvell PJ4 processor" - .size cpu_pj4_name, . - cpu_pj4_name + string cpu_v6_name, "ARMv6-compatible processor" .align - __CPUINIT - /* * __v6_setup * @@ -165,22 +208,39 @@ __v6_setup: mcr p15, 0, r0, c7, c14, 0 @ clean+invalidate D cache mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache mcr p15, 0, r0, c7, c15, 0 @ clean+invalidate cache - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer #ifdef CONFIG_MMU mcr p15, 0, r0, c8, c7, 0 @ invalidate I + D TLBs mcr p15, 0, r0, c2, c0, 2 @ TTB control register ALT_SMP(orr r4, r4, #TTB_FLAGS_SMP) ALT_UP(orr r4, r4, #TTB_FLAGS_UP) - mcr p15, 0, r4, c2, c0, 1 @ load TTB1 + ALT_SMP(orr r8, r8, #TTB_FLAGS_SMP) + ALT_UP(orr r8, r8, #TTB_FLAGS_UP) + mcr p15, 0, r8, c2, c0, 1 @ load TTB1 #endif /* CONFIG_MMU */ + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer and + @ complete invalidations adr r5, v6_crval ldmia r5, {r5, r6} -#ifdef CONFIG_CPU_ENDIAN_BE8 - orr r6, r6, #1 << 25 @ big-endian page tables -#endif + ARM_BE8(orr r6, r6, #1 << 25) @ big-endian page tables mrc p15, 0, r0, c1, c0, 0 @ read control register bic r0, r0, r5 @ clear bits them orr r0, r0, r6 @ set them +#ifdef CONFIG_ARM_ERRATA_364296 + /* + * Workaround for the 364296 ARM1136 r0p2 erratum (possible cache data + * corruption with hit-under-miss enabled). The conditional code below + * (setting the undocumented bit 31 in the auxiliary control register + * and the FI bit in the control register) disables hit-under-miss + * without putting the processor into full low interrupt latency mode. + */ + ldr r6, =0x4107b362 @ id for ARM1136 r0p2 + mrc p15, 0, r5, c0, c0, 0 @ get processor id + teq r5, r6 @ check for the faulty core + mrceq p15, 0, r5, c1, c0, 1 @ load aux control reg + orreq r5, r5, #(1 << 31) @ set the undocumented bit 31 + mcreq p15, 0, r5, c1, c0, 1 @ write aux control reg + orreq r0, r0, #(1 << 21) @ low interrupt latency configuration +#endif mov pc, lr @ return to head.S:__ret /* @@ -195,30 +255,13 @@ v6_crval: __INITDATA - .type v6_processor_functions, #object -ENTRY(v6_processor_functions) - .word v6_early_abort - .word v6_pabort - .word cpu_v6_proc_init - .word cpu_v6_proc_fin - .word cpu_v6_reset - .word cpu_v6_do_idle - .word cpu_v6_dcache_clean_area - .word cpu_v6_switch_mm - .word cpu_v6_set_pte_ext - .size v6_processor_functions, . - v6_processor_functions + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions v6, dabort=v6_early_abort, pabort=v6_pabort, suspend=1 .section ".rodata" - .type cpu_arch_name, #object -cpu_arch_name: - .asciz "armv6" - .size cpu_arch_name, . 
- cpu_arch_name - - .type cpu_elf_name, #object -cpu_elf_name: - .asciz "v6" - .size cpu_elf_name, . - cpu_elf_name + string cpu_arch_name, "armv6" + string cpu_elf_name, "v6" .align .section ".proc.info.init", #alloc, #execinstr @@ -255,32 +298,3 @@ __v6_proc_info: .long v6_user_fns .long v6_cache_fns .size __v6_proc_info, . - __v6_proc_info - - .type __pj4_v6_proc_info, #object -__pj4_v6_proc_info: - .long 0x560f5810 - .long 0xff0ffff0 - ALT_SMP(.long \ - PMD_TYPE_SECT | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ | \ - PMD_FLAGS_SMP) - ALT_UP(.long \ - PMD_TYPE_SECT | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ | \ - PMD_FLAGS_UP) - .long PMD_TYPE_SECT | \ - PMD_SECT_XN | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - b __v6_setup - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP|HWCAP_TLS - .long cpu_pj4_name - .long v6_processor_functions - .long v6wbi_tlb_fns - .long v6_user_fns - .long v6_cache_fns - .size __pj4_v6_proc_info, . - __pj4_v6_proc_info diff --git a/arch/arm/mm/proc-v7-2level.S b/arch/arm/mm/proc-v7-2level.S new file mode 100644 index 00000000000..1f52915f2b2 --- /dev/null +++ b/arch/arm/mm/proc-v7-2level.S @@ -0,0 +1,165 @@ +/* + * arch/arm/mm/proc-v7-2level.S + * + * Copyright (C) 2001 Deep Blue Solutions Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#define TTB_S (1 << 1) +#define TTB_RGN_NC (0 << 3) +#define TTB_RGN_OC_WBWA (1 << 3) +#define TTB_RGN_OC_WT (2 << 3) +#define TTB_RGN_OC_WB (3 << 3) +#define TTB_NOS (1 << 5) +#define TTB_IRGN_NC ((0 << 0) | (0 << 6)) +#define TTB_IRGN_WBWA ((0 << 0) | (1 << 6)) +#define TTB_IRGN_WT ((1 << 0) | (0 << 6)) +#define TTB_IRGN_WB ((1 << 0) | (1 << 6)) + +/* PTWs cacheable, inner WB not shareable, outer WB not shareable */ +#define TTB_FLAGS_UP TTB_IRGN_WB|TTB_RGN_OC_WB +#define PMD_FLAGS_UP PMD_SECT_WB + +/* PTWs cacheable, inner WBWA shareable, outer WBWA not shareable */ +#define TTB_FLAGS_SMP TTB_IRGN_WBWA|TTB_S|TTB_NOS|TTB_RGN_OC_WBWA +#define PMD_FLAGS_SMP PMD_SECT_WBWA|PMD_SECT_S + +/* + * cpu_v7_switch_mm(pgd_phys, tsk) + * + * Set the translation table base pointer to be pgd_phys + * + * - pgd_phys - physical address of new TTB + * + * It is assumed that: + * - we are not using split page tables + */ +ENTRY(cpu_v7_switch_mm) +#ifdef CONFIG_MMU + mov r2, #0 + mmid r1, r1 @ get mm->context.id + ALT_SMP(orr r0, r0, #TTB_FLAGS_SMP) + ALT_UP(orr r0, r0, #TTB_FLAGS_UP) +#ifdef CONFIG_ARM_ERRATA_430973 + mcr p15, 0, r2, c7, c5, 6 @ flush BTAC/BTB +#endif +#ifdef CONFIG_PID_IN_CONTEXTIDR + mrc p15, 0, r2, c13, c0, 1 @ read current context ID + lsr r2, r2, #8 @ extract the PID + bfi r1, r2, #8, #24 @ insert into new context ID +#endif +#ifdef CONFIG_ARM_ERRATA_754322 + dsb +#endif + mcr p15, 0, r1, c13, c0, 1 @ set context ID + isb + mcr p15, 0, r0, c2, c0, 0 @ set TTB 0 + isb +#endif + mov pc, lr +ENDPROC(cpu_v7_switch_mm) + +/* + * cpu_v7_set_pte_ext(ptep, pte) + * + * Set a level 2 translation table entry. 
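
When CONFIG_PID_IN_CONTEXTIDR is enabled, the cpu_v7_switch_mm above keeps the PID that was previously programmed into CONTEXTIDR bits [31:8] and only replaces the low eight ASID bits with the new mm's context ID. A minimal C sketch of that packing (function name and test values invented here for illustration):

#include <stdint.h>

/* Mirrors the lsr/bfi pair in cpu_v7_switch_mm above. */
static uint32_t new_contextidr(uint32_t old_contextidr, uint32_t new_ctx_id)
{
        uint32_t pid = old_contextidr >> 8;             /* lsr r2, r2, #8 */

        return (new_ctx_id & 0xff) | (pid << 8);        /* bfi r1, r2, #8, #24 */
}

int main(void)
{
        /* hypothetical: old CONTEXTIDR 0x00123411, new context id with ASID 0x2a */
        return new_contextidr(0x00123411u, 0x2au) == 0x0012342au ? 0 : 1;
}
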
+ * + * - ptep - pointer to level 2 translation table entry + * (hardware version is stored at +2048 bytes) + * - pte - PTE value to store + * - ext - value for extended PTE bits + */ +ENTRY(cpu_v7_set_pte_ext) +#ifdef CONFIG_MMU + str r1, [r0] @ linux version + + bic r3, r1, #0x000003f0 + bic r3, r3, #PTE_TYPE_MASK + orr r3, r3, r2 + orr r3, r3, #PTE_EXT_AP0 | 2 + + tst r1, #1 << 4 + orrne r3, r3, #PTE_EXT_TEX(1) + + eor r1, r1, #L_PTE_DIRTY + tst r1, #L_PTE_RDONLY | L_PTE_DIRTY + orrne r3, r3, #PTE_EXT_APX + + tst r1, #L_PTE_USER + orrne r3, r3, #PTE_EXT_AP1 + + tst r1, #L_PTE_XN + orrne r3, r3, #PTE_EXT_XN + + tst r1, #L_PTE_YOUNG + tstne r1, #L_PTE_VALID + eorne r1, r1, #L_PTE_NONE + tstne r1, #L_PTE_NONE + moveq r3, #0 + + ARM( str r3, [r0, #2048]! ) + THUMB( add r0, r0, #2048 ) + THUMB( str r3, [r0] ) + ALT_SMP(W(nop)) + ALT_UP (mcr p15, 0, r0, c7, c10, 1) @ flush_pte +#endif + mov pc, lr +ENDPROC(cpu_v7_set_pte_ext) + + /* + * Memory region attributes with SCTLR.TRE=1 + * + * n = TEX[0],C,B + * TR = PRRR[2n+1:2n] - memory type + * IR = NMRR[2n+1:2n] - inner cacheable property + * OR = NMRR[2n+17:2n+16] - outer cacheable property + * + * n TR IR OR + * UNCACHED 000 00 + * BUFFERABLE 001 10 00 00 + * WRITETHROUGH 010 10 10 10 + * WRITEBACK 011 10 11 11 + * reserved 110 + * WRITEALLOC 111 10 01 01 + * DEV_SHARED 100 01 + * DEV_NONSHARED 100 01 + * DEV_WC 001 10 + * DEV_CACHED 011 10 + * + * Other attributes: + * + * DS0 = PRRR[16] = 0 - device shareable property + * DS1 = PRRR[17] = 1 - device shareable property + * NS0 = PRRR[18] = 0 - normal shareable property + * NS1 = PRRR[19] = 1 - normal shareable property + * NOS = PRRR[24+n] = 1 - not outer shareable + */ +.equ PRRR, 0xff0a81a8 +.equ NMRR, 0x40e040e0 + + /* + * Macro for setting up the TTBRx and TTBCR registers. + * - \ttb0 and \ttb1 updated with the corresponding flags. + */ + .macro v7_ttb_setup, zero, ttbr0, ttbr1, tmp + mcr p15, 0, \zero, c2, c0, 2 @ TTB control register + ALT_SMP(orr \ttbr0, \ttbr0, #TTB_FLAGS_SMP) + ALT_UP(orr \ttbr0, \ttbr0, #TTB_FLAGS_UP) + ALT_SMP(orr \ttbr1, \ttbr1, #TTB_FLAGS_SMP) + ALT_UP(orr \ttbr1, \ttbr1, #TTB_FLAGS_UP) + mcr p15, 0, \ttbr1, c2, c0, 1 @ load TTB1 + .endm + + /* AT + * TFR EV X F I D LR S + * .EEE ..EE PUI. .T.T 4RVI ZWRS BLDP WCAM + * rxxx rrxx xxx0 0101 xxxx xxxx x111 xxxx < forced + * 01 0 110 0011 1100 .111 1101 < we want + */ + .align 2 + .type v7_crval, #object +v7_crval: + crval clear=0x2120c302, mmuset=0x10c03c7d, ucset=0x00c01c7c diff --git a/arch/arm/mm/proc-v7-3level.S b/arch/arm/mm/proc-v7-3level.S new file mode 100644 index 00000000000..22e3ad63500 --- /dev/null +++ b/arch/arm/mm/proc-v7-3level.S @@ -0,0 +1,161 @@ +/* + * arch/arm/mm/proc-v7-3level.S + * + * Copyright (C) 2001 Deep Blue Solutions Ltd. + * Copyright (C) 2011 ARM Ltd. + * Author: Catalin Marinas <catalin.marinas@arm.com> + * based on arch/arm/mm/proc-v7-2level.S + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
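
The PRRR and NMRR values programmed for the 2-level case above (0xff0a81a8 and 0x40e040e0) encode the TEX-remap table in the preceding comment, two bits per memory-type index. A small self-contained decoder, with the field layout taken from that comment, recovers one row; index 3 (WRITEBACK) comes out as TR=0b10, IR=0b11, OR=0b11, NOS=1:

#include <stdint.h>
#include <stdio.h>

/* Decode one TEX-remap index from the PRRR/NMRR values above. */
static void decode_attr(uint32_t prrr, uint32_t nmrr, unsigned int n)
{
        unsigned int type  = (prrr >> (2 * n)) & 3;        /* TR: memory type */
        unsigned int inner = (nmrr >> (2 * n)) & 3;        /* IR: inner cache policy */
        unsigned int outer = (nmrr >> (2 * n + 16)) & 3;   /* OR: outer cache policy */
        unsigned int nos   = (prrr >> (24 + n)) & 1;       /* NOS: not outer shareable */

        printf("n=%u TR=%u IR=%u OR=%u NOS=%u\n", n, type, inner, outer, nos);
}

int main(void)
{
        decode_attr(0xff0a81a8, 0x40e040e0, 3);            /* WRITEBACK */
        return 0;
}
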
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#define TTB_IRGN_NC (0 << 8) +#define TTB_IRGN_WBWA (1 << 8) +#define TTB_IRGN_WT (2 << 8) +#define TTB_IRGN_WB (3 << 8) +#define TTB_RGN_NC (0 << 10) +#define TTB_RGN_OC_WBWA (1 << 10) +#define TTB_RGN_OC_WT (2 << 10) +#define TTB_RGN_OC_WB (3 << 10) +#define TTB_S (3 << 12) +#define TTB_EAE (1 << 31) + +/* PTWs cacheable, inner WB not shareable, outer WB not shareable */ +#define TTB_FLAGS_UP (TTB_IRGN_WB|TTB_RGN_OC_WB) +#define PMD_FLAGS_UP (PMD_SECT_WB) + +/* PTWs cacheable, inner WBWA shareable, outer WBWA not shareable */ +#define TTB_FLAGS_SMP (TTB_IRGN_WBWA|TTB_S|TTB_RGN_OC_WBWA) +#define PMD_FLAGS_SMP (PMD_SECT_WBWA|PMD_SECT_S) + +#ifndef __ARMEB__ +# define rpgdl r0 +# define rpgdh r1 +#else +# define rpgdl r1 +# define rpgdh r0 +#endif + +/* + * cpu_v7_switch_mm(pgd_phys, tsk) + * + * Set the translation table base pointer to be pgd_phys (physical address of + * the new TTB). + */ +ENTRY(cpu_v7_switch_mm) +#ifdef CONFIG_MMU + mmid r2, r2 + asid r2, r2 + orr rpgdh, rpgdh, r2, lsl #(48 - 32) @ upper 32-bits of pgd + mcrr p15, 0, rpgdl, rpgdh, c2 @ set TTB 0 + isb +#endif + mov pc, lr +ENDPROC(cpu_v7_switch_mm) + +#ifdef __ARMEB__ +#define rl r3 +#define rh r2 +#else +#define rl r2 +#define rh r3 +#endif + +/* + * cpu_v7_set_pte_ext(ptep, pte) + * + * Set a level 2 translation table entry. + * - ptep - pointer to level 3 translation table entry + * - pte - PTE value to store (64-bit in r2 and r3) + */ +ENTRY(cpu_v7_set_pte_ext) +#ifdef CONFIG_MMU + tst rl, #L_PTE_VALID + beq 1f + tst rh, #1 << (57 - 32) @ L_PTE_NONE + bicne rl, #L_PTE_VALID + bne 1f + tst rh, #1 << (55 - 32) @ L_PTE_DIRTY + orreq rl, #L_PTE_RDONLY +1: strd r2, r3, [r0] + ALT_SMP(W(nop)) + ALT_UP (mcr p15, 0, r0, c7, c10, 1) @ flush_pte +#endif + mov pc, lr +ENDPROC(cpu_v7_set_pte_ext) + + /* + * Memory region attributes for LPAE (defined in pgtable-3level.h): + * + * n = AttrIndx[2:0] + * + * n MAIR + * UNCACHED 000 00000000 + * BUFFERABLE 001 01000100 + * DEV_WC 001 01000100 + * WRITETHROUGH 010 10101010 + * WRITEBACK 011 11101110 + * DEV_CACHED 011 11101110 + * DEV_SHARED 100 00000100 + * DEV_NONSHARED 100 00000100 + * unused 101 + * unused 110 + * WRITEALLOC 111 11111111 + */ +.equ PRRR, 0xeeaa4400 @ MAIR0 +.equ NMRR, 0xff000004 @ MAIR1 + + /* + * Macro for setting up the TTBRx and TTBCR registers. + * - \ttbr1 updated. + */ + .macro v7_ttb_setup, zero, ttbr0, ttbr1, tmp + ldr \tmp, =swapper_pg_dir @ swapper_pg_dir virtual address + mov \tmp, \tmp, lsr #ARCH_PGD_SHIFT + cmp \ttbr1, \tmp @ PHYS_OFFSET > PAGE_OFFSET? + mrc p15, 0, \tmp, c2, c0, 2 @ TTB control register + orr \tmp, \tmp, #TTB_EAE + ALT_SMP(orr \tmp, \tmp, #TTB_FLAGS_SMP) + ALT_UP(orr \tmp, \tmp, #TTB_FLAGS_UP) + ALT_SMP(orr \tmp, \tmp, #TTB_FLAGS_SMP << 16) + ALT_UP(orr \tmp, \tmp, #TTB_FLAGS_UP << 16) + /* + * Only use split TTBRs if PHYS_OFFSET <= PAGE_OFFSET (cmp above), + * otherwise booting secondary CPUs would end up using TTBR1 for the + * identity mapping set up in TTBR0. 
+ */ + orrls \tmp, \tmp, #TTBR1_SIZE @ TTBCR.T1SZ + mcr p15, 0, \tmp, c2, c0, 2 @ TTBCR + mov \tmp, \ttbr1, lsr #(32 - ARCH_PGD_SHIFT) @ upper bits + mov \ttbr1, \ttbr1, lsl #ARCH_PGD_SHIFT @ lower bits + addls \ttbr1, \ttbr1, #TTBR1_OFFSET + mcrr p15, 1, \ttbr1, \zero, c2 @ load TTBR1 + mov \tmp, \ttbr0, lsr #(32 - ARCH_PGD_SHIFT) @ upper bits + mov \ttbr0, \ttbr0, lsl #ARCH_PGD_SHIFT @ lower bits + mcrr p15, 0, \ttbr0, \zero, c2 @ load TTBR0 + mcrr p15, 1, \ttbr1, \zero, c2 @ load TTBR1 + mcrr p15, 0, \ttbr0, \zero, c2 @ load TTBR0 + .endm + + /* + * AT + * TFR EV X F IHD LR S + * .EEE ..EE PUI. .TAT 4RVI ZWRS BLDP WCAM + * rxxx rrxx xxx0 0101 xxxx xxxx x111 xxxx < forced + * 11 0 110 1 0011 1100 .111 1101 < we want + */ + .align 2 + .type v7_crval, #object +v7_crval: + crval clear=0x0120c302, mmuset=0x30c23c7d, ucset=0x00c01c7c diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index 53cbe222515..3db2c2f04a3 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -19,24 +19,11 @@ #include "proc-macros.S" -#define TTB_S (1 << 1) -#define TTB_RGN_NC (0 << 3) -#define TTB_RGN_OC_WBWA (1 << 3) -#define TTB_RGN_OC_WT (2 << 3) -#define TTB_RGN_OC_WB (3 << 3) -#define TTB_NOS (1 << 5) -#define TTB_IRGN_NC ((0 << 0) | (0 << 6)) -#define TTB_IRGN_WBWA ((0 << 0) | (1 << 6)) -#define TTB_IRGN_WT ((1 << 0) | (0 << 6)) -#define TTB_IRGN_WB ((1 << 0) | (1 << 6)) - -/* PTWs cacheable, inner WB not shareable, outer WB not shareable */ -#define TTB_FLAGS_UP TTB_IRGN_WB|TTB_RGN_OC_WB -#define PMD_FLAGS_UP PMD_SECT_WB - -/* PTWs cacheable, inner WBWA shareable, outer WBWA not shareable */ -#define TTB_FLAGS_SMP TTB_IRGN_WBWA|TTB_S|TTB_NOS|TTB_RGN_OC_WBWA -#define PMD_FLAGS_SMP PMD_SECT_WBWA|PMD_SECT_S +#ifdef CONFIG_ARM_LPAE +#include "proc-v7-3level.S" +#else +#include "proc-v7-2level.S" +#endif ENTRY(cpu_v7_proc_init) mov pc, lr @@ -58,11 +45,21 @@ ENDPROC(cpu_v7_proc_fin) * to what would be the reset vector. * * - loc - location to jump to for soft reset + * + * This code must be executed using a flat identity mapping with + * caches disabled. 
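
Under LPAE, the cpu_v7_switch_mm shown earlier does not write CONTEXTIDR on this path the way the 2-level version does; the eight ASID bits are instead folded into the upper word of the 64-bit TTBR0 (bits [55:48]) before the mcrr. A minimal sketch of that packing, assuming the pgd physical address sits below bit 48 and using hypothetical values:

#include <stdint.h>

/* 64-bit TTBR0 as assembled by the LPAE cpu_v7_switch_mm above. */
static uint64_t make_ttbr0(uint64_t pgd_phys, uint8_t asid)
{
        return pgd_phys | ((uint64_t)asid << 48);       /* ASID in bits [55:48] */
}

int main(void)
{
        /* hypothetical values: pgd at 0x80004000, ASID 5 */
        return make_ttbr0(0x80004000ull, 5) == 0x0005000080004000ull ? 0 : 1;
}
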
*/ .align 5 + .pushsection .idmap.text, "ax" ENTRY(cpu_v7_reset) - mov pc, r0 + mrc p15, 0, r1, c1, c0, 0 @ ctrl register + bic r1, r1, #0x1 @ ...............m + THUMB( bic r1, r1, #1 << 30 ) @ SCTLR.TE (Thumb exceptions) + mcr p15, 0, r1, c1, c0, 0 @ disable MMU + isb + bx r0 ENDPROC(cpu_v7_reset) + .popsection /* * cpu_v7_do_idle() @@ -78,97 +75,127 @@ ENTRY(cpu_v7_do_idle) ENDPROC(cpu_v7_do_idle) ENTRY(cpu_v7_dcache_clean_area) -#ifndef TLB_CAN_READ_FROM_L1_CACHE - dcache_line_size r2, r3 -1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + ALT_SMP(W(nop)) @ MP extensions imply L1 PTW + ALT_UP_B(1f) + mov pc, lr +1: dcache_line_size r2, r3 +2: mcr p15, 0, r0, c7, c10, 1 @ clean D entry add r0, r0, r2 subs r1, r1, r2 - bhi 1b - dsb -#endif + bhi 2b + dsb ishst mov pc, lr ENDPROC(cpu_v7_dcache_clean_area) -/* - * cpu_v7_switch_mm(pgd_phys, tsk) - * - * Set the translation table base pointer to be pgd_phys - * - * - pgd_phys - physical address of new TTB - * - * It is assumed that: - * - we are not using split page tables - */ -ENTRY(cpu_v7_switch_mm) + string cpu_v7_name, "ARMv7 Processor" + .align + +/* Suspend/resume support: derived from arch/arm/mach-s5pv210/sleep.S */ +.globl cpu_v7_suspend_size +.equ cpu_v7_suspend_size, 4 * 9 +#ifdef CONFIG_ARM_CPU_SUSPEND +ENTRY(cpu_v7_do_suspend) + stmfd sp!, {r4 - r10, lr} + mrc p15, 0, r4, c13, c0, 0 @ FCSE/PID + mrc p15, 0, r5, c13, c0, 3 @ User r/o thread ID + stmia r0!, {r4 - r5} #ifdef CONFIG_MMU - mov r2, #0 - ldr r1, [r1, #MM_CONTEXT_ID] @ get mm->context.id - ALT_SMP(orr r0, r0, #TTB_FLAGS_SMP) - ALT_UP(orr r0, r0, #TTB_FLAGS_UP) -#ifdef CONFIG_ARM_ERRATA_430973 - mcr p15, 0, r2, c7, c5, 6 @ flush BTAC/BTB + mrc p15, 0, r6, c3, c0, 0 @ Domain ID +#ifdef CONFIG_ARM_LPAE + mrrc p15, 1, r5, r7, c2 @ TTB 1 +#else + mrc p15, 0, r7, c2, c0, 1 @ TTB 1 #endif - mcr p15, 0, r2, c13, c0, 1 @ set reserved context ID - isb -1: mcr p15, 0, r0, c2, c0, 0 @ set TTB 0 - isb - mcr p15, 0, r1, c13, c0, 1 @ set context ID - isb + mrc p15, 0, r11, c2, c0, 2 @ TTB control register #endif - mov pc, lr -ENDPROC(cpu_v7_switch_mm) - -/* - * cpu_v7_set_pte_ext(ptep, pte) - * - * Set a level 2 translation table entry. 
- * - * - ptep - pointer to level 2 translation table entry - * (hardware version is stored at -1024 bytes) - * - pte - PTE value to store - * - ext - value for extended PTE bits - */ -ENTRY(cpu_v7_set_pte_ext) + mrc p15, 0, r8, c1, c0, 0 @ Control register + mrc p15, 0, r9, c1, c0, 1 @ Auxiliary control register + mrc p15, 0, r10, c1, c0, 2 @ Co-processor access control + stmia r0, {r5 - r11} + ldmfd sp!, {r4 - r10, pc} +ENDPROC(cpu_v7_do_suspend) + +ENTRY(cpu_v7_do_resume) + mov ip, #0 + mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcr p15, 0, ip, c13, c0, 1 @ set reserved context ID + ldmia r0!, {r4 - r5} + mcr p15, 0, r4, c13, c0, 0 @ FCSE/PID + mcr p15, 0, r5, c13, c0, 3 @ User r/o thread ID + ldmia r0, {r5 - r11} #ifdef CONFIG_MMU - ARM( str r1, [r0], #-2048 ) @ linux version - THUMB( str r1, [r0] ) @ linux version - THUMB( sub r0, r0, #2048 ) - - bic r3, r1, #0x000003f0 - bic r3, r3, #PTE_TYPE_MASK - orr r3, r3, r2 - orr r3, r3, #PTE_EXT_AP0 | 2 - - tst r1, #1 << 4 - orrne r3, r3, #PTE_EXT_TEX(1) - - tst r1, #L_PTE_WRITE - tstne r1, #L_PTE_DIRTY - orreq r3, r3, #PTE_EXT_APX - - tst r1, #L_PTE_USER - orrne r3, r3, #PTE_EXT_AP1 - tstne r3, #PTE_EXT_APX - bicne r3, r3, #PTE_EXT_APX | PTE_EXT_AP0 - - tst r1, #L_PTE_EXEC - orreq r3, r3, #PTE_EXT_XN - - tst r1, #L_PTE_YOUNG - tstne r1, #L_PTE_PRESENT - moveq r3, #0 - - str r3, [r0] - mcr p15, 0, r0, c7, c10, 1 @ flush_pte + mcr p15, 0, ip, c8, c7, 0 @ invalidate TLBs + mcr p15, 0, r6, c3, c0, 0 @ Domain ID +#ifdef CONFIG_ARM_LPAE + mcrr p15, 0, r1, ip, c2 @ TTB 0 + mcrr p15, 1, r5, r7, c2 @ TTB 1 +#else + ALT_SMP(orr r1, r1, #TTB_FLAGS_SMP) + ALT_UP(orr r1, r1, #TTB_FLAGS_UP) + mcr p15, 0, r1, c2, c0, 0 @ TTB 0 + mcr p15, 0, r7, c2, c0, 1 @ TTB 1 +#endif + mcr p15, 0, r11, c2, c0, 2 @ TTB control register + ldr r4, =PRRR @ PRRR + ldr r5, =NMRR @ NMRR + mcr p15, 0, r4, c10, c2, 0 @ write PRRR + mcr p15, 0, r5, c10, c2, 1 @ write NMRR +#endif /* CONFIG_MMU */ + mrc p15, 0, r4, c1, c0, 1 @ Read Auxiliary control register + teq r4, r9 @ Is it already set? 
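
cpu_v7_do_suspend above stores nine words, matching cpu_v7_suspend_size = 4 * 9: FCSE/PID and the user read-only thread register first, then the block written by the final stmia. The layout is private to the do_suspend/do_resume pair; the struct below only illustrates it, with field names invented here.

#include <stdint.h>

/* Sketch of the save area filled by cpu_v7_do_suspend above. */
struct v7_suspend_regs_sketch {
        uint32_t fcse_pid;      /* c13, c0, 0 */
        uint32_t tpidruro;      /* c13, c0, 3  user r/o thread ID */
        uint32_t ttbr1_lo;      /* low word of TTBR1 (LPAE only) */
        uint32_t domain;        /* c3, c0, 0 */
        uint32_t ttbr1;         /* TTBR1, or its high word under LPAE */
        uint32_t sctlr;         /* c1, c0, 0  control register */
        uint32_t actlr;         /* c1, c0, 1  auxiliary control */
        uint32_t cpacr;         /* c1, c0, 2  co-processor access control */
        uint32_t ttbcr;         /* c2, c0, 2  TTB control register */
};

_Static_assert(sizeof(struct v7_suspend_regs_sketch) == 4 * 9,
               "matches cpu_v7_suspend_size");
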
+ mcrne p15, 0, r9, c1, c0, 1 @ No, so write it + mcr p15, 0, r10, c1, c0, 2 @ Co-processor access control + isb + dsb + mov r0, r8 @ control register + b cpu_resume_mmu +ENDPROC(cpu_v7_do_resume) #endif - mov pc, lr -ENDPROC(cpu_v7_set_pte_ext) -cpu_v7_name: - .ascii "ARMv7 Processor" - .align +#ifdef CONFIG_CPU_PJ4B + globl_equ cpu_pj4b_switch_mm, cpu_v7_switch_mm + globl_equ cpu_pj4b_set_pte_ext, cpu_v7_set_pte_ext + globl_equ cpu_pj4b_proc_init, cpu_v7_proc_init + globl_equ cpu_pj4b_proc_fin, cpu_v7_proc_fin + globl_equ cpu_pj4b_reset, cpu_v7_reset +#ifdef CONFIG_PJ4B_ERRATA_4742 +ENTRY(cpu_pj4b_do_idle) + dsb @ WFI may enter a low-power mode + wfi + dsb @barrier + mov pc, lr +ENDPROC(cpu_pj4b_do_idle) +#else + globl_equ cpu_pj4b_do_idle, cpu_v7_do_idle +#endif + globl_equ cpu_pj4b_dcache_clean_area, cpu_v7_dcache_clean_area +#ifdef CONFIG_ARM_CPU_SUSPEND +ENTRY(cpu_pj4b_do_suspend) + stmfd sp!, {r6 - r10} + mrc p15, 1, r6, c15, c1, 0 @ save CP15 - extra features + mrc p15, 1, r7, c15, c2, 0 @ save CP15 - Aux Func Modes Ctrl 0 + mrc p15, 1, r8, c15, c1, 2 @ save CP15 - Aux Debug Modes Ctrl 2 + mrc p15, 1, r9, c15, c1, 1 @ save CP15 - Aux Debug Modes Ctrl 1 + mrc p15, 0, r10, c9, c14, 0 @ save CP15 - PMC + stmia r0!, {r6 - r10} + ldmfd sp!, {r6 - r10} + b cpu_v7_do_suspend +ENDPROC(cpu_pj4b_do_suspend) + +ENTRY(cpu_pj4b_do_resume) + ldmia r0!, {r6 - r10} + mcr p15, 1, r6, c15, c1, 0 @ save CP15 - extra features + mcr p15, 1, r7, c15, c2, 0 @ save CP15 - Aux Func Modes Ctrl 0 + mcr p15, 1, r8, c15, c1, 2 @ save CP15 - Aux Debug Modes Ctrl 2 + mcr p15, 1, r9, c15, c1, 1 @ save CP15 - Aux Debug Modes Ctrl 1 + mcr p15, 0, r10, c9, c14, 0 @ save CP15 - PMC + b cpu_v7_do_resume +ENDPROC(cpu_pj4b_do_resume) +#endif +.globl cpu_pj4b_suspend_size +.equ cpu_pj4b_suspend_size, 4 * 14 - __CPUINIT +#endif /* * __v7_setup @@ -176,27 +203,92 @@ cpu_v7_name: * Initialise TLB, Caches, and MMU state ready to switch the MMU * on. Return in r0 the new CP15 C1 control register setting. * - * We automatically detect if we have a Harvard cache, and use the - * Harvard cache control instructions insead of the unified cache - * control instructions. - * * This should be able to cover all ARMv7 cores. * * It is assumed that: * - cache type register is implemented */ +__v7_ca5mp_setup: __v7_ca9mp_setup: +__v7_cr7mp_setup: + mov r10, #(1 << 0) @ Cache/TLB ops broadcasting + b 1f +__v7_ca7mp_setup: +__v7_ca12mp_setup: +__v7_ca15mp_setup: +__v7_ca17mp_setup: + mov r10, #0 +1: #ifdef CONFIG_SMP ALT_SMP(mrc p15, 0, r0, c1, c0, 1) ALT_UP(mov r0, #(1 << 6)) @ fake it for UP tst r0, #(1 << 6) @ SMP/nAMP mode enabled? 
- orreq r0, r0, #(1 << 6) | (1 << 0) @ Enable SMP/nAMP mode and - mcreq p15, 0, r0, c1, c0, 1 @ TLB ops broadcasting + orreq r0, r0, #(1 << 6) @ Enable SMP/nAMP mode + orreq r0, r0, r10 @ Enable CPU-specific SMP bits + mcreq p15, 0, r0, c1, c0, 1 #endif + b __v7_setup + +__v7_pj4b_setup: +#ifdef CONFIG_CPU_PJ4B + +/* Auxiliary Debug Modes Control 1 Register */ +#define PJ4B_STATIC_BP (1 << 2) /* Enable Static BP */ +#define PJ4B_INTER_PARITY (1 << 8) /* Disable Internal Parity Handling */ +#define PJ4B_BCK_OFF_STREX (1 << 5) /* Enable the back off of STREX instr */ +#define PJ4B_CLEAN_LINE (1 << 16) /* Disable data transfer for clean line */ + +/* Auxiliary Debug Modes Control 2 Register */ +#define PJ4B_FAST_LDR (1 << 23) /* Disable fast LDR */ +#define PJ4B_SNOOP_DATA (1 << 25) /* Do not interleave write and snoop data */ +#define PJ4B_CWF (1 << 27) /* Disable Critical Word First feature */ +#define PJ4B_OUTSDNG_NC (1 << 29) /* Disable outstanding non cacheable rqst */ +#define PJ4B_L1_REP_RR (1 << 30) /* L1 replacement - Strict round robin */ +#define PJ4B_AUX_DBG_CTRL2 (PJ4B_SNOOP_DATA | PJ4B_CWF |\ + PJ4B_OUTSDNG_NC | PJ4B_L1_REP_RR) + +/* Auxiliary Functional Modes Control Register 0 */ +#define PJ4B_SMP_CFB (1 << 1) /* Set SMP mode. Join the coherency fabric */ +#define PJ4B_L1_PAR_CHK (1 << 2) /* Support L1 parity checking */ +#define PJ4B_BROADCAST_CACHE (1 << 8) /* Broadcast Cache and TLB maintenance */ + +/* Auxiliary Debug Modes Control 0 Register */ +#define PJ4B_WFI_WFE (1 << 22) /* WFI/WFE - serve the DVM and back to idle */ + + /* Auxiliary Debug Modes Control 1 Register */ + mrc p15, 1, r0, c15, c1, 1 + orr r0, r0, #PJ4B_CLEAN_LINE + orr r0, r0, #PJ4B_BCK_OFF_STREX + orr r0, r0, #PJ4B_INTER_PARITY + bic r0, r0, #PJ4B_STATIC_BP + mcr p15, 1, r0, c15, c1, 1 + + /* Auxiliary Debug Modes Control 2 Register */ + mrc p15, 1, r0, c15, c1, 2 + bic r0, r0, #PJ4B_FAST_LDR + orr r0, r0, #PJ4B_AUX_DBG_CTRL2 + mcr p15, 1, r0, c15, c1, 2 + + /* Auxiliary Functional Modes Control Register 0 */ + mrc p15, 1, r0, c15, c2, 0 +#ifdef CONFIG_SMP + orr r0, r0, #PJ4B_SMP_CFB +#endif + orr r0, r0, #PJ4B_L1_PAR_CHK + orr r0, r0, #PJ4B_BROADCAST_CACHE + mcr p15, 1, r0, c15, c2, 0 + + /* Auxiliary Debug Modes Control 0 Register */ + mrc p15, 1, r0, c15, c1, 0 + orr r0, r0, #PJ4B_WFI_WFE + mcr p15, 1, r0, c15, c1, 0 + +#endif /* CONFIG_CPU_PJ4B */ + __v7_setup: adr r12, __v7_setup_stack @ the local stack stmia r12, {r0-r5, r7, r9, r11, lr} - bl v7_flush_dcache_all + bl v7_flush_dcache_louis ldmia r12, {r0-r5, r7, r9, r11, lr} mrc p15, 0, r0, c0, c0, 0 @ read main ID register @@ -212,7 +304,8 @@ __v7_setup: ldr r10, =0x00000c08 @ Cortex-A8 primary part number teq r0, r10 bne 2f -#ifdef CONFIG_ARM_ERRATA_430973 +#if defined(CONFIG_ARM_ERRATA_430973) && !defined(CONFIG_ARCH_MULTIPLATFORM) + teq r5, #0x00100000 @ only present in r1p* mrceq p15, 0, r10, c1, c0, 1 @ read aux control register orreq r10, r10, #(1 << 6) @ set IBE to 1 @@ -254,64 +347,61 @@ __v7_setup: mcreq p15, 0, r10, c15, c0, 1 @ write diagnostic register #endif #ifdef CONFIG_ARM_ERRATA_743622 - teq r6, #0x20 @ present in r2p0 - teqne r6, #0x21 @ present in r2p1 - teqne r6, #0x22 @ present in r2p2 + teq r5, #0x00200000 @ only present in r2p* mrceq p15, 0, r10, c15, c0, 1 @ read diagnostic register orreq r10, r10, #1 << 6 @ set bit #6 mcreq p15, 0, r10, c15, c0, 1 @ write diagnostic register #endif +#if defined(CONFIG_ARM_ERRATA_751472) && defined(CONFIG_SMP) + ALT_SMP(cmp r6, #0x30) @ present prior to r3p0 + ALT_UP_B(1f) + mrclt p15, 0, 
r10, c15, c0, 1 @ read diagnostic register + orrlt r10, r10, #1 << 11 @ set bit #11 + mcrlt p15, 0, r10, c15, c0, 1 @ write diagnostic register +1: +#endif -3: mov r10, #0 -#ifdef HARVARD_CACHE - mcr p15, 0, r10, c7, c5, 0 @ I+BTB cache invalidate + /* Cortex-A15 Errata */ +3: ldr r10, =0x00000c0f @ Cortex-A15 primary part number + teq r0, r10 + bne 4f + +#ifdef CONFIG_ARM_ERRATA_773022 + cmp r6, #0x4 @ only present up to r0p4 + mrcle p15, 0, r10, c1, c0, 1 @ read aux control register + orrle r10, r10, #1 << 1 @ disable loop buffer + mcrle p15, 0, r10, c1, c0, 1 @ write aux control register #endif - dsb + +4: mov r10, #0 + mcr p15, 0, r10, c7, c5, 0 @ I+BTB cache invalidate #ifdef CONFIG_MMU mcr p15, 0, r10, c8, c7, 0 @ invalidate I + D TLBs - mcr p15, 0, r10, c2, c0, 2 @ TTB control register - ALT_SMP(orr r4, r4, #TTB_FLAGS_SMP) - ALT_UP(orr r4, r4, #TTB_FLAGS_UP) - mcr p15, 0, r4, c2, c0, 1 @ load TTB1 - mov r10, #0x1f @ domains 0, 1 = manager - mcr p15, 0, r10, c3, c0, 0 @ load domain access register - /* - * Memory region attributes with SCTLR.TRE=1 - * - * n = TEX[0],C,B - * TR = PRRR[2n+1:2n] - memory type - * IR = NMRR[2n+1:2n] - inner cacheable property - * OR = NMRR[2n+17:2n+16] - outer cacheable property - * - * n TR IR OR - * UNCACHED 000 00 - * BUFFERABLE 001 10 00 00 - * WRITETHROUGH 010 10 10 10 - * WRITEBACK 011 10 11 11 - * reserved 110 - * WRITEALLOC 111 10 01 01 - * DEV_SHARED 100 01 - * DEV_NONSHARED 100 01 - * DEV_WC 001 10 - * DEV_CACHED 011 10 - * - * Other attributes: - * - * DS0 = PRRR[16] = 0 - device shareable property - * DS1 = PRRR[17] = 1 - device shareable property - * NS0 = PRRR[18] = 0 - normal shareable property - * NS1 = PRRR[19] = 1 - normal shareable property - * NOS = PRRR[24+n] = 1 - not outer shareable - */ - ldr r5, =0xff0a81a8 @ PRRR - ldr r6, =0x40e040e0 @ NMRR + v7_ttb_setup r10, r4, r8, r5 @ TTBCR, TTBRx setup + ldr r5, =PRRR @ PRRR + ldr r6, =NMRR @ NMRR mcr p15, 0, r5, c10, c2, 0 @ write PRRR mcr p15, 0, r6, c10, c2, 1 @ write NMRR #endif + dsb @ Complete invalidations +#ifndef CONFIG_ARM_THUMBEE + mrc p15, 0, r0, c0, c1, 0 @ read ID_PFR0 for ThumbEE + and r0, r0, #(0xf << 12) @ ThumbEE enabled field + teq r0, #(1 << 12) @ check if ThumbEE is present + bne 1f + mov r5, #0 + mcr p14, 6, r5, c1, c0, 0 @ Initialize TEEHBR to 0 + mrc p14, 6, r0, c0, c0, 0 @ load TEECR + orr r0, r0, #1 @ set the 1st bit in order to + mcr p14, 6, r0, c0, c0, 0 @ stop userspace TEEHBR access +1: +#endif adr r5, v7_crval ldmia r5, {r5, r6} -#ifdef CONFIG_CPU_ENDIAN_BE8 - orr r6, r6, #1 << 25 @ big-endian page tables + ARM_BE8(orr r6, r6, #1 << 25) @ big-endian page tables +#ifdef CONFIG_SWP_EMULATE + orr r5, r5, #(1 << 10) @ set SW bit in "clear" + bic r6, r6, #(1 << 10) @ clear it in "mmuset" #endif mrc p15, 0, r0, c1, c0, 0 @ read control register bic r0, r0, r5 @ clear bits them @@ -320,78 +410,148 @@ __v7_setup: mov pc, lr @ return to head.S:__ret ENDPROC(__v7_setup) - /* AT - * TFR EV X F I D LR S - * .EEE ..EE PUI. 
.T.T 4RVI ZWRS BLDP WCAM - * rxxx rrxx xxx0 0101 xxxx xxxx x111 xxxx < forced - * 1 0 110 0011 1100 .111 1101 < we want - */ - .type v7_crval, #object -v7_crval: - crval clear=0x0120c302, mmuset=0x10c03c7d, ucset=0x00c01c7c - + .align 2 __v7_setup_stack: .space 4 * 11 @ 11 registers __INITDATA - .type v7_processor_functions, #object -ENTRY(v7_processor_functions) - .word v7_early_abort - .word v7_pabort - .word cpu_v7_proc_init - .word cpu_v7_proc_fin - .word cpu_v7_reset - .word cpu_v7_do_idle - .word cpu_v7_dcache_clean_area - .word cpu_v7_switch_mm - .word cpu_v7_set_pte_ext - .size v7_processor_functions, . - v7_processor_functions + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions v7, dabort=v7_early_abort, pabort=v7_pabort, suspend=1 +#ifdef CONFIG_CPU_PJ4B + define_processor_functions pj4b, dabort=v7_early_abort, pabort=v7_pabort, suspend=1 +#endif .section ".rodata" - .type cpu_arch_name, #object -cpu_arch_name: - .asciz "armv7" - .size cpu_arch_name, . - cpu_arch_name - - .type cpu_elf_name, #object -cpu_elf_name: - .asciz "v7" - .size cpu_elf_name, . - cpu_elf_name + string cpu_arch_name, "armv7" + string cpu_elf_name, "v7" .align .section ".proc.info.init", #alloc, #execinstr - .type __v7_ca9mp_proc_info, #object -__v7_ca9mp_proc_info: - .long 0x410fc090 @ Required ID value - .long 0xff0ffff0 @ Mask for ID - ALT_SMP(.long \ - PMD_TYPE_SECT | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ | \ - PMD_FLAGS_SMP) - ALT_UP(.long \ - PMD_TYPE_SECT | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ | \ - PMD_FLAGS_UP) - .long PMD_TYPE_SECT | \ - PMD_SECT_XN | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - b __v7_ca9mp_setup + /* + * Standard v7 proc info content + */ +.macro __v7_proc initfunc, mm_mmuflags = 0, io_mmuflags = 0, hwcaps = 0, proc_fns = v7_processor_functions + ALT_SMP(.long PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_AP_READ | \ + PMD_SECT_AF | PMD_FLAGS_SMP | \mm_mmuflags) + ALT_UP(.long PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_AP_READ | \ + PMD_SECT_AF | PMD_FLAGS_UP | \mm_mmuflags) + .long PMD_TYPE_SECT | PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ | PMD_SECT_AF | \io_mmuflags + W(b) \initfunc .long cpu_arch_name .long cpu_elf_name - .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP|HWCAP_TLS + .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB | HWCAP_FAST_MULT | \ + HWCAP_EDSP | HWCAP_TLS | \hwcaps .long cpu_v7_name - .long v7_processor_functions + .long \proc_fns .long v7wbi_tlb_fns .long v6_user_fns .long v7_cache_fns +.endm + +#ifndef CONFIG_ARM_LPAE + /* + * ARM Ltd. Cortex A5 processor. + */ + .type __v7_ca5mp_proc_info, #object +__v7_ca5mp_proc_info: + .long 0x410fc050 + .long 0xff0ffff0 + __v7_proc __v7_ca5mp_setup + .size __v7_ca5mp_proc_info, . - __v7_ca5mp_proc_info + + /* + * ARM Ltd. Cortex A9 processor. + */ + .type __v7_ca9mp_proc_info, #object +__v7_ca9mp_proc_info: + .long 0x410fc090 + .long 0xff0ffff0 + __v7_proc __v7_ca9mp_setup .size __v7_ca9mp_proc_info, . - __v7_ca9mp_proc_info +#endif /* CONFIG_ARM_LPAE */ + + /* + * Marvell PJ4B processor. + */ +#ifdef CONFIG_CPU_PJ4B + .type __v7_pj4b_proc_info, #object +__v7_pj4b_proc_info: + .long 0x560f5800 + .long 0xff0fff00 + __v7_proc __v7_pj4b_setup, proc_fns = pj4b_processor_functions + .size __v7_pj4b_proc_info, . - __v7_pj4b_proc_info +#endif + + /* + * ARM Ltd. Cortex R7 processor. + */ + .type __v7_cr7mp_proc_info, #object +__v7_cr7mp_proc_info: + .long 0x410fc170 + .long 0xff0ffff0 + __v7_proc __v7_cr7mp_setup + .size __v7_cr7mp_proc_info, . 
- __v7_cr7mp_proc_info + + /* + * ARM Ltd. Cortex A7 processor. + */ + .type __v7_ca7mp_proc_info, #object +__v7_ca7mp_proc_info: + .long 0x410fc070 + .long 0xff0ffff0 + __v7_proc __v7_ca7mp_setup + .size __v7_ca7mp_proc_info, . - __v7_ca7mp_proc_info + + /* + * ARM Ltd. Cortex A12 processor. + */ + .type __v7_ca12mp_proc_info, #object +__v7_ca12mp_proc_info: + .long 0x410fc0d0 + .long 0xff0ffff0 + __v7_proc __v7_ca12mp_setup + .size __v7_ca12mp_proc_info, . - __v7_ca12mp_proc_info + + /* + * ARM Ltd. Cortex A15 processor. + */ + .type __v7_ca15mp_proc_info, #object +__v7_ca15mp_proc_info: + .long 0x410fc0f0 + .long 0xff0ffff0 + __v7_proc __v7_ca15mp_setup + .size __v7_ca15mp_proc_info, . - __v7_ca15mp_proc_info + + /* + * ARM Ltd. Cortex A17 processor. + */ + .type __v7_ca17mp_proc_info, #object +__v7_ca17mp_proc_info: + .long 0x410fc0e0 + .long 0xff0ffff0 + __v7_proc __v7_ca17mp_setup + .size __v7_ca17mp_proc_info, . - __v7_ca17mp_proc_info + + /* + * Qualcomm Inc. Krait processors. + */ + .type __krait_proc_info, #object +__krait_proc_info: + .long 0x510f0400 @ Required ID value + .long 0xff0ffc00 @ Mask for ID + /* + * Some Krait processors don't indicate support for SDIV and UDIV + * instructions in the ARM instruction set, even though they actually + * do support them. + */ + __v7_proc __v7_setup, hwcaps = HWCAP_IDIV + .size __krait_proc_info, . - __krait_proc_info + /* * Match any ARMv7 processor core. */ @@ -399,27 +559,5 @@ __v7_ca9mp_proc_info: __v7_proc_info: .long 0x000f0000 @ Required ID value .long 0x000f0000 @ Mask for ID - ALT_SMP(.long \ - PMD_TYPE_SECT | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ | \ - PMD_FLAGS_SMP) - ALT_UP(.long \ - PMD_TYPE_SECT | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ | \ - PMD_FLAGS_UP) - .long PMD_TYPE_SECT | \ - PMD_SECT_XN | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - b __v7_setup - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP|HWCAP_TLS - .long cpu_v7_name - .long v7_processor_functions - .long v7wbi_tlb_fns - .long v6_user_fns - .long v7_cache_fns + __v7_proc __v7_setup .size __v7_proc_info, . - __v7_proc_info diff --git a/arch/arm/mm/proc-v7m.S b/arch/arm/mm/proc-v7m.S new file mode 100644 index 00000000000..1ca37c72f12 --- /dev/null +++ b/arch/arm/mm/proc-v7m.S @@ -0,0 +1,159 @@ +/* + * linux/arch/arm/mm/proc-v7m.S + * + * Copyright (C) 2008 ARM Ltd. + * Copyright (C) 2001 Deep Blue Solutions Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This is the "shell" of the ARMv7-M processor support. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/v7m.h> +#include "proc-macros.S" + +ENTRY(cpu_v7m_proc_init) + mov pc, lr +ENDPROC(cpu_v7m_proc_init) + +ENTRY(cpu_v7m_proc_fin) + mov pc, lr +ENDPROC(cpu_v7m_proc_fin) + +/* + * cpu_v7m_reset(loc) + * + * Perform a soft reset of the system. Put the CPU into the + * same state as it would be if it had been reset, and branch + * to what would be the reset vector. + * + * - loc - location to jump to for soft reset + */ + .align 5 +ENTRY(cpu_v7m_reset) + mov pc, r0 +ENDPROC(cpu_v7m_reset) + +/* + * cpu_v7m_do_idle() + * + * Idle the processor (eg, wait for interrupt). + * + * IRQs are already disabled. 
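
Each of the __v7_*_proc_info records above starts with a required ID value and a mask; at boot the head code walks the .proc.info.init section and picks the first record whose masked main ID register (MIDR) matches, which is why the specific Cortex and Krait entries precede the 0x000f0000/0x000f0000 catch-all. A rough sketch of that matching, using values copied from the entries above and a made-up MIDR:

#include <stdint.h>
#include <stdio.h>

struct proc_info_sketch {
        uint32_t cpu_val;
        uint32_t cpu_mask;
        const char *name;
};

/* A few of the entries above, in the same order. */
static const struct proc_info_sketch table[] = {
        { 0x410fc090, 0xff0ffff0, "Cortex-A9" },
        { 0x510f0400, 0xff0ffc00, "Krait" },
        { 0x000f0000, 0x000f0000, "generic ARMv7" },    /* catch-all */
};

static const char *match_midr(uint32_t midr)
{
        for (unsigned int i = 0; i < sizeof(table) / sizeof(table[0]); i++)
                if ((midr & table[i].cpu_mask) == table[i].cpu_val)
                        return table[i].name;
        return "unknown";
}

int main(void)
{
        printf("%s\n", match_midr(0x511f04d0));         /* hypothetical Krait MIDR */
        return 0;
}
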
+ */ +ENTRY(cpu_v7m_do_idle) + wfi + mov pc, lr +ENDPROC(cpu_v7m_do_idle) + +ENTRY(cpu_v7m_dcache_clean_area) + mov pc, lr +ENDPROC(cpu_v7m_dcache_clean_area) + +/* + * There is no MMU, so here is nothing to do. + */ +ENTRY(cpu_v7m_switch_mm) + mov pc, lr +ENDPROC(cpu_v7m_switch_mm) + +.globl cpu_v7m_suspend_size +.equ cpu_v7m_suspend_size, 0 + +#ifdef CONFIG_ARM_CPU_SUSPEND +ENTRY(cpu_v7m_do_suspend) + mov pc, lr +ENDPROC(cpu_v7m_do_suspend) + +ENTRY(cpu_v7m_do_resume) + mov pc, lr +ENDPROC(cpu_v7m_do_resume) +#endif + + .section ".text.init", #alloc, #execinstr + +/* + * __v7m_setup + * + * This should be able to cover all ARMv7-M cores. + */ +__v7m_setup: + @ Configure the vector table base address + ldr r0, =BASEADDR_V7M_SCB + ldr r12, =vector_table + str r12, [r0, V7M_SCB_VTOR] + + @ enable UsageFault, BusFault and MemManage fault. + ldr r5, [r0, #V7M_SCB_SHCSR] + orr r5, #(V7M_SCB_SHCSR_USGFAULTENA | V7M_SCB_SHCSR_BUSFAULTENA | V7M_SCB_SHCSR_MEMFAULTENA) + str r5, [r0, #V7M_SCB_SHCSR] + + @ Lower the priority of the SVC and PendSV exceptions + mov r5, #0x80000000 + str r5, [r0, V7M_SCB_SHPR2] @ set SVC priority + mov r5, #0x00800000 + str r5, [r0, V7M_SCB_SHPR3] @ set PendSV priority + + @ SVC to run the kernel in this mode + adr r1, BSYM(1f) + ldr r5, [r12, #11 * 4] @ read the SVC vector entry + str r1, [r12, #11 * 4] @ write the temporary SVC vector entry + mov r6, lr @ save LR + mov r7, sp @ save SP + ldr sp, =__v7m_setup_stack_top + cpsie i + svc #0 +1: cpsid i + str r5, [r12, #11 * 4] @ restore the original SVC vector entry + mov lr, r6 @ restore LR + mov sp, r7 @ restore SP + + @ Special-purpose control register + mov r1, #1 + msr control, r1 @ Thread mode has unpriviledged access + + @ Configure the System Control Register to ensure 8-byte stack alignment + @ Note the STKALIGN bit is either RW or RAO. + ldr r12, [r0, V7M_SCB_CCR] @ system control register + orr r12, #V7M_SCB_CCR_STKALIGN + str r12, [r0, V7M_SCB_CCR] + mov pc, lr +ENDPROC(__v7m_setup) + + .align 2 +__v7m_setup_stack: + .space 4 * 8 @ 8 registers +__v7m_setup_stack_top: + + define_processor_functions v7m, dabort=nommu_early_abort, pabort=legacy_pabort, nommu=1 + + .section ".rodata" + string cpu_arch_name, "armv7m" + string cpu_elf_name "v7m" + string cpu_v7m_name "ARMv7-M" + + .section ".proc.info.init", #alloc, #execinstr + + /* + * Match any ARMv7-M processor core. + */ + .type __v7m_proc_info, #object +__v7m_proc_info: + .long 0x000f0000 @ Required ID value + .long 0x000f0000 @ Mask for ID + .long 0 @ proc_info_list.__cpu_mm_mmu_flags + .long 0 @ proc_info_list.__cpu_io_mmu_flags + b __v7m_setup @ proc_info_list.__cpu_flush + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT + .long cpu_v7m_name + .long v7m_processor_functions @ proc_info_list.proc + .long 0 @ proc_info_list.tlb + .long 0 @ proc_info_list.user + .long nop_cache_fns @ proc_info_list.cache + .size __v7m_proc_info, . 
- __v7m_proc_info + diff --git a/arch/arm/mm/proc-xsc3.S b/arch/arm/mm/proc-xsc3.S index ec26355cb7c..dc164589004 100644 --- a/arch/arm/mm/proc-xsc3.S +++ b/arch/arm/mm/proc-xsc3.S @@ -28,7 +28,6 @@ #include <linux/init.h> #include <asm/assembler.h> #include <asm/hwcap.h> -#include <mach/hardware.h> #include <asm/pgtable.h> #include <asm/pgtable-hwdef.h> #include <asm/page.h> @@ -106,6 +105,7 @@ ENTRY(cpu_xsc3_proc_fin) * loc: location to jump to for soft reset */ .align 5 + .pushsection .idmap.text, "ax" ENTRY(cpu_xsc3_reset) mov r1, #PSR_F_BIT|PSR_I_BIT|SVC_MODE msr cpsr_c, r1 @ reset CPSR @@ -120,6 +120,8 @@ ENTRY(cpu_xsc3_reset) @ already containing those two last instructions to survive. mcr p15, 0, ip, c8, c7, 0 @ invalidate I and D TLBs mov pc, r0 +ENDPROC(cpu_xsc3_reset) + .popsection /* * cpu_xsc3_do_idle() @@ -335,17 +337,11 @@ ENTRY(xsc3_dma_unmap_area) mov pc, lr ENDPROC(xsc3_dma_unmap_area) -ENTRY(xsc3_cache_fns) - .long xsc3_flush_icache_all - .long xsc3_flush_kern_cache_all - .long xsc3_flush_user_cache_all - .long xsc3_flush_user_cache_range - .long xsc3_coherent_kern_range - .long xsc3_coherent_user_range - .long xsc3_flush_kern_dcache_area - .long xsc3_dma_map_area - .long xsc3_dma_unmap_area - .long xsc3_dma_flush_range + .globl xsc3_flush_kern_cache_louis + .equ xsc3_flush_kern_cache_louis, xsc3_flush_kern_cache_all + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions xsc3 ENTRY(cpu_xsc3_dcache_clean_area) 1: mcr p15, 0, r0, c7, c10, 1 @ clean L1 D line @@ -413,10 +409,42 @@ ENTRY(cpu_xsc3_set_pte_ext) mov pc, lr .ltorg - .align - __CPUINIT +.globl cpu_xsc3_suspend_size +.equ cpu_xsc3_suspend_size, 4 * 6 +#ifdef CONFIG_ARM_CPU_SUSPEND +ENTRY(cpu_xsc3_do_suspend) + stmfd sp!, {r4 - r9, lr} + mrc p14, 0, r4, c6, c0, 0 @ clock configuration, for turbo mode + mrc p15, 0, r5, c15, c1, 0 @ CP access reg + mrc p15, 0, r6, c13, c0, 0 @ PID + mrc p15, 0, r7, c3, c0, 0 @ domain ID + mrc p15, 0, r8, c1, c0, 1 @ auxiliary control reg + mrc p15, 0, r9, c1, c0, 0 @ control reg + bic r4, r4, #2 @ clear frequency change bit + stmia r0, {r4 - r9} @ store cp regs + ldmia sp!, {r4 - r9, pc} +ENDPROC(cpu_xsc3_do_suspend) + +ENTRY(cpu_xsc3_do_resume) + ldmia r0, {r4 - r9} @ load cp regs + mov ip, #0 + mcr p15, 0, ip, c7, c7, 0 @ invalidate I & D caches, BTB + mcr p15, 0, ip, c7, c10, 4 @ drain write (&fill) buffer + mcr p15, 0, ip, c7, c5, 4 @ flush prefetch buffer + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs + mcr p14, 0, r4, c6, c0, 0 @ clock configuration, turbo mode. + mcr p15, 0, r5, c15, c1, 0 @ CP access reg + mcr p15, 0, r6, c13, c0, 0 @ PID + mcr p15, 0, r7, c3, c0, 0 @ domain ID + orr r1, r1, #0x18 @ cache the page table in L2 + mcr p15, 0, r1, c2, c0, 0 @ translation table base addr + mcr p15, 0, r8, c1, c0, 1 @ auxiliary control reg + mov r0, r9 @ control register + b cpu_resume_mmu +ENDPROC(cpu_xsc3_do_resume) +#endif .type __xsc3_setup, #function __xsc3_setup: @@ -460,49 +488,24 @@ xsc3_crval: __INITDATA -/* - * Purpose : Function pointers used to access above functions - all calls - * come through these - */ - - .type xsc3_processor_functions, #object -ENTRY(xsc3_processor_functions) - .word v5t_early_abort - .word legacy_pabort - .word cpu_xsc3_proc_init - .word cpu_xsc3_proc_fin - .word cpu_xsc3_reset - .word cpu_xsc3_do_idle - .word cpu_xsc3_dcache_clean_area - .word cpu_xsc3_switch_mm - .word cpu_xsc3_set_pte_ext - .size xsc3_processor_functions, . 
- xsc3_processor_functions + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions xsc3, dabort=v5t_early_abort, pabort=legacy_pabort, suspend=1 .section ".rodata" - .type cpu_arch_name, #object -cpu_arch_name: - .asciz "armv5te" - .size cpu_arch_name, . - cpu_arch_name - - .type cpu_elf_name, #object -cpu_elf_name: - .asciz "v5" - .size cpu_elf_name, . - cpu_elf_name - - .type cpu_xsc3_name, #object -cpu_xsc3_name: - .asciz "XScale-V3 based processor" - .size cpu_xsc3_name, . - cpu_xsc3_name + string cpu_arch_name, "armv5te" + string cpu_elf_name, "v5" + string cpu_xsc3_name, "XScale-V3 based processor" .align .section ".proc.info.init", #alloc, #execinstr - .type __xsc3_proc_info,#object -__xsc3_proc_info: - .long 0x69056000 - .long 0xffffe000 +.macro xsc3_proc_info name:req, cpu_val:req, cpu_mask:req + .type __\name\()_proc_info,#object +__\name\()_proc_info: + .long \cpu_val + .long \cpu_mask .long PMD_TYPE_SECT | \ PMD_SECT_BUFFERABLE | \ PMD_SECT_CACHEABLE | \ @@ -520,29 +523,10 @@ __xsc3_proc_info: .long v4wbi_tlb_fns .long xsc3_mc_user_fns .long xsc3_cache_fns - .size __xsc3_proc_info, . - __xsc3_proc_info + .size __\name\()_proc_info, . - __\name\()_proc_info +.endm -/* Note: PXA935 changed its implementor ID from Intel to Marvell */ + xsc3_proc_info xsc3, 0x69056000, 0xffffe000 - .type __xsc3_pxa935_proc_info,#object -__xsc3_pxa935_proc_info: - .long 0x56056000 - .long 0xffffe000 - .long PMD_TYPE_SECT | \ - PMD_SECT_BUFFERABLE | \ - PMD_SECT_CACHEABLE | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - .long PMD_TYPE_SECT | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - b __xsc3_setup - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP - .long cpu_xsc3_name - .long xsc3_processor_functions - .long v4wbi_tlb_fns - .long xsc3_mc_user_fns - .long xsc3_cache_fns - .size __xsc3_pxa935_proc_info, . - __xsc3_pxa935_proc_info +/* Note: PXA935 changed its implementor ID from Intel to Marvell */ + xsc3_proc_info xsc3_pxa935, 0x56056000, 0xffffe000 diff --git a/arch/arm/mm/proc-xscale.S b/arch/arm/mm/proc-xscale.S index 523408c0bb3..d19b1cfcad9 100644 --- a/arch/arm/mm/proc-xscale.S +++ b/arch/arm/mm/proc-xscale.S @@ -142,6 +142,7 @@ ENTRY(cpu_xscale_proc_fin) * Beware PXA270 erratum E7. */ .align 5 + .pushsection .idmap.text, "ax" ENTRY(cpu_xscale_reset) mov r1, #PSR_F_BIT|PSR_I_BIT|SVC_MODE msr cpsr_c, r1 @ reset CPSR @@ -160,6 +161,8 @@ ENTRY(cpu_xscale_reset) @ already containing those two last instructions to survive. 
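
The two xsc3_proc_info invocations above differ only in the CPU ID they match: the implementer field in bits [31:24] is 0x69 ('i', Intel) for the original XScale-V3 parts and 0x56 ('V', Marvell) for the PXA935, which is exactly the note carried over from the old open-coded tables. A small sketch that prints those fields:

#include <stdio.h>

/* Show the implementer byte of the two cpu_val words used above. */
static void show_implementer(unsigned int cpu_val)
{
        printf("cpu_val 0x%08x implementer 0x%02x ('%c')\n",
               cpu_val, cpu_val >> 24, (int)(cpu_val >> 24));
}

int main(void)
{
        show_implementer(0x69056000);   /* xsc3 */
        show_implementer(0x56056000);   /* xsc3_pxa935 */
        return 0;
}
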
mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs mov pc, r0 +ENDPROC(cpu_xscale_reset) + .popsection /* * cpu_xscale_do_idle() @@ -390,12 +393,12 @@ ENDPROC(xscale_dma_map_area) * - size - size of region * - dir - DMA direction */ -ENTRY(xscale_dma_a0_map_area) +ENTRY(xscale_80200_A0_A1_dma_map_area) add r1, r1, r0 teq r2, #DMA_TO_DEVICE beq xscale_dma_clean_range b xscale_dma_flush_range -ENDPROC(xscsale_dma_a0_map_area) +ENDPROC(xscale_80200_A0_A1_dma_map_area) /* * dma_unmap_area(start, size, dir) @@ -407,17 +410,11 @@ ENTRY(xscale_dma_unmap_area) mov pc, lr ENDPROC(xscale_dma_unmap_area) -ENTRY(xscale_cache_fns) - .long xscale_flush_icache_all - .long xscale_flush_kern_cache_all - .long xscale_flush_user_cache_all - .long xscale_flush_user_cache_range - .long xscale_coherent_kern_range - .long xscale_coherent_user_range - .long xscale_flush_kern_dcache_area - .long xscale_dma_map_area - .long xscale_dma_unmap_area - .long xscale_dma_flush_range + .globl xscale_flush_kern_cache_louis + .equ xscale_flush_kern_cache_louis, xscale_flush_kern_cache_all + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions xscale /* * On stepping A0/A1 of the 80200, invalidating D-cache by line doesn't @@ -432,16 +429,29 @@ ENTRY(xscale_cache_fns) * revision January 22, 2003, available at: * http://www.intel.com/design/iio/specupdt/273415.htm */ -ENTRY(xscale_80200_A0_A1_cache_fns) - .long xscale_flush_kern_cache_all - .long xscale_flush_user_cache_all - .long xscale_flush_user_cache_range - .long xscale_coherent_kern_range - .long xscale_coherent_user_range - .long xscale_flush_kern_dcache_area - .long xscale_dma_a0_map_area - .long xscale_dma_unmap_area - .long xscale_dma_flush_range +.macro a0_alias basename + .globl xscale_80200_A0_A1_\basename + .type xscale_80200_A0_A1_\basename , %function + .equ xscale_80200_A0_A1_\basename , xscale_\basename +.endm + +/* + * Most of the cache functions are unchanged for these processor revisions. 
+ * Export suitable alias symbols for the unchanged functions: + */ + a0_alias flush_icache_all + a0_alias flush_user_cache_all + a0_alias flush_kern_cache_all + a0_alias flush_kern_cache_louis + a0_alias flush_user_cache_range + a0_alias coherent_kern_range + a0_alias coherent_user_range + a0_alias flush_kern_dcache_area + a0_alias dma_flush_range + a0_alias dma_unmap_area + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions xscale_80200_A0_A1 ENTRY(cpu_xscale_dcache_clean_area) 1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry @@ -500,8 +510,8 @@ ENTRY(cpu_xscale_set_pte_ext) @ @ Erratum 40: must set memory to write-through for user read-only pages @ - and ip, r1, #(L_PTE_MT_MASK | L_PTE_USER | L_PTE_WRITE) & ~(4 << 2) - teq ip, #L_PTE_MT_WRITEBACK | L_PTE_USER + and ip, r1, #(L_PTE_MT_MASK | L_PTE_USER | L_PTE_RDONLY) & ~(4 << 2) + teq ip, #L_PTE_MT_WRITEBACK | L_PTE_USER | L_PTE_RDONLY moveq r1, #L_PTE_MT_WRITETHROUGH and r1, r1, #L_PTE_MT_MASK @@ -513,12 +523,40 @@ ENTRY(cpu_xscale_set_pte_ext) xscale_set_pte_ext_epilogue mov pc, lr - .ltorg - .align - __CPUINIT +.globl cpu_xscale_suspend_size +.equ cpu_xscale_suspend_size, 4 * 6 +#ifdef CONFIG_ARM_CPU_SUSPEND +ENTRY(cpu_xscale_do_suspend) + stmfd sp!, {r4 - r9, lr} + mrc p14, 0, r4, c6, c0, 0 @ clock configuration, for turbo mode + mrc p15, 0, r5, c15, c1, 0 @ CP access reg + mrc p15, 0, r6, c13, c0, 0 @ PID + mrc p15, 0, r7, c3, c0, 0 @ domain ID + mrc p15, 0, r8, c1, c1, 0 @ auxiliary control reg + mrc p15, 0, r9, c1, c0, 0 @ control reg + bic r4, r4, #2 @ clear frequency change bit + stmia r0, {r4 - r9} @ store cp regs + ldmfd sp!, {r4 - r9, pc} +ENDPROC(cpu_xscale_do_suspend) + +ENTRY(cpu_xscale_do_resume) + ldmia r0, {r4 - r9} @ load cp regs + mov ip, #0 + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs + mcr p15, 0, ip, c7, c7, 0 @ invalidate I & D caches, BTB + mcr p14, 0, r4, c6, c0, 0 @ clock configuration, turbo mode. + mcr p15, 0, r5, c15, c1, 0 @ CP access reg + mcr p15, 0, r6, c13, c0, 0 @ PID + mcr p15, 0, r7, c3, c0, 0 @ domain ID + mcr p15, 0, r1, c2, c0, 0 @ translation table base addr + mcr p15, 0, r8, c1, c1, 0 @ auxiliary control reg + mov r0, r9 @ control register + b cpu_resume_mmu +ENDPROC(cpu_xscale_do_resume) +#endif .type __xscale_setup, #function __xscale_setup: @@ -549,429 +587,74 @@ xscale_crval: __INITDATA -/* - * Purpose : Function pointers used to access above functions - all calls - * come through these - */ - - .type xscale_processor_functions, #object -ENTRY(xscale_processor_functions) - .word v5t_early_abort - .word legacy_pabort - .word cpu_xscale_proc_init - .word cpu_xscale_proc_fin - .word cpu_xscale_reset - .word cpu_xscale_do_idle - .word cpu_xscale_dcache_clean_area - .word cpu_xscale_switch_mm - .word cpu_xscale_set_pte_ext - .size xscale_processor_functions, . - xscale_processor_functions + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions xscale, dabort=v5t_early_abort, pabort=legacy_pabort, suspend=1 .section ".rodata" - .type cpu_arch_name, #object -cpu_arch_name: - .asciz "armv5te" - .size cpu_arch_name, . - cpu_arch_name - - .type cpu_elf_name, #object -cpu_elf_name: - .asciz "v5" - .size cpu_elf_name, . - cpu_elf_name - - .type cpu_80200_A0_A1_name, #object -cpu_80200_A0_A1_name: - .asciz "XScale-80200 A0/A1" - .size cpu_80200_A0_A1_name, . - cpu_80200_A0_A1_name - - .type cpu_80200_name, #object -cpu_80200_name: - .asciz "XScale-80200" - .size cpu_80200_name, . 
- cpu_80200_name - - .type cpu_80219_name, #object -cpu_80219_name: - .asciz "XScale-80219" - .size cpu_80219_name, . - cpu_80219_name - - .type cpu_8032x_name, #object -cpu_8032x_name: - .asciz "XScale-IOP8032x Family" - .size cpu_8032x_name, . - cpu_8032x_name - - .type cpu_8033x_name, #object -cpu_8033x_name: - .asciz "XScale-IOP8033x Family" - .size cpu_8033x_name, . - cpu_8033x_name - - .type cpu_pxa250_name, #object -cpu_pxa250_name: - .asciz "XScale-PXA250" - .size cpu_pxa250_name, . - cpu_pxa250_name - - .type cpu_pxa210_name, #object -cpu_pxa210_name: - .asciz "XScale-PXA210" - .size cpu_pxa210_name, . - cpu_pxa210_name - - .type cpu_ixp42x_name, #object -cpu_ixp42x_name: - .asciz "XScale-IXP42x Family" - .size cpu_ixp42x_name, . - cpu_ixp42x_name - - .type cpu_ixp43x_name, #object -cpu_ixp43x_name: - .asciz "XScale-IXP43x Family" - .size cpu_ixp43x_name, . - cpu_ixp43x_name - - .type cpu_ixp46x_name, #object -cpu_ixp46x_name: - .asciz "XScale-IXP46x Family" - .size cpu_ixp46x_name, . - cpu_ixp46x_name - - .type cpu_ixp2400_name, #object -cpu_ixp2400_name: - .asciz "XScale-IXP2400" - .size cpu_ixp2400_name, . - cpu_ixp2400_name - - .type cpu_ixp2800_name, #object -cpu_ixp2800_name: - .asciz "XScale-IXP2800" - .size cpu_ixp2800_name, . - cpu_ixp2800_name - - .type cpu_pxa255_name, #object -cpu_pxa255_name: - .asciz "XScale-PXA255" - .size cpu_pxa255_name, . - cpu_pxa255_name - - .type cpu_pxa270_name, #object -cpu_pxa270_name: - .asciz "XScale-PXA270" - .size cpu_pxa270_name, . - cpu_pxa270_name + string cpu_arch_name, "armv5te" + string cpu_elf_name, "v5" + + string cpu_80200_A0_A1_name, "XScale-80200 A0/A1" + string cpu_80200_name, "XScale-80200" + string cpu_80219_name, "XScale-80219" + string cpu_8032x_name, "XScale-IOP8032x Family" + string cpu_8033x_name, "XScale-IOP8033x Family" + string cpu_pxa250_name, "XScale-PXA250" + string cpu_pxa210_name, "XScale-PXA210" + string cpu_ixp42x_name, "XScale-IXP42x Family" + string cpu_ixp43x_name, "XScale-IXP43x Family" + string cpu_ixp46x_name, "XScale-IXP46x Family" + string cpu_ixp2400_name, "XScale-IXP2400" + string cpu_ixp2800_name, "XScale-IXP2800" + string cpu_pxa255_name, "XScale-PXA255" + string cpu_pxa270_name, "XScale-PXA270" .align .section ".proc.info.init", #alloc, #execinstr - .type __80200_A0_A1_proc_info,#object -__80200_A0_A1_proc_info: - .long 0x69052000 - .long 0xfffffffe - .long PMD_TYPE_SECT | \ +.macro xscale_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cache + .type __\name\()_proc_info,#object +__\name\()_proc_info: + .long \cpu_val + .long \cpu_mask + .long PMD_TYPE_SECT | \ PMD_SECT_BUFFERABLE | \ PMD_SECT_CACHEABLE | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ - .long PMD_TYPE_SECT | \ + .long PMD_TYPE_SECT | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ b __xscale_setup .long cpu_arch_name .long cpu_elf_name .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP - .long cpu_80200_name + .long \cpu_name .long xscale_processor_functions .long v4wbi_tlb_fns .long xscale_mc_user_fns - .long xscale_80200_A0_A1_cache_fns - .size __80200_A0_A1_proc_info, . 
- __80200_A0_A1_proc_info - - .type __80200_proc_info,#object -__80200_proc_info: - .long 0x69052000 - .long 0xfffffff0 - .long PMD_TYPE_SECT | \ - PMD_SECT_BUFFERABLE | \ - PMD_SECT_CACHEABLE | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - .long PMD_TYPE_SECT | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - b __xscale_setup - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP - .long cpu_80200_name - .long xscale_processor_functions - .long v4wbi_tlb_fns - .long xscale_mc_user_fns - .long xscale_cache_fns - .size __80200_proc_info, . - __80200_proc_info - - .type __80219_proc_info,#object -__80219_proc_info: - .long 0x69052e20 - .long 0xffffffe0 - .long PMD_TYPE_SECT | \ - PMD_SECT_BUFFERABLE | \ - PMD_SECT_CACHEABLE | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - .long PMD_TYPE_SECT | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - b __xscale_setup - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP - .long cpu_80219_name - .long xscale_processor_functions - .long v4wbi_tlb_fns - .long xscale_mc_user_fns - .long xscale_cache_fns - .size __80219_proc_info, . - __80219_proc_info - - .type __8032x_proc_info,#object -__8032x_proc_info: - .long 0x69052420 - .long 0xfffff7e0 - .long PMD_TYPE_SECT | \ - PMD_SECT_BUFFERABLE | \ - PMD_SECT_CACHEABLE | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - .long PMD_TYPE_SECT | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - b __xscale_setup - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP - .long cpu_8032x_name - .long xscale_processor_functions - .long v4wbi_tlb_fns - .long xscale_mc_user_fns - .long xscale_cache_fns - .size __8032x_proc_info, . - __8032x_proc_info - - .type __8033x_proc_info,#object -__8033x_proc_info: - .long 0x69054010 - .long 0xfffffd30 - .long PMD_TYPE_SECT | \ - PMD_SECT_BUFFERABLE | \ - PMD_SECT_CACHEABLE | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - .long PMD_TYPE_SECT | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - b __xscale_setup - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP - .long cpu_8033x_name - .long xscale_processor_functions - .long v4wbi_tlb_fns - .long xscale_mc_user_fns - .long xscale_cache_fns - .size __8033x_proc_info, . - __8033x_proc_info - - .type __pxa250_proc_info,#object -__pxa250_proc_info: - .long 0x69052100 - .long 0xfffff7f0 - .long PMD_TYPE_SECT | \ - PMD_SECT_BUFFERABLE | \ - PMD_SECT_CACHEABLE | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - .long PMD_TYPE_SECT | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - b __xscale_setup - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP - .long cpu_pxa250_name - .long xscale_processor_functions - .long v4wbi_tlb_fns - .long xscale_mc_user_fns - .long xscale_cache_fns - .size __pxa250_proc_info, . 
- __pxa250_proc_info - - .type __pxa210_proc_info,#object -__pxa210_proc_info: - .long 0x69052120 - .long 0xfffff3f0 - .long PMD_TYPE_SECT | \ - PMD_SECT_BUFFERABLE | \ - PMD_SECT_CACHEABLE | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - .long PMD_TYPE_SECT | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - b __xscale_setup - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP - .long cpu_pxa210_name - .long xscale_processor_functions - .long v4wbi_tlb_fns - .long xscale_mc_user_fns - .long xscale_cache_fns - .size __pxa210_proc_info, . - __pxa210_proc_info - - .type __ixp2400_proc_info, #object -__ixp2400_proc_info: - .long 0x69054190 - .long 0xfffffff0 - .long PMD_TYPE_SECT | \ - PMD_SECT_BUFFERABLE | \ - PMD_SECT_CACHEABLE | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - .long PMD_TYPE_SECT | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - b __xscale_setup - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP - .long cpu_ixp2400_name - .long xscale_processor_functions - .long v4wbi_tlb_fns - .long xscale_mc_user_fns - .long xscale_cache_fns - .size __ixp2400_proc_info, . - __ixp2400_proc_info - - .type __ixp2800_proc_info, #object -__ixp2800_proc_info: - .long 0x690541a0 - .long 0xfffffff0 - .long PMD_TYPE_SECT | \ - PMD_SECT_BUFFERABLE | \ - PMD_SECT_CACHEABLE | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - .long PMD_TYPE_SECT | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - b __xscale_setup - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP - .long cpu_ixp2800_name - .long xscale_processor_functions - .long v4wbi_tlb_fns - .long xscale_mc_user_fns - .long xscale_cache_fns - .size __ixp2800_proc_info, . - __ixp2800_proc_info - - .type __ixp42x_proc_info, #object -__ixp42x_proc_info: - .long 0x690541c0 - .long 0xffffffc0 - .long PMD_TYPE_SECT | \ - PMD_SECT_BUFFERABLE | \ - PMD_SECT_CACHEABLE | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - .long PMD_TYPE_SECT | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - b __xscale_setup - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP - .long cpu_ixp42x_name - .long xscale_processor_functions - .long v4wbi_tlb_fns - .long xscale_mc_user_fns - .long xscale_cache_fns - .size __ixp42x_proc_info, . - __ixp42x_proc_info - - .type __ixp43x_proc_info, #object -__ixp43x_proc_info: - .long 0x69054040 - .long 0xfffffff0 - .long PMD_TYPE_SECT | \ - PMD_SECT_BUFFERABLE | \ - PMD_SECT_CACHEABLE | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - .long PMD_TYPE_SECT | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - b __xscale_setup - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP - .long cpu_ixp43x_name - .long xscale_processor_functions - .long v4wbi_tlb_fns - .long xscale_mc_user_fns - .long xscale_cache_fns - .size __ixp43x_proc_info, . 
- __ixp43x_proc_info - - .type __ixp46x_proc_info, #object -__ixp46x_proc_info: - .long 0x69054200 - .long 0xffffff00 - .long PMD_TYPE_SECT | \ - PMD_SECT_BUFFERABLE | \ - PMD_SECT_CACHEABLE | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - .long PMD_TYPE_SECT | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - b __xscale_setup - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP - .long cpu_ixp46x_name - .long xscale_processor_functions - .long v4wbi_tlb_fns - .long xscale_mc_user_fns - .long xscale_cache_fns - .size __ixp46x_proc_info, . - __ixp46x_proc_info - - .type __pxa255_proc_info,#object -__pxa255_proc_info: - .long 0x69052d00 - .long 0xfffffff0 - .long PMD_TYPE_SECT | \ - PMD_SECT_BUFFERABLE | \ - PMD_SECT_CACHEABLE | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - .long PMD_TYPE_SECT | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - b __xscale_setup - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP - .long cpu_pxa255_name - .long xscale_processor_functions - .long v4wbi_tlb_fns - .long xscale_mc_user_fns - .long xscale_cache_fns - .size __pxa255_proc_info, . - __pxa255_proc_info - - .type __pxa270_proc_info,#object -__pxa270_proc_info: - .long 0x69054110 - .long 0xfffffff0 - .long PMD_TYPE_SECT | \ - PMD_SECT_BUFFERABLE | \ - PMD_SECT_CACHEABLE | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - .long PMD_TYPE_SECT | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - b __xscale_setup - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP - .long cpu_pxa270_name - .long xscale_processor_functions - .long v4wbi_tlb_fns - .long xscale_mc_user_fns - .long xscale_cache_fns - .size __pxa270_proc_info, . - __pxa270_proc_info - + .ifb \cache + .long xscale_cache_fns + .else + .long \cache + .endif + .size __\name\()_proc_info, . 
- __\name\()_proc_info +.endm + + xscale_proc_info 80200_A0_A1, 0x69052000, 0xfffffffe, cpu_80200_name, \ + cache=xscale_80200_A0_A1_cache_fns + xscale_proc_info 80200, 0x69052000, 0xfffffff0, cpu_80200_name + xscale_proc_info 80219, 0x69052e20, 0xffffffe0, cpu_80219_name + xscale_proc_info 8032x, 0x69052420, 0xfffff7e0, cpu_8032x_name + xscale_proc_info 8033x, 0x69054010, 0xfffffd30, cpu_8033x_name + xscale_proc_info pxa250, 0x69052100, 0xfffff7f0, cpu_pxa250_name + xscale_proc_info pxa210, 0x69052120, 0xfffff3f0, cpu_pxa210_name + xscale_proc_info ixp2400, 0x69054190, 0xfffffff0, cpu_ixp2400_name + xscale_proc_info ixp2800, 0x690541a0, 0xfffffff0, cpu_ixp2800_name + xscale_proc_info ixp42x, 0x690541c0, 0xffffffc0, cpu_ixp42x_name + xscale_proc_info ixp43x, 0x69054040, 0xfffffff0, cpu_ixp43x_name + xscale_proc_info ixp46x, 0x69054200, 0xffffff00, cpu_ixp46x_name + xscale_proc_info pxa255, 0x69052d00, 0xfffffff0, cpu_pxa255_name + xscale_proc_info pxa270, 0x69054110, 0xfffffff0, cpu_pxa270_name diff --git a/arch/arm/mm/tcm.h b/arch/arm/mm/tcm.h new file mode 100644 index 00000000000..8015ad434a4 --- /dev/null +++ b/arch/arm/mm/tcm.h @@ -0,0 +1,17 @@ +/* + * Copyright (C) 2008-2009 ST-Ericsson AB + * License terms: GNU General Public License (GPL) version 2 + * TCM memory handling for ARM systems + * + * Author: Linus Walleij <linus.walleij@stericsson.com> + * Author: Rickard Andersson <rickard.andersson@stericsson.com> + */ + +#ifdef CONFIG_HAVE_TCM +void __init tcm_init(void); +#else +/* No TCM support, just blank inlines to be optimized out */ +inline void tcm_init(void) +{ +} +#endif diff --git a/arch/arm/mm/tlb-fa.S b/arch/arm/mm/tlb-fa.S index 9694f1f6f48..d3ddcf9a76c 100644 --- a/arch/arm/mm/tlb-fa.S +++ b/arch/arm/mm/tlb-fa.S @@ -46,7 +46,6 @@ ENTRY(fa_flush_user_tlb_range) add r0, r0, #PAGE_SZ cmp r0, r1 blo 1b - mcr p15, 0, r3, c7, c5, 6 @ invalidate BTB mcr p15, 0, r3, c7, c10, 4 @ data write barrier mov pc, lr @@ -60,16 +59,11 @@ ENTRY(fa_flush_kern_tlb_range) add r0, r0, #PAGE_SZ cmp r0, r1 blo 1b - mcr p15, 0, r3, c7, c5, 6 @ invalidate BTB mcr p15, 0, r3, c7, c10, 4 @ data write barrier - mcr p15, 0, r3, c7, c5, 4 @ prefetch flush + mcr p15, 0, r3, c7, c5, 4 @ prefetch flush (isb) mov pc, lr __INITDATA - .type fa_tlb_fns, #object -ENTRY(fa_tlb_fns) - .long fa_flush_user_tlb_range - .long fa_flush_kern_tlb_range - .long fa_tlb_flags - .size fa_tlb_fns, . - fa_tlb_fns + /* define struct cpu_tlb_fns (see <asm/tlbflush.h> and proc-macros.S) */ + define_tlb_functions fa, fa_tlb_flags diff --git a/arch/arm/mm/tlb-v3.S b/arch/arm/mm/tlb-v3.S deleted file mode 100644 index c10786ec8e0..00000000000 --- a/arch/arm/mm/tlb-v3.S +++ /dev/null @@ -1,52 +0,0 @@ -/* - * linux/arch/arm/mm/tlbv3.S - * - * Copyright (C) 1997-2002 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * ARM architecture version 3 TLB handling functions. - * - * Processors: ARM610, ARM710. - */ -#include <linux/linkage.h> -#include <linux/init.h> -#include <asm/asm-offsets.h> -#include <asm/tlbflush.h> -#include "proc-macros.S" - - .align 5 -/* - * v3_flush_user_tlb_range(start, end, mm) - * - * Invalidate a range of TLB entries in the specified address space. 
- * - * - start - range start address - * - end - range end address - * - mm - mm_struct describing address space - */ - .align 5 -ENTRY(v3_flush_user_tlb_range) - vma_vm_mm r2, r2 - act_mm r3 @ get current->active_mm - teq r2, r3 @ == mm ? - movne pc, lr @ no, we dont do anything -ENTRY(v3_flush_kern_tlb_range) - bic r0, r0, #0x0ff - bic r0, r0, #0xf00 -1: mcr p15, 0, r0, c6, c0, 0 @ invalidate TLB entry - add r0, r0, #PAGE_SZ - cmp r0, r1 - blo 1b - mov pc, lr - - __INITDATA - - .type v3_tlb_fns, #object -ENTRY(v3_tlb_fns) - .long v3_flush_user_tlb_range - .long v3_flush_kern_tlb_range - .long v3_tlb_flags - .size v3_tlb_fns, . - v3_tlb_fns diff --git a/arch/arm/mm/tlb-v4.S b/arch/arm/mm/tlb-v4.S index d6c94457c2b..17a025ade57 100644 --- a/arch/arm/mm/tlb-v4.S +++ b/arch/arm/mm/tlb-v4.S @@ -57,9 +57,5 @@ ENTRY(v4_flush_user_tlb_range) __INITDATA - .type v4_tlb_fns, #object -ENTRY(v4_tlb_fns) - .long v4_flush_user_tlb_range - .long v4_flush_kern_tlb_range - .long v4_tlb_flags - .size v4_tlb_fns, . - v4_tlb_fns + /* define struct cpu_tlb_fns (see <asm/tlbflush.h> and proc-macros.S) */ + define_tlb_functions v4, v4_tlb_flags diff --git a/arch/arm/mm/tlb-v4wb.S b/arch/arm/mm/tlb-v4wb.S index cb829ca7845..c04598fa4d4 100644 --- a/arch/arm/mm/tlb-v4wb.S +++ b/arch/arm/mm/tlb-v4wb.S @@ -69,9 +69,5 @@ ENTRY(v4wb_flush_kern_tlb_range) __INITDATA - .type v4wb_tlb_fns, #object -ENTRY(v4wb_tlb_fns) - .long v4wb_flush_user_tlb_range - .long v4wb_flush_kern_tlb_range - .long v4wb_tlb_flags - .size v4wb_tlb_fns, . - v4wb_tlb_fns + /* define struct cpu_tlb_fns (see <asm/tlbflush.h> and proc-macros.S) */ + define_tlb_functions v4wb, v4wb_tlb_flags diff --git a/arch/arm/mm/tlb-v4wbi.S b/arch/arm/mm/tlb-v4wbi.S index 60cfc4a25dd..1f6062b6c1c 100644 --- a/arch/arm/mm/tlb-v4wbi.S +++ b/arch/arm/mm/tlb-v4wbi.S @@ -60,9 +60,5 @@ ENTRY(v4wbi_flush_kern_tlb_range) __INITDATA - .type v4wbi_tlb_fns, #object -ENTRY(v4wbi_tlb_fns) - .long v4wbi_flush_user_tlb_range - .long v4wbi_flush_kern_tlb_range - .long v4wbi_tlb_flags - .size v4wbi_tlb_fns, . - v4wbi_tlb_fns + /* define struct cpu_tlb_fns (see <asm/tlbflush.h> and proc-macros.S) */ + define_tlb_functions v4wbi, v4wbi_tlb_flags diff --git a/arch/arm/mm/tlb-v6.S b/arch/arm/mm/tlb-v6.S index 73d7d89b04c..eca07f550a0 100644 --- a/arch/arm/mm/tlb-v6.S +++ b/arch/arm/mm/tlb-v6.S @@ -54,7 +54,6 @@ ENTRY(v6wbi_flush_user_tlb_range) add r0, r0, #PAGE_SZ cmp r0, r1 blo 1b - mcr p15, 0, ip, c7, c5, 6 @ flush BTAC/BTB mcr p15, 0, ip, c7, c10, 4 @ data synchronization barrier mov pc, lr @@ -83,16 +82,11 @@ ENTRY(v6wbi_flush_kern_tlb_range) add r0, r0, #PAGE_SZ cmp r0, r1 blo 1b - mcr p15, 0, r2, c7, c5, 6 @ flush BTAC/BTB mcr p15, 0, r2, c7, c10, 4 @ data synchronization barrier - mcr p15, 0, r2, c7, c5, 4 @ prefetch flush + mcr p15, 0, r2, c7, c5, 4 @ prefetch flush (isb) mov pc, lr __INIT - .type v6wbi_tlb_fns, #object -ENTRY(v6wbi_tlb_fns) - .long v6wbi_flush_user_tlb_range - .long v6wbi_flush_kern_tlb_range - .long v6wbi_tlb_flags - .size v6wbi_tlb_fns, . 
- v6wbi_tlb_fns + /* define struct cpu_tlb_fns (see <asm/tlbflush.h> and proc-macros.S) */ + define_tlb_functions v6wbi, v6wbi_tlb_flags diff --git a/arch/arm/mm/tlb-v7.S b/arch/arm/mm/tlb-v7.S index 53cd5b45467..355308767ba 100644 --- a/arch/arm/mm/tlb-v7.S +++ b/arch/arm/mm/tlb-v7.S @@ -35,23 +35,28 @@ ENTRY(v7wbi_flush_user_tlb_range) vma_vm_mm r3, r2 @ get vma->vm_mm mmid r3, r3 @ get vm_mm->context.id - dsb + dsb ish mov r0, r0, lsr #PAGE_SHIFT @ align address mov r1, r1, lsr #PAGE_SHIFT asid r3, r3 @ mask ASID +#ifdef CONFIG_ARM_ERRATA_720789 + ALT_SMP(W(mov) r3, #0 ) + ALT_UP(W(nop) ) +#endif orr r0, r3, r0, lsl #PAGE_SHIFT @ Create initial MVA mov r1, r1, lsl #PAGE_SHIFT 1: +#ifdef CONFIG_ARM_ERRATA_720789 + ALT_SMP(mcr p15, 0, r0, c8, c3, 3) @ TLB invalidate U MVA all ASID (shareable) +#else ALT_SMP(mcr p15, 0, r0, c8, c3, 1) @ TLB invalidate U MVA (shareable) +#endif ALT_UP(mcr p15, 0, r0, c8, c7, 1) @ TLB invalidate U MVA add r0, r0, #PAGE_SZ cmp r0, r1 blo 1b - mov ip, #0 - ALT_SMP(mcr p15, 0, ip, c7, c1, 6) @ flush BTAC/BTB Inner Shareable - ALT_UP(mcr p15, 0, ip, c7, c5, 6) @ flush BTAC/BTB - dsb + dsb ish mov pc, lr ENDPROC(v7wbi_flush_user_tlb_range) @@ -64,31 +69,27 @@ ENDPROC(v7wbi_flush_user_tlb_range) * - end - end address (exclusive, may not be aligned) */ ENTRY(v7wbi_flush_kern_tlb_range) - dsb + dsb ish mov r0, r0, lsr #PAGE_SHIFT @ align address mov r1, r1, lsr #PAGE_SHIFT mov r0, r0, lsl #PAGE_SHIFT mov r1, r1, lsl #PAGE_SHIFT 1: +#ifdef CONFIG_ARM_ERRATA_720789 + ALT_SMP(mcr p15, 0, r0, c8, c3, 3) @ TLB invalidate U MVA all ASID (shareable) +#else ALT_SMP(mcr p15, 0, r0, c8, c3, 1) @ TLB invalidate U MVA (shareable) +#endif ALT_UP(mcr p15, 0, r0, c8, c7, 1) @ TLB invalidate U MVA add r0, r0, #PAGE_SZ cmp r0, r1 blo 1b - mov r2, #0 - ALT_SMP(mcr p15, 0, r2, c7, c1, 6) @ flush BTAC/BTB Inner Shareable - ALT_UP(mcr p15, 0, r2, c7, c5, 6) @ flush BTAC/BTB - dsb + dsb ish isb mov pc, lr ENDPROC(v7wbi_flush_kern_tlb_range) __INIT - .type v7wbi_tlb_fns, #object -ENTRY(v7wbi_tlb_fns) - .long v7wbi_flush_user_tlb_range - .long v7wbi_flush_kern_tlb_range - ALT_SMP(.long v7wbi_tlb_flags_smp) - ALT_UP(.long v7wbi_tlb_flags_up) - .size v7wbi_tlb_fns, . - v7wbi_tlb_fns + /* define struct cpu_tlb_fns (see <asm/tlbflush.h> and proc-macros.S) */ + define_tlb_functions v7wbi, v7wbi_tlb_flags_up, flags_smp=v7wbi_tlb_flags_smp diff --git a/arch/arm/mm/vmregion.c b/arch/arm/mm/vmregion.c deleted file mode 100644 index 935993e1b1e..00000000000 --- a/arch/arm/mm/vmregion.c +++ /dev/null @@ -1,132 +0,0 @@ -#include <linux/spinlock.h> -#include <linux/list.h> -#include <linux/slab.h> - -#include "vmregion.h" - -/* - * VM region handling support. - * - * This should become something generic, handling VM region allocations for - * vmalloc and similar (ioremap, module space, etc). - * - * I envisage vmalloc()'s supporting vm_struct becoming: - * - * struct vm_struct { - * struct vmregion region; - * unsigned long flags; - * struct page **pages; - * unsigned int nr_pages; - * unsigned long phys_addr; - * }; - * - * get_vm_area() would then call vmregion_alloc with an appropriate - * struct vmregion head (eg): - * - * struct vmregion vmalloc_head = { - * .vm_list = LIST_HEAD_INIT(vmalloc_head.vm_list), - * .vm_start = VMALLOC_START, - * .vm_end = VMALLOC_END, - * }; - * - * However, vmalloc_head.vm_start is variable (typically, it is dependent on - * the amount of RAM found at boot time.) 
I would imagine that get_vm_area() - * would have to initialise this each time prior to calling vmregion_alloc(). - */ - -struct arm_vmregion * -arm_vmregion_alloc(struct arm_vmregion_head *head, size_t align, - size_t size, gfp_t gfp) -{ - unsigned long addr = head->vm_start, end = head->vm_end - size; - unsigned long flags; - struct arm_vmregion *c, *new; - - if (head->vm_end - head->vm_start < size) { - printk(KERN_WARNING "%s: allocation too big (requested %#x)\n", - __func__, size); - goto out; - } - - new = kmalloc(sizeof(struct arm_vmregion), gfp); - if (!new) - goto out; - - spin_lock_irqsave(&head->vm_lock, flags); - - list_for_each_entry(c, &head->vm_list, vm_list) { - if ((addr + size) < addr) - goto nospc; - if ((addr + size) <= c->vm_start) - goto found; - addr = ALIGN(c->vm_end, align); - if (addr > end) - goto nospc; - } - - found: - /* - * Insert this entry _before_ the one we found. - */ - list_add_tail(&new->vm_list, &c->vm_list); - new->vm_start = addr; - new->vm_end = addr + size; - new->vm_active = 1; - - spin_unlock_irqrestore(&head->vm_lock, flags); - return new; - - nospc: - spin_unlock_irqrestore(&head->vm_lock, flags); - kfree(new); - out: - return NULL; -} - -static struct arm_vmregion *__arm_vmregion_find(struct arm_vmregion_head *head, unsigned long addr) -{ - struct arm_vmregion *c; - - list_for_each_entry(c, &head->vm_list, vm_list) { - if (c->vm_active && c->vm_start == addr) - goto out; - } - c = NULL; - out: - return c; -} - -struct arm_vmregion *arm_vmregion_find(struct arm_vmregion_head *head, unsigned long addr) -{ - struct arm_vmregion *c; - unsigned long flags; - - spin_lock_irqsave(&head->vm_lock, flags); - c = __arm_vmregion_find(head, addr); - spin_unlock_irqrestore(&head->vm_lock, flags); - return c; -} - -struct arm_vmregion *arm_vmregion_find_remove(struct arm_vmregion_head *head, unsigned long addr) -{ - struct arm_vmregion *c; - unsigned long flags; - - spin_lock_irqsave(&head->vm_lock, flags); - c = __arm_vmregion_find(head, addr); - if (c) - c->vm_active = 0; - spin_unlock_irqrestore(&head->vm_lock, flags); - return c; -} - -void arm_vmregion_free(struct arm_vmregion_head *head, struct arm_vmregion *c) -{ - unsigned long flags; - - spin_lock_irqsave(&head->vm_lock, flags); - list_del(&c->vm_list); - spin_unlock_irqrestore(&head->vm_lock, flags); - - kfree(c); -} diff --git a/arch/arm/mm/vmregion.h b/arch/arm/mm/vmregion.h deleted file mode 100644 index 15e9f044db9..00000000000 --- a/arch/arm/mm/vmregion.h +++ /dev/null @@ -1,29 +0,0 @@ -#ifndef VMREGION_H -#define VMREGION_H - -#include <linux/spinlock.h> -#include <linux/list.h> - -struct page; - -struct arm_vmregion_head { - spinlock_t vm_lock; - struct list_head vm_list; - unsigned long vm_start; - unsigned long vm_end; -}; - -struct arm_vmregion { - struct list_head vm_list; - unsigned long vm_start; - unsigned long vm_end; - struct page *vm_pages; - int vm_active; -}; - -struct arm_vmregion *arm_vmregion_alloc(struct arm_vmregion_head *, size_t, size_t, gfp_t); -struct arm_vmregion *arm_vmregion_find(struct arm_vmregion_head *, unsigned long); -struct arm_vmregion *arm_vmregion_find_remove(struct arm_vmregion_head *, unsigned long); -void arm_vmregion_free(struct arm_vmregion_head *, struct arm_vmregion *); - -#endif |
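The tlb-fa.S, tlb-v4*.S, tlb-v6.S and tlb-v7.S hunks above all remove the same boilerplate: each hand-rolled <name>_tlb_fns table is replaced by one define_tlb_functions invocation. A minimal sketch of such a macro is shown below. It is inferred from the deleted tables and the v7wbi call site rather than quoted from proc-macros.S, and it assumes the ENTRY and ALT_SMP/ALT_UP helpers that these files already include:

	@ Sketch: emit a struct cpu_tlb_fns table for \name in the same
	@ word order as the deleted open-coded tables (user range,
	@ kernel range, flags), with an optional SMP flags word.
	.macro	define_tlb_functions name:req, flags_up:req, flags_smp
	.type	\name\()_tlb_fns, #object
ENTRY(\name\()_tlb_fns)
	.long	\name\()_flush_user_tlb_range
	.long	\name\()_flush_kern_tlb_range
	.ifnb	\flags_smp
	ALT_SMP(.long	\flags_smp)
	ALT_UP(.long	\flags_up)
	.else
	.long	\flags_up
	.endif
	.size	\name\()_tlb_fns, . - \name\()_tlb_fns
	.endm

With that shape, define_tlb_functions v4wb, v4wb_tlb_flags reproduces the three-word table tlb-v4wb.S used to carry inline, while define_tlb_functions v7wbi, v7wbi_tlb_flags_up, flags_smp=v7wbi_tlb_flags_smp keeps the ALT_SMP/ALT_UP pair from the deleted v7wbi_tlb_fns.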

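The proc-xsc3.S and proc-xscale.S conversions rely on the companion define_cache_functions macro for struct cpu_cache_fns, which is why both files first alias <name>_flush_kern_cache_louis to <name>_flush_kern_cache_all: the macro expects a complete set of <name>_* cache entry points. A plausible shape, reconstructed from the deleted xsc3_cache_fns and xscale_cache_fns tables plus the new louis entry (whose exact slot in the table is an assumption here, not taken from proc-macros.S), is:

	@ Sketch: emit a struct cpu_cache_fns table for \name, one word
	@ per cache operation named \name\()_<op>. Ordering follows the
	@ deleted xsc3_cache_fns table; flush_kern_cache_louis is the
	@ added flush-to-level-of-unification hook.
	.macro	define_cache_functions name:req
	.type	\name\()_cache_fns, #object
ENTRY(\name\()_cache_fns)
	.long	\name\()_flush_icache_all
	.long	\name\()_flush_kern_cache_all
	.long	\name\()_flush_kern_cache_louis
	.long	\name\()_flush_user_cache_all
	.long	\name\()_flush_user_cache_range
	.long	\name\()_coherent_kern_range
	.long	\name\()_coherent_user_range
	.long	\name\()_flush_kern_dcache_area
	.long	\name\()_dma_map_area
	.long	\name\()_dma_unmap_area
	.long	\name\()_dma_flush_range
	.size	\name\()_cache_fns, . - \name\()_cache_fns
	.endm

Under that scheme the a0_alias block in proc-xscale.S only needs to publish xscale_80200_A0_A1_* names for the operations it shares with plain XScale, keep its own xscale_80200_A0_A1_dma_map_area for the 80200 A0/A1 D-cache erratum, and then invoke define_cache_functions xscale_80200_A0_A1.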