Diffstat (limited to 'arch/arm/mm')
104 files changed, 18496 insertions, 5366 deletions
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index e3c14d6b432..c348eaee7ee 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -1,57 +1,35 @@ comment "Processor Type" -config CPU_32 - bool - default y - # Select CPU types depending on the architecture selected. This selects # which CPUs we support in the kernel image, and the compiler instruction # optimiser behaviour. -# ARM610 -config CPU_ARM610 - bool "Support ARM610 processor" - depends on ARCH_RPC - select CPU_32v3 - select CPU_CACHE_V3 - select CPU_CACHE_VIVT - select CPU_COPY_V3 - select CPU_TLB_V3 - help - The ARM610 is the successor to the ARM3 processor - and was produced by VLSI Technology Inc. - - Say Y if you want support for the ARM610 processor. - Otherwise, say N. - -# ARM710 -config CPU_ARM710 - bool "Support ARM710 processor" if !ARCH_CLPS7500 && ARCH_RPC - default y if ARCH_CLPS7500 - select CPU_32v3 - select CPU_CACHE_V3 - select CPU_CACHE_VIVT - select CPU_COPY_V3 - select CPU_TLB_V3 +# ARM7TDMI +config CPU_ARM7TDMI + bool "Support ARM7TDMI processor" + depends on !MMU + select CPU_32v4T + select CPU_ABRT_LV4T + select CPU_CACHE_V4 + select CPU_PABRT_LEGACY help A 32-bit RISC microprocessor based on the ARM7 processor core - designed by Advanced RISC Machines Ltd. The ARM710 is the - successor to the ARM610 processor. It was released in - July 1994 by VLSI Technology Inc. + which has no memory control unit and cache. - Say Y if you want support for the ARM710 processor. + Say Y if you want support for the ARM7TDMI processor. Otherwise, say N. # ARM720T config CPU_ARM720T - bool "Support ARM720T processor" if !ARCH_CLPS711X && !ARCH_L7200 && !ARCH_CDB89712 && ARCH_INTEGRATOR - default y if ARCH_CLPS711X || ARCH_L7200 || ARCH_CDB89712 || ARCH_H720X - select CPU_32v4 + bool "Support ARM720T processor" if ARCH_INTEGRATOR + select CPU_32v4T select CPU_ABRT_LV4T select CPU_CACHE_V4 select CPU_CACHE_VIVT - select CPU_COPY_V4WT - select CPU_TLB_V4WT + select CPU_COPY_V4WT if MMU + select CPU_CP15_MMU + select CPU_PABRT_LEGACY + select CPU_TLB_V4WT if MMU help A 32-bit RISC processor with 8kByte Cache, Write Buffer and MMU built around an ARM7TDMI core. @@ -59,23 +37,52 @@ config CPU_ARM720T Say Y if you want support for the ARM720T processor. Otherwise, say N. +# ARM740T +config CPU_ARM740T + bool "Support ARM740T processor" if ARCH_INTEGRATOR + depends on !MMU + select CPU_32v4T + select CPU_ABRT_LV4T + select CPU_CACHE_V4 + select CPU_CP15_MPU + select CPU_PABRT_LEGACY + help + A 32-bit RISC processor with 8KB cache or 4KB variants, + write buffer and MPU(Protection Unit) built around + an ARM7TDMI core. + + Say Y if you want support for the ARM740T processor. + Otherwise, say N. + +# ARM9TDMI +config CPU_ARM9TDMI + bool "Support ARM9TDMI processor" + depends on !MMU + select CPU_32v4T + select CPU_ABRT_NOMMU + select CPU_CACHE_V4 + select CPU_PABRT_LEGACY + help + A 32-bit RISC microprocessor based on the ARM9 processor core + which has no memory control unit and cache. + + Say Y if you want support for the ARM9TDMI processor. + Otherwise, say N. 
+ # ARM920T config CPU_ARM920T - bool "Support ARM920T processor" if !ARCH_S3C2410 - depends on ARCH_INTEGRATOR || ARCH_S3C2410 || ARCH_IMX || ARCH_AAEC2000 - default y if ARCH_S3C2410 - select CPU_32v4 + bool "Support ARM920T processor" if ARCH_INTEGRATOR + select CPU_32v4T select CPU_ABRT_EV4T select CPU_CACHE_V4WT select CPU_CACHE_VIVT - select CPU_COPY_V4WB - select CPU_TLB_V4WBI + select CPU_COPY_V4WB if MMU + select CPU_CP15_MMU + select CPU_PABRT_LEGACY + select CPU_TLB_V4WBI if MMU help The ARM920T is licensed to be produced by numerous vendors, - and is used in the Maverick EP9312 and the Samsung S3C2410. - - More information on the Maverick EP9312 at - <http://linuxdevices.com/products/PD2382866068.html>. + and is used in the Cirrus EP93xx and the Samsung S3C2410. Say Y if you want support for the ARM920T processor. Otherwise, say N. @@ -83,18 +90,18 @@ config CPU_ARM920T # ARM922T config CPU_ARM922T bool "Support ARM922T processor" if ARCH_INTEGRATOR - depends on ARCH_CAMELOT || ARCH_LH7A40X || ARCH_INTEGRATOR - default y if ARCH_CAMELOT || ARCH_LH7A40X - select CPU_32v4 + select CPU_32v4T select CPU_ABRT_EV4T select CPU_CACHE_V4WT select CPU_CACHE_VIVT - select CPU_COPY_V4WB - select CPU_TLB_V4WBI + select CPU_COPY_V4WB if MMU + select CPU_CP15_MMU + select CPU_PABRT_LEGACY + select CPU_TLB_V4WBI if MMU help The ARM922T is a version of the ARM920T, but with smaller instruction and data caches. It is used in Altera's - Excalibur XA device family. + Excalibur XA device family and Micrel's KS8695 Centaur. Say Y if you want support for the ARM922T processor. Otherwise, say N. @@ -102,14 +109,14 @@ config CPU_ARM922T # ARM925T config CPU_ARM925T bool "Support ARM925T processor" if ARCH_OMAP1 - depends on ARCH_OMAP1510 - default y if ARCH_OMAP1510 - select CPU_32v4 + select CPU_32v4T select CPU_ABRT_EV4T select CPU_CACHE_V4WT select CPU_CACHE_VIVT - select CPU_COPY_V4WB - select CPU_TLB_V4WBI + select CPU_COPY_V4WB if MMU + select CPU_CP15_MMU + select CPU_PABRT_LEGACY + select CPU_TLB_V4WBI if MMU help The ARM925T is a mix between the ARM920T and ARM926T, but with different instruction and data caches. It is used in TI's OMAP @@ -120,14 +127,14 @@ config CPU_ARM925T # ARM926T config CPU_ARM926T - bool "Support ARM926T processor" - depends on ARCH_INTEGRATOR || ARCH_VERSATILE_PB || MACH_VERSATILE_AB || ARCH_OMAP730 || ARCH_OMAP16XX || MACH_REALVIEW_EB - default y if ARCH_VERSATILE_PB || MACH_VERSATILE_AB || ARCH_OMAP730 || ARCH_OMAP16XX + bool "Support ARM926T processor" if ARCH_INTEGRATOR || MACH_REALVIEW_EB select CPU_32v5 select CPU_ABRT_EV5TJ select CPU_CACHE_VIVT - select CPU_COPY_V4WB - select CPU_TLB_V4WBI + select CPU_COPY_V4WB if MMU + select CPU_CP15_MMU + select CPU_PABRT_LEGACY + select CPU_TLB_V4WBI if MMU help This is a variant of the ARM920. It has slightly different instruction sequences for cache and TLB operations. Curiously, @@ -136,16 +143,70 @@ config CPU_ARM926T Say Y if you want support for the ARM926T processor. Otherwise, say N. +# FA526 +config CPU_FA526 + bool + select CPU_32v4 + select CPU_ABRT_EV4 + select CPU_CACHE_FA + select CPU_CACHE_VIVT + select CPU_COPY_FA if MMU + select CPU_CP15_MMU + select CPU_PABRT_LEGACY + select CPU_TLB_FA if MMU + help + The FA526 is a version of the ARMv4 compatible processor with + Branch Target Buffer, Unified TLB and cache line size 16. + + Say Y if you want support for the FA526 processor. + Otherwise, say N. 
+ +# ARM940T +config CPU_ARM940T + bool "Support ARM940T processor" if ARCH_INTEGRATOR + depends on !MMU + select CPU_32v4T + select CPU_ABRT_NOMMU + select CPU_CACHE_VIVT + select CPU_CP15_MPU + select CPU_PABRT_LEGACY + help + ARM940T is a member of the ARM9TDMI family of general- + purpose microprocessors with MPU and separate 4KB + instruction and 4KB data cases, each with a 4-word line + length. + + Say Y if you want support for the ARM940T processor. + Otherwise, say N. + +# ARM946E-S +config CPU_ARM946E + bool "Support ARM946E-S processor" if ARCH_INTEGRATOR + depends on !MMU + select CPU_32v5 + select CPU_ABRT_NOMMU + select CPU_CACHE_VIVT + select CPU_CP15_MPU + select CPU_PABRT_LEGACY + help + ARM946E-S is a member of the ARM9E-S family of high- + performance, 32-bit system-on-chip processor solutions. + The TCM and ARMv5TE 32-bit instruction set is supported. + + Say Y if you want support for the ARM946E-S processor. + Otherwise, say N. + # ARM1020 - needs validating config CPU_ARM1020 - bool "Support ARM1020T (rev 0) processor" - depends on ARCH_INTEGRATOR + bool "Support ARM1020T (rev 0) processor" if ARCH_INTEGRATOR select CPU_32v5 select CPU_ABRT_EV4T select CPU_CACHE_V4WT select CPU_CACHE_VIVT - select CPU_COPY_V4WB - select CPU_TLB_V4WBI + select CPU_COPY_V4WB if MMU + select CPU_CP15_MMU + select CPU_PABRT_LEGACY + select CPU_TLB_V4WBI if MMU help The ARM1020 is the 32K cached version of the ARM10 processor, with an addition of a floating-point unit. @@ -155,25 +216,27 @@ config CPU_ARM1020 # ARM1020E - needs validating config CPU_ARM1020E - bool "Support ARM1020E processor" - depends on ARCH_INTEGRATOR + bool "Support ARM1020E processor" if ARCH_INTEGRATOR + depends on n select CPU_32v5 select CPU_ABRT_EV4T select CPU_CACHE_V4WT select CPU_CACHE_VIVT - select CPU_COPY_V4WB - select CPU_TLB_V4WBI - depends on n + select CPU_COPY_V4WB if MMU + select CPU_CP15_MMU + select CPU_PABRT_LEGACY + select CPU_TLB_V4WBI if MMU # ARM1022E config CPU_ARM1022 - bool "Support ARM1022E processor" - depends on ARCH_INTEGRATOR + bool "Support ARM1022E processor" if ARCH_INTEGRATOR select CPU_32v5 select CPU_ABRT_EV4T select CPU_CACHE_VIVT - select CPU_COPY_V4WB # can probably do better - select CPU_TLB_V4WBI + select CPU_COPY_V4WB if MMU # can probably do better + select CPU_CP15_MMU + select CPU_PABRT_LEGACY + select CPU_TLB_V4WBI if MMU help The ARM1022E is an implementation of the ARMv5TE architecture based upon the ARM10 integer core with a 16KiB L1 Harvard cache, @@ -184,13 +247,14 @@ config CPU_ARM1022 # ARM1026EJ-S config CPU_ARM1026 - bool "Support ARM1026EJ-S processor" - depends on ARCH_INTEGRATOR + bool "Support ARM1026EJ-S processor" if ARCH_INTEGRATOR select CPU_32v5 select CPU_ABRT_EV5T # But need Jazelle, but EV5TJ ignores bit 10 select CPU_CACHE_VIVT - select CPU_COPY_V4WB # can probably do better - select CPU_TLB_V4WBI + select CPU_COPY_V4WB if MMU # can probably do better + select CPU_CP15_MMU + select CPU_PABRT_LEGACY + select CPU_TLB_V4WBI if MMU help The ARM1026EJ-S is an implementation of the ARMv5TEJ architecture based upon the ARM10 integer core. 
@@ -200,15 +264,16 @@ config CPU_ARM1026 # SA110 config CPU_SA110 - bool "Support StrongARM(R) SA-110 processor" if !ARCH_EBSA110 && !FOOTBRIDGE && !ARCH_TBOX && !ARCH_SHARK && !ARCH_NEXUSPCI && ARCH_RPC - default y if ARCH_EBSA110 || FOOTBRIDGE || ARCH_TBOX || ARCH_SHARK || ARCH_NEXUSPCI + bool select CPU_32v3 if ARCH_RPC select CPU_32v4 if !ARCH_RPC select CPU_ABRT_EV4 select CPU_CACHE_V4WB select CPU_CACHE_VIVT - select CPU_COPY_V4WB - select CPU_TLB_V4WB + select CPU_COPY_V4WB if MMU + select CPU_CP15_MMU + select CPU_PABRT_LEGACY + select CPU_TLB_V4WB if MMU help The Intel StrongARM(R) SA-110 is a 32-bit microprocessor and is available at five speeds ranging from 100 MHz to 233 MHz. @@ -221,62 +286,181 @@ config CPU_SA110 # SA1100 config CPU_SA1100 bool - depends on ARCH_SA1100 - default y select CPU_32v4 select CPU_ABRT_EV4 select CPU_CACHE_V4WB select CPU_CACHE_VIVT - select CPU_TLB_V4WB + select CPU_CP15_MMU + select CPU_PABRT_LEGACY + select CPU_TLB_V4WB if MMU # XScale config CPU_XSCALE bool - depends on ARCH_IOP3XX || ARCH_PXA || ARCH_IXP4XX || ARCH_IXP2000 - default y select CPU_32v5 select CPU_ABRT_EV5T select CPU_CACHE_VIVT - select CPU_TLB_V4WBI + select CPU_CP15_MMU + select CPU_PABRT_LEGACY + select CPU_TLB_V4WBI if MMU + +# XScale Core Version 3 +config CPU_XSC3 + bool + select CPU_32v5 + select CPU_ABRT_EV5T + select CPU_CACHE_VIVT + select CPU_CP15_MMU + select CPU_PABRT_LEGACY + select CPU_TLB_V4WBI if MMU + select IO_36 + +# Marvell PJ1 (Mohawk) +config CPU_MOHAWK + bool + select CPU_32v5 + select CPU_ABRT_EV5T + select CPU_CACHE_VIVT + select CPU_COPY_V4WB if MMU + select CPU_CP15_MMU + select CPU_PABRT_LEGACY + select CPU_TLB_V4WBI if MMU + +# Feroceon +config CPU_FEROCEON + bool + select CPU_32v5 + select CPU_ABRT_EV5T + select CPU_CACHE_VIVT + select CPU_COPY_FEROCEON if MMU + select CPU_CP15_MMU + select CPU_PABRT_LEGACY + select CPU_TLB_FEROCEON if MMU + +config CPU_FEROCEON_OLD_ID + bool "Accept early Feroceon cores with an ARM926 ID" + depends on CPU_FEROCEON && !CPU_ARM926T + default y + help + This enables the usage of some old Feroceon cores + for which the CPU ID is equal to the ARM926 ID. + Relevant for Feroceon-1850 and early Feroceon-2850. 
+ +# Marvell PJ4 +config CPU_PJ4 + bool + select ARM_THUMBEE + select CPU_V7 + +config CPU_PJ4B + bool + select CPU_V7 # ARMv6 config CPU_V6 - bool "Support ARM V6 processor" - depends on ARCH_INTEGRATOR || MACH_REALVIEW_EB + bool "Support ARM V6 processor" if ARCH_INTEGRATOR || MACH_REALVIEW_EB || MACH_REALVIEW_PBX select CPU_32v6 select CPU_ABRT_EV6 select CPU_CACHE_V6 select CPU_CACHE_VIPT - select CPU_COPY_V6 - select CPU_TLB_V6 + select CPU_COPY_V6 if MMU + select CPU_CP15_MMU + select CPU_HAS_ASID if MMU + select CPU_PABRT_V6 + select CPU_TLB_V6 if MMU # ARMv6k -config CPU_32v6K - bool "Support ARM V6K processor extensions" if !SMP - depends on CPU_V6 - default y if SMP +config CPU_V6K + bool "Support ARM V6K processor" if ARCH_INTEGRATOR || MACH_REALVIEW_EB || MACH_REALVIEW_PBX + select CPU_32v6 + select CPU_32v6K + select CPU_ABRT_EV6 + select CPU_CACHE_V6 + select CPU_CACHE_VIPT + select CPU_COPY_V6 if MMU + select CPU_CP15_MMU + select CPU_HAS_ASID if MMU + select CPU_PABRT_V6 + select CPU_TLB_V6 if MMU + +# ARMv7 +config CPU_V7 + bool "Support ARM V7 processor" if ARCH_INTEGRATOR || MACH_REALVIEW_EB || MACH_REALVIEW_PBX + select CPU_32v6K + select CPU_32v7 + select CPU_ABRT_EV7 + select CPU_CACHE_V7 + select CPU_CACHE_VIPT + select CPU_COPY_V6 if MMU + select CPU_CP15_MMU if MMU + select CPU_CP15_MPU if !MMU + select CPU_HAS_ASID if MMU + select CPU_PABRT_V7 + select CPU_TLB_V7 if MMU + +# ARMv7M +config CPU_V7M + bool + select CPU_32v7M + select CPU_ABRT_NOMMU + select CPU_CACHE_NOP + select CPU_PABRT_LEGACY + select CPU_THUMBONLY + +config CPU_THUMBONLY + bool + # There are no CPUs available with MMU that don't implement an ARM ISA: + depends on !MMU help - Say Y here if your ARMv6 processor supports the 'K' extension. - This enables the kernel to use some instructions not present - on previous processors, and as such a kernel build with this - enabled will not boot on processors with do not support these - instructions. + Select this if your CPU doesn't support the 32 bit ARM instructions. # Figure out what processor architecture version we should be using. # This defines the compiler instruction set which depends on the machine type. 
config CPU_32v3 bool + select CPU_USE_DOMAINS if MMU + select NEEDS_SYSCALL_FOR_CMPXCHG if SMP + select NEED_KUSER_HELPERS + select TLS_REG_EMUL if SMP || !MMU config CPU_32v4 bool + select CPU_USE_DOMAINS if MMU + select NEEDS_SYSCALL_FOR_CMPXCHG if SMP + select NEED_KUSER_HELPERS + select TLS_REG_EMUL if SMP || !MMU + +config CPU_32v4T + bool + select CPU_USE_DOMAINS if MMU + select NEEDS_SYSCALL_FOR_CMPXCHG if SMP + select NEED_KUSER_HELPERS + select TLS_REG_EMUL if SMP || !MMU config CPU_32v5 bool + select CPU_USE_DOMAINS if MMU + select NEEDS_SYSCALL_FOR_CMPXCHG if SMP + select NEED_KUSER_HELPERS + select TLS_REG_EMUL if SMP || !MMU config CPU_32v6 bool + select TLS_REG_EMUL if !CPU_32v6K && !MMU + +config CPU_32v6K + bool + +config CPU_32v7 + bool + +config CPU_32v7M + bool # The abort model +config CPU_ABRT_NOMMU + bool + config CPU_ABRT_EV4 bool @@ -295,10 +479,19 @@ config CPU_ABRT_EV5TJ config CPU_ABRT_EV6 bool -# The cache model -config CPU_CACHE_V3 +config CPU_ABRT_EV7 + bool + +config CPU_PABRT_LEGACY + bool + +config CPU_PABRT_V6 + bool + +config CPU_PABRT_V7 bool +# The cache model config CPU_CACHE_V4 bool @@ -311,31 +504,39 @@ config CPU_CACHE_V4WB config CPU_CACHE_V6 bool +config CPU_CACHE_V7 + bool + +config CPU_CACHE_NOP + bool + config CPU_CACHE_VIVT bool config CPU_CACHE_VIPT bool -# The copy-page model -config CPU_COPY_V3 +config CPU_CACHE_FA bool +if MMU +# The copy-page model config CPU_COPY_V4WT bool config CPU_COPY_V4WB bool -config CPU_COPY_V6 +config CPU_COPY_FEROCEON bool -# This selects the TLB model -config CPU_TLB_V3 +config CPU_COPY_FA + bool + +config CPU_COPY_V6 bool - help - ARM Architecture Version 3 TLB. +# This selects the TLB model config CPU_TLB_V4WT bool help @@ -352,14 +553,90 @@ config CPU_TLB_V4WBI ARM Architecture Version 4 TLB with writeback cache and invalidate instruction cache entry. +config CPU_TLB_FEROCEON + bool + help + Feroceon TLB (v4wbi with non-outer-cachable page table walks). + +config CPU_TLB_FA + bool + help + Faraday ARM FA526 architecture, unified TLB with writeback cache + and invalidate instruction cache entry. Branch target buffer is + also supported. + config CPU_TLB_V6 bool +config CPU_TLB_V7 + bool + +config VERIFY_PERMISSION_FAULT + bool +endif + +config CPU_HAS_ASID + bool + help + This indicates whether the CPU has the ASID register; used to + tag TLB and possibly cache entries. + +config CPU_CP15 + bool + help + Processor has the CP15 register. + +config CPU_CP15_MMU + bool + select CPU_CP15 + help + Processor has the CP15 register, which has MMU related registers. + +config CPU_CP15_MPU + bool + select CPU_CP15 + help + Processor has the CP15 register, which has MPU related registers. + +config CPU_USE_DOMAINS + bool + help + This option enables or disables the use of domain switching + via the set_fs() function. + +# +# CPU supports 36-bit I/O +# +config IO_36 + bool + comment "Processor Features" +config ARM_LPAE + bool "Support for the Large Physical Address Extension" + depends on MMU && CPU_32v7 && !CPU_32v6 && !CPU_32v5 && \ + !CPU_32v4 && !CPU_32v3 + help + Say Y if you have an ARMv7 processor supporting the LPAE page + table format and you would like to access memory beyond the + 4GB limit. The resulting kernel image will not run on + processors without the LPA extension. + + If unsure, say N. 
+ +config ARCH_PHYS_ADDR_T_64BIT + def_bool ARM_LPAE + +config ARCH_DMA_ADDR_T_64BIT + bool + config ARM_THUMB - bool "Support Thumb user binaries" - depends on CPU_ARM720T || CPU_ARM920T || CPU_ARM922T || CPU_ARM925T || CPU_ARM926T || CPU_ARM1020 || CPU_ARM1020E || CPU_ARM1022 || CPU_ARM1026 || CPU_XSCALE || CPU_V6 + bool "Support Thumb user binaries" if !CPU_THUMBONLY + depends on CPU_ARM720T || CPU_ARM740T || CPU_ARM920T || CPU_ARM922T || \ + CPU_ARM925T || CPU_ARM926T || CPU_ARM940T || CPU_ARM946E || \ + CPU_ARM1020 || CPU_ARM1020E || CPU_ARM1022 || CPU_ARM1026 || \ + CPU_XSCALE || CPU_XSC3 || CPU_MOHAWK || CPU_V6 || CPU_V6K || \ + CPU_V7 || CPU_FEROCEON || CPU_V7M default y help Say Y if you want to include kernel support for running user space @@ -371,6 +648,53 @@ config ARM_THUMB If you don't know what this all is, saying Y is a safe choice. +config ARM_THUMBEE + bool "Enable ThumbEE CPU extension" + depends on CPU_V7 + help + Say Y here if you have a CPU with the ThumbEE extension and code to + make use of it. Say N for code that can run on CPUs without ThumbEE. + +config ARM_VIRT_EXT + bool + depends on MMU + default y if CPU_V7 + help + Enable the kernel to make use of the ARM Virtualization + Extensions to install hypervisors without run-time firmware + assistance. + + A compliant bootloader is required in order to make maximum + use of this feature. Refer to Documentation/arm/Booting for + details. + +config SWP_EMULATE + bool "Emulate SWP/SWPB instructions" + depends on CPU_V7 + default y if SMP + select HAVE_PROC_CPU if PROC_FS + help + ARMv6 architecture deprecates use of the SWP/SWPB instructions. + ARMv7 multiprocessing extensions introduce the ability to disable + these instructions, triggering an undefined instruction exception + when executed. Say Y here to enable software emulation of these + instructions for userspace (not kernel) using LDREX/STREX. + Also creates /proc/cpu/swp_emulation for statistics. + + In some older versions of glibc [<=2.8] SWP is used during futex + trylock() operations with the assumption that the code will not + be preempted. This invalid assumption may be more likely to fail + with SWP emulation enabled, leading to deadlock of the user + application. + + NOTE: when accessing uncached shared regions, LDREX/STREX rely + on an external transaction monitoring block called a global + monitor to maintain update atomicity. If your system does not + implement a global monitor, this option can cause programs that + perform SWP operations to uncached memory to deadlock. + + If unsure, say Y. + config CPU_BIG_ENDIAN bool "Build big-endian kernel" depends on ARCH_SUPPORTS_BIG_ENDIAN @@ -380,23 +704,61 @@ config CPU_BIG_ENDIAN port must properly enable any big-endian related features of your chipset/board/processor. +config CPU_ENDIAN_BE8 + bool + depends on CPU_BIG_ENDIAN + default CPU_V6 || CPU_V6K || CPU_V7 + help + Support for the BE-8 (big-endian) mode on ARMv6 and ARMv7 processors. + +config CPU_ENDIAN_BE32 + bool + depends on CPU_BIG_ENDIAN + default !CPU_ENDIAN_BE8 + help + Support for the BE-32 (big-endian) mode on pre-ARMv6 processors. + +config CPU_HIGH_VECTOR + depends on !MMU && CPU_CP15 && !CPU_ARM740T + bool "Select the High exception vector" + help + Say Y here to select high exception vector(0xFFFF0000~). + The exception vector can vary depending on the platform + design in nommu mode. If your platform needs to select + high exception vector, say Y. 
+ Otherwise or if you are unsure, say N, and the low exception + vector (0x00000000~) will be used. + config CPU_ICACHE_DISABLE - bool "Disable I-Cache" - depends on CPU_ARM920T || CPU_ARM922T || CPU_ARM925T || CPU_ARM926T || CPU_ARM1020 || CPU_V6 + bool "Disable I-Cache (I-bit)" + depends on CPU_CP15 && !(CPU_ARM720T || CPU_ARM740T || CPU_XSCALE || CPU_XSC3) help Say Y here to disable the processor instruction cache. Unless you have a reason not to or are unsure, say N. config CPU_DCACHE_DISABLE - bool "Disable D-Cache" - depends on CPU_ARM920T || CPU_ARM922T || CPU_ARM925T || CPU_ARM926T || CPU_ARM1020 || CPU_V6 + bool "Disable D-Cache (C-bit)" + depends on CPU_CP15 help Say Y here to disable the processor data cache. Unless you have a reason not to or are unsure, say N. +config CPU_DCACHE_SIZE + hex + depends on CPU_ARM740T || CPU_ARM946E + default 0x00001000 if CPU_ARM740T + default 0x00002000 # default size for ARM946E-S + help + Some cores are synthesizable to have various sized cache. For + ARM946E-S case, it can vary from 0KB to 1MB. + To support such cache operations, it is efficient to know the size + before compile time. + If your SoC is configured to have a different size, define the value + here with proper conditions. + config CPU_DCACHE_WRITETHROUGH bool "Force write through D-cache" - depends on (CPU_ARM920T || CPU_ARM922T || CPU_ARM925T || CPU_ARM926T || CPU_ARM1020 || CPU_V6) && !CPU_DCACHE_DISABLE + depends on (CPU_ARM740T || CPU_ARM920T || CPU_ARM922T || CPU_ARM925T || CPU_ARM926T || CPU_ARM940T || CPU_ARM946E || CPU_ARM1020 || CPU_FA526) && !CPU_DCACHE_DISABLE default y if CPU_ARM925T help Say Y here to use the data cache in writethrough mode. Unless you @@ -404,41 +766,244 @@ config CPU_DCACHE_WRITETHROUGH config CPU_CACHE_ROUND_ROBIN bool "Round robin I and D cache replacement algorithm" - depends on (CPU_ARM926T || CPU_ARM1020) && (!CPU_ICACHE_DISABLE || !CPU_DCACHE_DISABLE) + depends on (CPU_ARM926T || CPU_ARM946E || CPU_ARM1020) && (!CPU_ICACHE_DISABLE || !CPU_DCACHE_DISABLE) help Say Y here to use the predictable round-robin cache replacement policy. Unless you specifically require this or are unsure, say N. config CPU_BPREDICT_DISABLE bool "Disable branch prediction" - depends on CPU_ARM1020 || CPU_V6 + depends on CPU_ARM1020 || CPU_V6 || CPU_V6K || CPU_MOHAWK || CPU_XSC3 || CPU_V7 || CPU_FA526 help Say Y here to disable branch prediction. If unsure, say N. config TLS_REG_EMUL bool - default y if SMP && (CPU_32v5 || CPU_32v4 || CPU_32v3) + select NEED_KUSER_HELPERS help An SMP system using a pre-ARMv6 processor (there are apparently a few prototypes like that in existence) and therefore access to that required register must be emulated. -config HAS_TLS_REG - bool - depends on !TLS_REG_EMUL - default y if SMP || CPU_32v7 - help - This selects support for the CP15 thread register. - It is defined to be available on some ARMv6 processors (including - all SMP capable ARMv6's) or later processors. User space may - assume directly accessing that register and always obtain the - expected value only on ARMv7 and above. - config NEEDS_SYSCALL_FOR_CMPXCHG bool - default y if SMP && (CPU_32v5 || CPU_32v4 || CPU_32v3) + select NEED_KUSER_HELPERS help SMP on a pre-ARMv6 processor? Well OK then. Forget about fast user space cmpxchg support. It is just not possible. +config NEED_KUSER_HELPERS + bool + +config KUSER_HELPERS + bool "Enable kuser helpers in vector page" if !NEED_KUSER_HELPERS + default y + help + Warning: disabling this option may break user programs. 
+ + Provide kuser helpers in the vector page. The kernel provides + helper code to userspace in read only form at a fixed location + in the high vector page to allow userspace to be independent of + the CPU type fitted to the system. This permits binaries to be + run on ARMv4 through to ARMv7 without modification. + + See Documentation/arm/kernel_user_helpers.txt for details. + + However, the fixed address nature of these helpers can be used + by ROP (return orientated programming) authors when creating + exploits. + + If all of the binaries and libraries which run on your platform + are built specifically for your platform, and make no use of + these helpers, then you can turn this option off to hinder + such exploits. However, in that case, if a binary or library + relying on those helpers is run, it will receive a SIGILL signal, + which will terminate the program. + + Say N here only if you are absolutely certain that you do not + need these helpers; otherwise, the safe option is to say Y. + +config DMA_CACHE_RWFO + bool "Enable read/write for ownership DMA cache maintenance" + depends on CPU_V6K && SMP + default y + help + The Snoop Control Unit on ARM11MPCore does not detect the + cache maintenance operations and the dma_{map,unmap}_area() + functions may leave stale cache entries on other CPUs. By + enabling this option, Read or Write For Ownership in the ARMv6 + DMA cache maintenance functions is performed. These LDR/STR + instructions change the cache line state to shared or modified + so that the cache operation has the desired effect. + + Note that the workaround is only valid on processors that do + not perform speculative loads into the D-cache. For such + processors, if cache maintenance operations are not broadcast + in hardware, other workarounds are needed (e.g. cache + maintenance broadcasting in software via FIQ). + +config OUTER_CACHE + bool + +config OUTER_CACHE_SYNC + bool + help + The outer cache has a outer_cache_fns.sync function pointer + that can be used to drain the write buffer of the outer cache. + +config CACHE_FEROCEON_L2 + bool "Enable the Feroceon L2 cache controller" + depends on ARCH_KIRKWOOD || ARCH_MV78XX0 || ARCH_MVEBU + default y + select OUTER_CACHE + help + This option enables the Feroceon L2 cache controller. + +config CACHE_FEROCEON_L2_WRITETHROUGH + bool "Force Feroceon L2 cache write through" + depends on CACHE_FEROCEON_L2 + help + Say Y here to use the Feroceon L2 cache in writethrough mode. + Unless you specifically require this, say N for writeback mode. + +config MIGHT_HAVE_CACHE_L2X0 + bool + help + This option should be selected by machines which have a L2x0 + or PL310 cache controller, but where its use is optional. + + The only effect of this option is to make CACHE_L2X0 and + related options available to the user for configuration. + + Boards or SoCs which always require the cache controller + support to be present should select CACHE_L2X0 directly + instead of this option, thus preventing the user from + inadvertently configuring a broken kernel. + +config CACHE_L2X0 + bool "Enable the L2x0 outer cache controller" if MIGHT_HAVE_CACHE_L2X0 + default MIGHT_HAVE_CACHE_L2X0 + select OUTER_CACHE + select OUTER_CACHE_SYNC + help + This option enables the L2x0 PrimeCell. + +if CACHE_L2X0 + +config CACHE_PL310 + bool + default y if CPU_V7 && !(CPU_V6 || CPU_V6K) + help + This option enables optimisations for the PL310 cache + controller. 
+ +config PL310_ERRATA_588369 + bool "PL310 errata: Clean & Invalidate maintenance operations do not invalidate clean lines" + help + The PL310 L2 cache controller implements three types of Clean & + Invalidate maintenance operations: by Physical Address + (offset 0x7F0), by Index/Way (0x7F8) and by Way (0x7FC). + They are architecturally defined to behave as the execution of a + clean operation followed immediately by an invalidate operation, + both performing to the same memory location. This functionality + is not correctly implemented in PL310 as clean lines are not + invalidated as a result of these operations. + +config PL310_ERRATA_727915 + bool "PL310 errata: Background Clean & Invalidate by Way operation can cause data corruption" + help + PL310 implements the Clean & Invalidate by Way L2 cache maintenance + operation (offset 0x7FC). This operation runs in background so that + PL310 can handle normal accesses while it is in progress. Under very + rare circumstances, due to this erratum, write data can be lost when + PL310 treats a cacheable write transaction during a Clean & + Invalidate by Way operation. + +config PL310_ERRATA_753970 + bool "PL310 errata: cache sync operation may be faulty" + help + This option enables the workaround for the 753970 PL310 (r3p0) erratum. + + Under some condition the effect of cache sync operation on + the store buffer still remains when the operation completes. + This means that the store buffer is always asked to drain and + this prevents it from merging any further writes. The workaround + is to replace the normal offset of cache sync operation (0x730) + by another offset targeting an unmapped PL310 register 0x740. + This has the same effect as the cache sync operation: store buffer + drain and waiting for all buffers empty. + +config PL310_ERRATA_769419 + bool "PL310 errata: no automatic Store Buffer drain" + help + On revisions of the PL310 prior to r3p2, the Store Buffer does + not automatically drain. This can cause normal, non-cacheable + writes to be retained when the memory system is idle, leading + to suboptimal I/O performance for drivers using coherent DMA. + This option adds a write barrier to the cpu_idle loop so that, + on systems with an outer cache, the store buffer is drained + explicitly. + +endif + +config CACHE_TAUROS2 + bool "Enable the Tauros2 L2 cache controller" + depends on (ARCH_DOVE || ARCH_MMP || CPU_PJ4) + default y + select OUTER_CACHE + help + This option enables the Tauros2 L2 cache controller (as + found on PJ1/PJ4). + +config CACHE_XSC3L2 + bool "Enable the L2 cache on XScale3" + depends on CPU_XSC3 + default y + select OUTER_CACHE + help + This option enables the L2 cache on XScale3. + +config ARM_L1_CACHE_SHIFT_6 + bool + default y if CPU_V7 + help + Setting ARM L1 cache line size to 64 Bytes. + +config ARM_L1_CACHE_SHIFT + int + default 6 if ARM_L1_CACHE_SHIFT_6 + default 5 + +config ARM_DMA_MEM_BUFFERABLE + bool "Use non-cacheable memory for DMA" if (CPU_V6 || CPU_V6K) && !CPU_V7 + depends on !(MACH_REALVIEW_PB1176 || REALVIEW_EB_ARM11MP || \ + MACH_REALVIEW_PB11MP) + default y if CPU_V6 || CPU_V6K || CPU_V7 + help + Historically, the kernel has used strongly ordered mappings to + provide DMA coherent memory. With the advent of ARMv7, mapping + memory with differing types results in unpredictable behaviour, + so on these CPUs, this option is forced on. 
+ + Multiple mappings with differing attributes is also unpredictable + on ARMv6 CPUs, but since they do not have aggressive speculative + prefetch, no harm appears to occur. + + However, drivers may be missing the necessary barriers for ARMv6, + and therefore turning this on may result in unpredictable driver + behaviour. Therefore, we offer this as an option. + + You are recommended say 'Y' here and debug any affected drivers. + +config ARCH_HAS_BARRIERS + bool + help + This option allows the use of custom mandatory barriers + included via the mach/barriers.h file. + +config ARCH_SUPPORTS_BIG_ENDIAN + bool + help + This option specifies the architecture can support big endian + operation. diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile index 59f47d4c2df..91da64de440 100644 --- a/arch/arm/mm/Makefile +++ b/arch/arm/mm/Makefile @@ -2,48 +2,81 @@ # Makefile for the linux arm-specific parts of the memory manager. # -obj-y := consistent.o extable.o fault-armv.o \ - fault.o flush.o init.o ioremap.o mmap.o \ - mm-armv.o +obj-y := dma-mapping.o extable.o fault.o init.o \ + iomap.o +obj-$(CONFIG_MMU) += fault-armv.o flush.o idmap.o ioremap.o \ + mmap.o pgd.o mmu.o + +ifneq ($(CONFIG_MMU),y) +obj-y += nommu.o +endif + +obj-$(CONFIG_ARM_PTDUMP) += dump.o obj-$(CONFIG_MODULES) += proc-syms.o obj-$(CONFIG_ALIGNMENT_TRAP) += alignment.o -obj-$(CONFIG_DISCONTIGMEM) += discontig.o +obj-$(CONFIG_HIGHMEM) += highmem.o +obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o +obj-$(CONFIG_CPU_ABRT_NOMMU) += abort-nommu.o obj-$(CONFIG_CPU_ABRT_EV4) += abort-ev4.o obj-$(CONFIG_CPU_ABRT_EV4T) += abort-ev4t.o obj-$(CONFIG_CPU_ABRT_LV4T) += abort-lv4t.o obj-$(CONFIG_CPU_ABRT_EV5T) += abort-ev5t.o obj-$(CONFIG_CPU_ABRT_EV5TJ) += abort-ev5tj.o obj-$(CONFIG_CPU_ABRT_EV6) += abort-ev6.o +obj-$(CONFIG_CPU_ABRT_EV7) += abort-ev7.o + +AFLAGS_abort-ev6.o :=-Wa,-march=armv6k +AFLAGS_abort-ev7.o :=-Wa,-march=armv7-a + +obj-$(CONFIG_CPU_PABRT_LEGACY) += pabort-legacy.o +obj-$(CONFIG_CPU_PABRT_V6) += pabort-v6.o +obj-$(CONFIG_CPU_PABRT_V7) += pabort-v7.o -obj-$(CONFIG_CPU_CACHE_V3) += cache-v3.o obj-$(CONFIG_CPU_CACHE_V4) += cache-v4.o obj-$(CONFIG_CPU_CACHE_V4WT) += cache-v4wt.o obj-$(CONFIG_CPU_CACHE_V4WB) += cache-v4wb.o obj-$(CONFIG_CPU_CACHE_V6) += cache-v6.o +obj-$(CONFIG_CPU_CACHE_V7) += cache-v7.o +obj-$(CONFIG_CPU_CACHE_FA) += cache-fa.o +obj-$(CONFIG_CPU_CACHE_NOP) += cache-nop.o + +AFLAGS_cache-v6.o :=-Wa,-march=armv6 +AFLAGS_cache-v7.o :=-Wa,-march=armv7-a -obj-$(CONFIG_CPU_COPY_V3) += copypage-v3.o obj-$(CONFIG_CPU_COPY_V4WT) += copypage-v4wt.o obj-$(CONFIG_CPU_COPY_V4WB) += copypage-v4wb.o -obj-$(CONFIG_CPU_COPY_V6) += copypage-v6.o mmu.o +obj-$(CONFIG_CPU_COPY_FEROCEON) += copypage-feroceon.o +obj-$(CONFIG_CPU_COPY_V6) += copypage-v6.o context.o obj-$(CONFIG_CPU_SA1100) += copypage-v4mc.o obj-$(CONFIG_CPU_XSCALE) += copypage-xscale.o +obj-$(CONFIG_CPU_XSC3) += copypage-xsc3.o +obj-$(CONFIG_CPU_COPY_FA) += copypage-fa.o -obj-$(CONFIG_CPU_TLB_V3) += tlb-v3.o obj-$(CONFIG_CPU_TLB_V4WT) += tlb-v4.o obj-$(CONFIG_CPU_TLB_V4WB) += tlb-v4wb.o obj-$(CONFIG_CPU_TLB_V4WBI) += tlb-v4wbi.o +obj-$(CONFIG_CPU_TLB_FEROCEON) += tlb-v4wbi.o # reuse v4wbi TLB functions obj-$(CONFIG_CPU_TLB_V6) += tlb-v6.o +obj-$(CONFIG_CPU_TLB_V7) += tlb-v7.o +obj-$(CONFIG_CPU_TLB_FA) += tlb-fa.o + +AFLAGS_tlb-v6.o :=-Wa,-march=armv6 +AFLAGS_tlb-v7.o :=-Wa,-march=armv7-a -obj-$(CONFIG_CPU_ARM610) += proc-arm6_7.o -obj-$(CONFIG_CPU_ARM710) += proc-arm6_7.o +obj-$(CONFIG_CPU_ARM7TDMI) += proc-arm7tdmi.o obj-$(CONFIG_CPU_ARM720T) += proc-arm720.o 
+obj-$(CONFIG_CPU_ARM740T) += proc-arm740.o +obj-$(CONFIG_CPU_ARM9TDMI) += proc-arm9tdmi.o obj-$(CONFIG_CPU_ARM920T) += proc-arm920.o obj-$(CONFIG_CPU_ARM922T) += proc-arm922.o obj-$(CONFIG_CPU_ARM925T) += proc-arm925.o obj-$(CONFIG_CPU_ARM926T) += proc-arm926.o +obj-$(CONFIG_CPU_ARM940T) += proc-arm940.o +obj-$(CONFIG_CPU_ARM946E) += proc-arm946.o +obj-$(CONFIG_CPU_FA526) += proc-fa526.o obj-$(CONFIG_CPU_ARM1020) += proc-arm1020.o obj-$(CONFIG_CPU_ARM1020E) += proc-arm1020e.o obj-$(CONFIG_CPU_ARM1022) += proc-arm1022.o @@ -51,4 +84,19 @@ obj-$(CONFIG_CPU_ARM1026) += proc-arm1026.o obj-$(CONFIG_CPU_SA110) += proc-sa110.o obj-$(CONFIG_CPU_SA1100) += proc-sa1100.o obj-$(CONFIG_CPU_XSCALE) += proc-xscale.o -obj-$(CONFIG_CPU_V6) += proc-v6.o blockops.o +obj-$(CONFIG_CPU_XSC3) += proc-xsc3.o +obj-$(CONFIG_CPU_MOHAWK) += proc-mohawk.o +obj-$(CONFIG_CPU_FEROCEON) += proc-feroceon.o +obj-$(CONFIG_CPU_V6) += proc-v6.o +obj-$(CONFIG_CPU_V6K) += proc-v6.o +obj-$(CONFIG_CPU_V7) += proc-v7.o +obj-$(CONFIG_CPU_V7M) += proc-v7m.o + +AFLAGS_proc-v6.o :=-Wa,-march=armv6 +AFLAGS_proc-v7.o :=-Wa,-march=armv7-a + +obj-$(CONFIG_OUTER_CACHE) += l2c-common.o +obj-$(CONFIG_CACHE_FEROCEON_L2) += cache-feroceon-l2.o +obj-$(CONFIG_CACHE_L2X0) += cache-l2x0.o l2c-l2x0-resume.o +obj-$(CONFIG_CACHE_XSC3L2) += cache-xsc3l2.o +obj-$(CONFIG_CACHE_TAUROS2) += cache-tauros2.o diff --git a/arch/arm/mm/abort-ev4.S b/arch/arm/mm/abort-ev4.S index 4f18f9e87ba..54473cd4aba 100644 --- a/arch/arm/mm/abort-ev4.S +++ b/arch/arm/mm/abort-ev4.S @@ -3,14 +3,11 @@ /* * Function: v4_early_abort * - * Params : r2 = address of aborted instruction - * : r3 = saved SPSR + * Params : r2 = pt_regs + * : r4 = aborted context pc + * : r5 = aborted context psr * - * Returns : r0 = address of abort - * : r1 = FSR, bit 11 = write - * : r2-r8 = corrupted - * : r9 = preserved - * : sp = pointer to registers + * Returns : r4 - r11, r13 preserved * * Purpose : obtain information about current aborted instruction. * Note: we read user space. This means we might cause a data @@ -21,10 +18,8 @@ ENTRY(v4_early_abort) mrc p15, 0, r1, c5, c0, 0 @ get FSR mrc p15, 0, r0, c6, c0, 0 @ get FAR - ldr r3, [r2] @ read aborted ARM instruction + ldr r3, [r4] @ read aborted ARM instruction bic r1, r1, #1 << 11 | 1 << 10 @ clear bits 11 and 10 of FSR tst r3, #1 << 20 @ L = 1 -> write? orreq r1, r1, #1 << 11 @ yes. - mov pc, lr - - + b do_DataAbort diff --git a/arch/arm/mm/abort-ev4t.S b/arch/arm/mm/abort-ev4t.S index b6282548f92..9da704e7b86 100644 --- a/arch/arm/mm/abort-ev4t.S +++ b/arch/arm/mm/abort-ev4t.S @@ -4,14 +4,11 @@ /* * Function: v4t_early_abort * - * Params : r2 = address of aborted instruction - * : r3 = saved SPSR + * Params : r2 = pt_regs + * : r4 = aborted context pc + * : r5 = aborted context psr * - * Returns : r0 = address of abort - * : r1 = FSR, bit 11 = write - * : r2-r8 = corrupted - * : r9 = preserved - * : sp = pointer to registers + * Returns : r4 - r11, r13 preserved * * Purpose : obtain information about current aborted instruction. * Note: we read user space. 
This means we might cause a data @@ -22,9 +19,9 @@ ENTRY(v4t_early_abort) mrc p15, 0, r1, c5, c0, 0 @ get FSR mrc p15, 0, r0, c6, c0, 0 @ get FAR - do_thumb_abort - ldreq r3, [r2] @ read aborted ARM instruction + do_thumb_abort fsr=r1, pc=r4, psr=r5, tmp=r3 + ldreq r3, [r4] @ read aborted ARM instruction bic r1, r1, #1 << 11 | 1 << 10 @ clear bits 11 and 10 of FSR tst r3, #1 << 20 @ check write orreq r1, r1, #1 << 11 - mov pc, lr + b do_DataAbort diff --git a/arch/arm/mm/abort-ev5t.S b/arch/arm/mm/abort-ev5t.S index 02251b526c0..a0908d4653a 100644 --- a/arch/arm/mm/abort-ev5t.S +++ b/arch/arm/mm/abort-ev5t.S @@ -4,14 +4,11 @@ /* * Function: v5t_early_abort * - * Params : r2 = address of aborted instruction - * : r3 = saved SPSR + * Params : r2 = pt_regs + * : r4 = aborted context pc + * : r5 = aborted context psr * - * Returns : r0 = address of abort - * : r1 = FSR, bit 11 = write - * : r2-r8 = corrupted - * : r9 = preserved - * : sp = pointer to registers + * Returns : r4 - r11, r13 preserved * * Purpose : obtain information about current aborted instruction. * Note: we read user space. This means we might cause a data @@ -22,10 +19,10 @@ ENTRY(v5t_early_abort) mrc p15, 0, r1, c5, c0, 0 @ get FSR mrc p15, 0, r0, c6, c0, 0 @ get FAR - do_thumb_abort - ldreq r3, [r2] @ read aborted ARM instruction + do_thumb_abort fsr=r1, pc=r4, psr=r5, tmp=r3 + ldreq r3, [r4] @ read aborted ARM instruction bic r1, r1, #1 << 11 @ clear bits 11 of FSR - do_ldrd_abort + do_ldrd_abort tmp=ip, insn=r3 tst r3, #1 << 20 @ check write orreq r1, r1, #1 << 11 - mov pc, lr + b do_DataAbort diff --git a/arch/arm/mm/abort-ev5tj.S b/arch/arm/mm/abort-ev5tj.S index bce68d601c8..4006b7a6126 100644 --- a/arch/arm/mm/abort-ev5tj.S +++ b/arch/arm/mm/abort-ev5tj.S @@ -4,14 +4,11 @@ /* * Function: v5tj_early_abort * - * Params : r2 = address of aborted instruction - * : r3 = saved SPSR + * Params : r2 = pt_regs + * : r4 = aborted context pc + * : r5 = aborted context psr * - * Returns : r0 = address of abort - * : r1 = FSR, bit 11 = write - * : r2-r8 = corrupted - * : r9 = preserved - * : sp = pointer to registers + * Returns : r4 - r11, r13 preserved * * Purpose : obtain information about current aborted instruction. * Note: we read user space. This means we might cause a data @@ -23,13 +20,11 @@ ENTRY(v5tj_early_abort) mrc p15, 0, r1, c5, c0, 0 @ get FSR mrc p15, 0, r0, c6, c0, 0 @ get FAR bic r1, r1, #1 << 11 | 1 << 10 @ clear bits 11 and 10 of FSR - tst r3, #PSR_J_BIT @ Java? - movne pc, lr - do_thumb_abort - ldreq r3, [r2] @ read aborted ARM instruction - do_ldrd_abort + tst r5, #PSR_J_BIT @ Java? + bne do_DataAbort + do_thumb_abort fsr=r1, pc=r4, psr=r5, tmp=r3 + ldreq r3, [r4] @ read aborted ARM instruction + do_ldrd_abort tmp=ip, insn=r3 tst r3, #1 << 20 @ L = 0 -> write orreq r1, r1, #1 << 11 @ yes. - mov pc, lr - - + b do_DataAbort diff --git a/arch/arm/mm/abort-ev6.S b/arch/arm/mm/abort-ev6.S index dbd34603312..3815a8262af 100644 --- a/arch/arm/mm/abort-ev6.S +++ b/arch/arm/mm/abort-ev6.S @@ -4,14 +4,11 @@ /* * Function: v6_early_abort * - * Params : r2 = address of aborted instruction - * : r3 = saved SPSR + * Params : r2 = pt_regs + * : r4 = aborted context pc + * : r5 = aborted context psr * - * Returns : r0 = address of abort - * : r1 = FSR, bit 11 = write - * : r2-r8 = corrupted - * : r9 = preserved - * : sp = pointer to registers + * Returns : r4 - r11, r13 preserved * * Purpose : obtain information about current aborted instruction. * Note: we read user space. 
This means we might cause a data @@ -20,25 +17,31 @@ */ .align 5 ENTRY(v6_early_abort) -#ifdef CONFIG_CPU_MPCORE +#ifdef CONFIG_CPU_V6 + sub r1, sp, #4 @ Get unused stack location + strex r0, r1, [r1] @ Clear the exclusive monitor +#elif defined(CONFIG_CPU_32v6K) clrex -#else - strex r0, r1, [sp] @ Clear the exclusive monitor #endif mrc p15, 0, r1, c5, c0, 0 @ get FSR mrc p15, 0, r0, c6, c0, 0 @ get FAR /* * Faulty SWP instruction on 1136 doesn't set bit 11 in DFSR. - * The test below covers all the write situations, including Java bytecodes */ - bic r1, r1, #1 << 11 | 1 << 10 @ clear bits 11 and 10 of FSR - tst r3, #PSR_J_BIT @ Java? - movne pc, lr - do_thumb_abort - ldreq r3, [r2] @ read aborted ARM instruction - do_ldrd_abort +#ifdef CONFIG_ARM_ERRATA_326103 + ldr ip, =0x4107b36 + mrc p15, 0, r3, c0, c0, 0 @ get processor id + teq ip, r3, lsr #4 @ r0 ARM1136? + bne do_DataAbort + tst r5, #PSR_J_BIT @ Java? + tsteq r5, #PSR_T_BIT @ Thumb? + bne do_DataAbort + bic r1, r1, #1 << 11 @ clear bit 11 of FSR + ldr r3, [r4] @ read aborted ARM instruction + ARM_BE8(rev r3, r3) + + do_ldrd_abort tmp=ip, insn=r3 tst r3, #1 << 20 @ L = 0 -> write orreq r1, r1, #1 << 11 @ yes. - mov pc, lr - - +#endif + b do_DataAbort diff --git a/arch/arm/mm/abort-ev7.S b/arch/arm/mm/abort-ev7.S new file mode 100644 index 00000000000..703375277ba --- /dev/null +++ b/arch/arm/mm/abort-ev7.S @@ -0,0 +1,51 @@ +#include <linux/linkage.h> +#include <asm/assembler.h> +/* + * Function: v7_early_abort + * + * Params : r2 = pt_regs + * : r4 = aborted context pc + * : r5 = aborted context psr + * + * Returns : r4 - r11, r13 preserved + * + * Purpose : obtain information about current aborted instruction. + */ + .align 5 +ENTRY(v7_early_abort) + /* + * The effect of data aborts on on the exclusive access monitor are + * UNPREDICTABLE. Do a CLREX to clear the state + */ + clrex + + mrc p15, 0, r1, c5, c0, 0 @ get FSR + mrc p15, 0, r0, c6, c0, 0 @ get FAR + + /* + * V6 code adjusts the returned DFSR. + * New designs should not need to patch up faults. + */ + +#if defined(CONFIG_VERIFY_PERMISSION_FAULT) + /* + * Detect erroneous permission failures and fix + */ + ldr r3, =0x40d @ On permission fault + and r3, r1, r3 + cmp r3, #0x0d + bne do_DataAbort + + mcr p15, 0, r0, c7, c8, 0 @ Retranslate FAR + isb + mrc p15, 0, ip, c7, c4, 0 @ Read the PAR + and r3, ip, #0x7b @ On translation fault + cmp r3, #0x0b + bne do_DataAbort + bic r1, r1, #0xf @ Fix up FSR FS[5:0] + and ip, ip, #0x7e + orr r1, r1, ip, LSR #1 +#endif + + b do_DataAbort +ENDPROC(v7_early_abort) diff --git a/arch/arm/mm/abort-lv4t.S b/arch/arm/mm/abort-lv4t.S index db743e51021..f3982580c27 100644 --- a/arch/arm/mm/abort-lv4t.S +++ b/arch/arm/mm/abort-lv4t.S @@ -3,14 +3,11 @@ /* * Function: v4t_late_abort * - * Params : r2 = address of aborted instruction - * : r3 = saved SPSR + * Params : r2 = pt_regs + * : r4 = aborted context pc + * : r5 = aborted context psr * - * Returns : r0 = address of abort - * : r1 = FSR, bit 11 = write - * : r2-r8 = corrupted - * : r9 = preserved - * : sp = pointer to registers + * Returns : r4-r5, r10-r11, r13 preserved * * Purpose : obtain information about current aborted instruction. * Note: we read user space. This means we might cause a data @@ -18,12 +15,17 @@ * picture. Unfortunately, this does happen. We live with it. 
*/ ENTRY(v4t_late_abort) - tst r3, #PSR_T_BIT @ check for thumb mode + tst r5, #PSR_T_BIT @ check for thumb mode +#ifdef CONFIG_CPU_CP15_MMU mrc p15, 0, r1, c5, c0, 0 @ get FSR mrc p15, 0, r0, c6, c0, 0 @ get FAR - bne .data_thumb_abort - ldr r8, [r2] @ read arm instruction bic r1, r1, #1 << 11 | 1 << 10 @ clear bits 11 and 10 of FSR +#else + mov r0, #0 @ clear r0, r1 (no FSR/FAR) + mov r1, #0 +#endif + bne .data_thumb_abort + ldr r8, [r4] @ read arm instruction tst r8, #1 << 20 @ L = 1 -> write? orreq r1, r1, #1 << 11 @ yes. and r7, r8, #15 << 24 @@ -42,86 +44,84 @@ ENTRY(v4t_late_abort) /* 9 */ b .data_arm_ldmstm @ ldm*b rn, <rlist> /* a */ b .data_unknown /* b */ b .data_unknown -/* c */ mov pc, lr @ ldc rd, [rn], #m @ Same as ldr rd, [rn], #m -/* d */ mov pc, lr @ ldc rd, [rn, #m] +/* c */ b do_DataAbort @ ldc rd, [rn], #m @ Same as ldr rd, [rn], #m +/* d */ b do_DataAbort @ ldc rd, [rn, #m] /* e */ b .data_unknown /* f */ .data_unknown: @ Part of jumptable - mov r0, r2 + mov r0, r4 mov r1, r8 - mov r2, sp - bl baddataabort - b ret_from_exception + b baddataabort .data_arm_ldmstm: tst r8, #1 << 21 @ check writeback bit - moveq pc, lr @ no writeback -> no fixup + beq do_DataAbort @ no writeback -> no fixup mov r7, #0x11 orr r7, r7, #0x1100 and r6, r8, r7 - and r2, r8, r7, lsl #1 - add r6, r6, r2, lsr #1 - and r2, r8, r7, lsl #2 - add r6, r6, r2, lsr #2 - and r2, r8, r7, lsl #3 - add r6, r6, r2, lsr #3 + and r9, r8, r7, lsl #1 + add r6, r6, r9, lsr #1 + and r9, r8, r7, lsl #2 + add r6, r6, r9, lsr #2 + and r9, r8, r7, lsl #3 + add r6, r6, r9, lsr #3 add r6, r6, r6, lsr #8 add r6, r6, r6, lsr #4 and r6, r6, #15 @ r6 = no. of registers to transfer. - and r5, r8, #15 << 16 @ Extract 'n' from instruction - ldr r7, [sp, r5, lsr #14] @ Get register 'Rn' + and r9, r8, #15 << 16 @ Extract 'n' from instruction + ldr r7, [r2, r9, lsr #14] @ Get register 'Rn' tst r8, #1 << 23 @ Check U bit subne r7, r7, r6, lsl #2 @ Undo increment addeq r7, r7, r6, lsl #2 @ Undo decrement - str r7, [sp, r5, lsr #14] @ Put register 'Rn' - mov pc, lr + str r7, [r2, r9, lsr #14] @ Put register 'Rn' + b do_DataAbort .data_arm_lateldrhpre: tst r8, #1 << 21 @ Check writeback bit - moveq pc, lr @ No writeback -> no fixup + beq do_DataAbort @ No writeback -> no fixup .data_arm_lateldrhpost: - and r5, r8, #0x00f @ get Rm / low nibble of immediate value + and r9, r8, #0x00f @ get Rm / low nibble of immediate value tst r8, #1 << 22 @ if (immediate offset) andne r6, r8, #0xf00 @ { immediate high nibble - orrne r6, r5, r6, lsr #4 @ combine nibbles } else - ldreq r6, [sp, r5, lsl #2] @ { load Rm value } + orrne r6, r9, r6, lsr #4 @ combine nibbles } else + ldreq r6, [r2, r9, lsl #2] @ { load Rm value } .data_arm_apply_r6_and_rn: - and r5, r8, #15 << 16 @ Extract 'n' from instruction - ldr r7, [sp, r5, lsr #14] @ Get register 'Rn' + and r9, r8, #15 << 16 @ Extract 'n' from instruction + ldr r7, [r2, r9, lsr #14] @ Get register 'Rn' tst r8, #1 << 23 @ Check U bit subne r7, r7, r6 @ Undo incrmenet addeq r7, r7, r6 @ Undo decrement - str r7, [sp, r5, lsr #14] @ Put register 'Rn' - mov pc, lr + str r7, [r2, r9, lsr #14] @ Put register 'Rn' + b do_DataAbort .data_arm_lateldrpreconst: tst r8, #1 << 21 @ check writeback bit - moveq pc, lr @ no writeback -> no fixup + beq do_DataAbort @ no writeback -> no fixup .data_arm_lateldrpostconst: - movs r2, r8, lsl #20 @ Get offset - moveq pc, lr @ zero -> no fixup - and r5, r8, #15 << 16 @ Extract 'n' from instruction - ldr r7, [sp, r5, lsr #14] @ Get register 'Rn' + movs r6, r8, lsl #20 @ Get 
offset + beq do_DataAbort @ zero -> no fixup + and r9, r8, #15 << 16 @ Extract 'n' from instruction + ldr r7, [r2, r9, lsr #14] @ Get register 'Rn' tst r8, #1 << 23 @ Check U bit - subne r7, r7, r2, lsr #20 @ Undo increment - addeq r7, r7, r2, lsr #20 @ Undo decrement - str r7, [sp, r5, lsr #14] @ Put register 'Rn' - mov pc, lr + subne r7, r7, r6, lsr #20 @ Undo increment + addeq r7, r7, r6, lsr #20 @ Undo decrement + str r7, [r2, r9, lsr #14] @ Put register 'Rn' + b do_DataAbort .data_arm_lateldrprereg: tst r8, #1 << 21 @ check writeback bit - moveq pc, lr @ no writeback -> no fixup + beq do_DataAbort @ no writeback -> no fixup .data_arm_lateldrpostreg: and r7, r8, #15 @ Extract 'm' from instruction - ldr r6, [sp, r7, lsl #2] @ Get register 'Rm' - mov r5, r8, lsr #7 @ get shift count - ands r5, r5, #31 + ldr r6, [r2, r7, lsl #2] @ Get register 'Rm' + mov r9, r8, lsr #7 @ get shift count + ands r9, r9, #31 and r7, r8, #0x70 @ get shift type orreq r7, r7, #8 @ shift count = 0 add pc, pc, r7 nop - mov r6, r6, lsl r5 @ 0: LSL #!0 + mov r6, r6, lsl r9 @ 0: LSL #!0 b .data_arm_apply_r6_and_rn b .data_arm_apply_r6_and_rn @ 1: LSL #0 nop @@ -129,7 +129,7 @@ ENTRY(v4t_late_abort) nop b .data_unknown @ 3: MUL? nop - mov r6, r6, lsr r5 @ 4: LSR #!0 + mov r6, r6, lsr r9 @ 4: LSR #!0 b .data_arm_apply_r6_and_rn mov r6, r6, lsr #32 @ 5: LSR #32 b .data_arm_apply_r6_and_rn @@ -137,7 +137,7 @@ ENTRY(v4t_late_abort) nop b .data_unknown @ 7: MUL? nop - mov r6, r6, asr r5 @ 8: ASR #!0 + mov r6, r6, asr r9 @ 8: ASR #!0 b .data_arm_apply_r6_and_rn mov r6, r6, asr #32 @ 9: ASR #32 b .data_arm_apply_r6_and_rn @@ -145,7 +145,7 @@ ENTRY(v4t_late_abort) nop b .data_unknown @ B: MUL? nop - mov r6, r6, ror r5 @ C: ROR #!0 + mov r6, r6, ror r9 @ C: ROR #!0 b .data_arm_apply_r6_and_rn mov r6, r6, rrx @ D: RRX b .data_arm_apply_r6_and_rn @@ -154,7 +154,7 @@ ENTRY(v4t_late_abort) b .data_unknown @ F: MUL? .data_thumb_abort: - ldrh r8, [r2] @ read instruction + ldrh r8, [r4] @ read instruction tst r8, #1 << 11 @ L = 1 -> write? 
orreq r1, r1, #1 << 8 @ yes and r7, r8, #15 << 12 @@ -167,10 +167,10 @@ ENTRY(v4t_late_abort) /* 3 */ b .data_unknown /* 4 */ b .data_unknown /* 5 */ b .data_thumb_reg -/* 6 */ mov pc, lr -/* 7 */ mov pc, lr -/* 8 */ mov pc, lr -/* 9 */ mov pc, lr +/* 6 */ b do_DataAbort +/* 7 */ b do_DataAbort +/* 8 */ b do_DataAbort +/* 9 */ b do_DataAbort /* A */ b .data_unknown /* B */ b .data_thumb_pushpop /* C */ b .data_thumb_ldmstm @@ -180,41 +180,41 @@ ENTRY(v4t_late_abort) .data_thumb_reg: tst r8, #1 << 9 - moveq pc, lr + beq do_DataAbort tst r8, #1 << 10 @ If 'S' (signed) bit is set movne r1, #0 @ it must be a load instr - mov pc, lr + b do_DataAbort .data_thumb_pushpop: tst r8, #1 << 10 beq .data_unknown and r6, r8, #0x55 @ hweight8(r8) + R bit - and r2, r8, #0xaa - add r6, r6, r2, lsr #1 - and r2, r6, #0xcc + and r9, r8, #0xaa + add r6, r6, r9, lsr #1 + and r9, r6, #0xcc and r6, r6, #0x33 - add r6, r6, r2, lsr #2 + add r6, r6, r9, lsr #2 movs r7, r8, lsr #9 @ C = r8 bit 8 (R bit) adc r6, r6, r6, lsr #4 @ high + low nibble + R bit and r6, r6, #15 @ number of regs to transfer - ldr r7, [sp, #13 << 2] + ldr r7, [r2, #13 << 2] tst r8, #1 << 11 addeq r7, r7, r6, lsl #2 @ increment SP if PUSH subne r7, r7, r6, lsl #2 @ decrement SP if POP - str r7, [sp, #13 << 2] - mov pc, lr + str r7, [r2, #13 << 2] + b do_DataAbort .data_thumb_ldmstm: and r6, r8, #0x55 @ hweight8(r8) - and r2, r8, #0xaa - add r6, r6, r2, lsr #1 - and r2, r6, #0xcc + and r9, r8, #0xaa + add r6, r6, r9, lsr #1 + and r9, r6, #0xcc and r6, r6, #0x33 - add r6, r6, r2, lsr #2 + add r6, r6, r9, lsr #2 add r6, r6, r6, lsr #4 - and r5, r8, #7 << 8 - ldr r7, [sp, r5, lsr #6] + and r9, r8, #7 << 8 + ldr r7, [r2, r9, lsr #6] and r6, r6, #15 @ number of regs to transfer sub r7, r7, r6, lsl #2 @ always decrement - str r7, [sp, r5, lsr #6] - mov pc, lr + str r7, [r2, r9, lsr #6] + b do_DataAbort diff --git a/arch/arm/mm/abort-macro.S b/arch/arm/mm/abort-macro.S index d7cb1bfa51a..2cbf68ef0e8 100644 --- a/arch/arm/mm/abort-macro.S +++ b/arch/arm/mm/abort-macro.S @@ -9,34 +9,32 @@ * */ - .macro do_thumb_abort - tst r3, #PSR_T_BIT + .macro do_thumb_abort, fsr, pc, psr, tmp + tst \psr, #PSR_T_BIT beq not_thumb - ldrh r3, [r2] @ Read aborted Thumb instruction - and r3, r3, # 0xfe00 @ Mask opcode field - cmp r3, # 0x5600 @ Is it ldrsb? - orreq r3, r3, #1 << 11 @ Set L-bit if yes - tst r3, #1 << 11 @ L = 0 -> write - orreq r1, r1, #1 << 11 @ yes. - mov pc, lr + ldrh \tmp, [\pc] @ Read aborted Thumb instruction + and \tmp, \tmp, # 0xfe00 @ Mask opcode field + cmp \tmp, # 0x5600 @ Is it ldrsb? + orreq \tmp, \tmp, #1 << 11 @ Set L-bit if yes + tst \tmp, #1 << 11 @ L = 0 -> write + orreq \fsr, \fsr, #1 << 11 @ yes. + b do_DataAbort not_thumb: .endm /* - * We check for the following insturction encoding for LDRD. + * We check for the following instruction encoding for LDRD. 
* - * [27:25] == 0 + * [27:25] == 000 * [7:4] == 1101 * [20] == 0 */ - .macro do_ldrd_abort - tst r3, #0x0e000000 @ [27:25] == 0 + .macro do_ldrd_abort, tmp, insn + tst \insn, #0x0e100000 @ [27:25,20] == 0 bne not_ldrd - and r2, r3, #0x000000f0 @ [7:4] == 1101 - cmp r2, #0x000000d0 - bne not_ldrd - tst r3, #1 << 20 @ [20] == 0 - moveq pc, lr + and \tmp, \insn, #0x000000f0 @ [7:4] == 1101 + cmp \tmp, #0x000000d0 + beq do_DataAbort not_ldrd: .endm diff --git a/arch/arm/mm/abort-nommu.S b/arch/arm/mm/abort-nommu.S new file mode 100644 index 00000000000..119cb479c2a --- /dev/null +++ b/arch/arm/mm/abort-nommu.S @@ -0,0 +1,20 @@ +#include <linux/linkage.h> +#include <asm/assembler.h> +/* + * Function: nommu_early_abort + * + * Params : r2 = pt_regs + * : r4 = aborted context pc + * : r5 = aborted context psr + * + * Returns : r4 - r11, r13 preserved + * + * Note: There is no FSR/FAR on !CPU_CP15_MMU cores. + * Just fill zero into the registers. + */ + .align 5 +ENTRY(nommu_early_abort) + mov r0, #0 @ clear r0, r1 (no FSR/FAR) + mov r1, #0 + b do_DataAbort +ENDPROC(nommu_early_abort) diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index 705c98921c3..b8cb1a2688a 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -3,7 +3,7 @@ * * Copyright (C) 1995 Linus Torvalds * Modifications for ARM processor (c) 1995-2001 Russell King - * Thumb aligment fault fixups (c) 2004 MontaVista Software, Inc. + * Thumb alignment fault fixups (c) 2004 MontaVista Software, Inc. * - Adapted from gdb/sim/arm/thumbemu.c -- Thumb instruction emulation. * Copyright (C) 1996, Cygnus Software Technologies Ltd. * @@ -11,19 +11,24 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ -#include <linux/config.h> +#include <linux/moduleparam.h> #include <linux/compiler.h> #include <linux/kernel.h> #include <linux/errno.h> #include <linux/string.h> -#include <linux/ptrace.h> #include <linux/proc_fs.h> +#include <linux/seq_file.h> #include <linux/init.h> +#include <linux/sched.h> +#include <linux/uaccess.h> -#include <asm/uaccess.h> +#include <asm/cp15.h> +#include <asm/system_info.h> #include <asm/unaligned.h> +#include <asm/opcodes.h> #include "fault.h" +#include "mm.h" /* * 32-bit misaligned trap handler (c) 1998 San Mehat (CCC) -July 1998 @@ -63,6 +68,12 @@ #define SHIFT_ASR 0x40 #define SHIFT_RORRRX 0x60 +#define BAD_INSTR 0xdeadc0de + +/* Thumb-2 32 bit format per ARMv7 DDI0406A A6.3, either f800h,e800h,f800h */ +#define IS_T32(hi16) \ + (((hi16) & 0xe000) == 0xe000 && ((hi16) & 0x1800)) + static unsigned long ai_user; static unsigned long ai_sys; static unsigned long ai_skipped; @@ -71,6 +82,40 @@ static unsigned long ai_word; static unsigned long ai_dword; static unsigned long ai_multi; static int ai_usermode; +static unsigned long cr_no_alignment; + +core_param(alignment, ai_usermode, int, 0600); + +#define UM_WARN (1 << 0) +#define UM_FIXUP (1 << 1) +#define UM_SIGNAL (1 << 2) + +/* Return true if and only if the ARMv6 unaligned access model is in use. */ +static bool cpu_is_v6_unaligned(void) +{ + return cpu_architecture() >= CPU_ARCH_ARMv6 && get_cr() & CR_U; +} + +static int safe_usermode(int new_usermode, bool warn) +{ + /* + * ARMv6 and later CPUs can perform unaligned accesses for + * most single load and store instructions up to word size. + * LDM, STM, LDRD and STRD still need to be handled. 
+ * + * Ignoring the alignment fault is not an option on these + * CPUs since we spin re-faulting the instruction without + * making any progress. + */ + if (cpu_is_v6_unaligned() && !(new_usermode & (UM_FIXUP | UM_SIGNAL))) { + new_usermode |= UM_FIXUP; + + if (warn) + printk(KERN_WARNING "alignment: ignoring faults is unsafe on this CPU. Defaulting to fixup mode.\n"); + } + + return new_usermode; +} #ifdef CONFIG_PROC_FS static const char *usermode_action[] = { @@ -82,36 +127,29 @@ static const char *usermode_action[] = { "signal+warn" }; -static int -proc_alignment_read(char *page, char **start, off_t off, int count, int *eof, - void *data) +static int alignment_proc_show(struct seq_file *m, void *v) { - char *p = page; - int len; - - p += sprintf(p, "User:\t\t%lu\n", ai_user); - p += sprintf(p, "System:\t\t%lu\n", ai_sys); - p += sprintf(p, "Skipped:\t%lu\n", ai_skipped); - p += sprintf(p, "Half:\t\t%lu\n", ai_half); - p += sprintf(p, "Word:\t\t%lu\n", ai_word); + seq_printf(m, "User:\t\t%lu\n", ai_user); + seq_printf(m, "System:\t\t%lu\n", ai_sys); + seq_printf(m, "Skipped:\t%lu\n", ai_skipped); + seq_printf(m, "Half:\t\t%lu\n", ai_half); + seq_printf(m, "Word:\t\t%lu\n", ai_word); if (cpu_architecture() >= CPU_ARCH_ARMv5TE) - p += sprintf(p, "DWord:\t\t%lu\n", ai_dword); - p += sprintf(p, "Multi:\t\t%lu\n", ai_multi); - p += sprintf(p, "User faults:\t%i (%s)\n", ai_usermode, + seq_printf(m, "DWord:\t\t%lu\n", ai_dword); + seq_printf(m, "Multi:\t\t%lu\n", ai_multi); + seq_printf(m, "User faults:\t%i (%s)\n", ai_usermode, usermode_action[ai_usermode]); - len = (p - page) - off; - if (len < 0) - len = 0; - - *eof = (len <= count) ? 1 : 0; - *start = page + off; + return 0; +} - return len; +static int alignment_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, alignment_proc_show, NULL); } -static int proc_alignment_write(struct file *file, const char __user *buffer, - unsigned long count, void *data) +static ssize_t alignment_proc_write(struct file *file, const char __user *buffer, + size_t count, loff_t *pos) { char mode; @@ -119,11 +157,18 @@ static int proc_alignment_write(struct file *file, const char __user *buffer, if (get_user(mode, buffer)) return -EFAULT; if (mode >= '0' && mode <= '5') - ai_usermode = mode - '0'; + ai_usermode = safe_usermode(mode - '0', true); } return count; } +static const struct file_operations alignment_proc_fops = { + .open = alignment_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .write = alignment_proc_write, +}; #endif /* CONFIG_PROC_FS */ union offset_union { @@ -150,17 +195,19 @@ union offset_union { #define __get8_unaligned_check(ins,val,addr,err) \ __asm__( \ - "1: "ins" %1, [%2], #1\n" \ + ARM( "1: "ins" %1, [%2], #1\n" ) \ + THUMB( "1: "ins" %1, [%2]\n" ) \ + THUMB( " add %2, %2, #1\n" ) \ "2:\n" \ - " .section .fixup,\"ax\"\n" \ + " .pushsection .fixup,\"ax\"\n" \ " .align 2\n" \ "3: mov %0, #1\n" \ " b 2b\n" \ - " .previous\n" \ - " .section __ex_table,\"a\"\n" \ + " .popsection\n" \ + " .pushsection __ex_table,\"a\"\n" \ " .align 3\n" \ " .long 1b, 3b\n" \ - " .previous\n" \ + " .popsection\n" \ : "=r" (err), "=&r" (val), "=r" (addr) \ : "0" (err), "2" (addr)) @@ -206,20 +253,22 @@ union offset_union { do { \ unsigned int err = 0, v = val, a = addr; \ __asm__( FIRST_BYTE_16 \ - "1: "ins" %1, [%2], #1\n" \ + ARM( "1: "ins" %1, [%2], #1\n" ) \ + THUMB( "1: "ins" %1, [%2]\n" ) \ + THUMB( " add %2, %2, #1\n" ) \ " mov %1, %1, "NEXT_BYTE"\n" \ "2: "ins" %1, [%2]\n" \ "3:\n" \ - 
" .section .fixup,\"ax\"\n" \ + " .pushsection .fixup,\"ax\"\n" \ " .align 2\n" \ "4: mov %0, #1\n" \ " b 3b\n" \ - " .previous\n" \ - " .section __ex_table,\"a\"\n" \ + " .popsection\n" \ + " .pushsection __ex_table,\"a\"\n" \ " .align 3\n" \ " .long 1b, 4b\n" \ " .long 2b, 4b\n" \ - " .previous\n" \ + " .popsection\n" \ : "=r" (err), "=&r" (v), "=&r" (a) \ : "0" (err), "1" (v), "2" (a)); \ if (err) \ @@ -236,26 +285,32 @@ union offset_union { do { \ unsigned int err = 0, v = val, a = addr; \ __asm__( FIRST_BYTE_32 \ - "1: "ins" %1, [%2], #1\n" \ + ARM( "1: "ins" %1, [%2], #1\n" ) \ + THUMB( "1: "ins" %1, [%2]\n" ) \ + THUMB( " add %2, %2, #1\n" ) \ " mov %1, %1, "NEXT_BYTE"\n" \ - "2: "ins" %1, [%2], #1\n" \ + ARM( "2: "ins" %1, [%2], #1\n" ) \ + THUMB( "2: "ins" %1, [%2]\n" ) \ + THUMB( " add %2, %2, #1\n" ) \ " mov %1, %1, "NEXT_BYTE"\n" \ - "3: "ins" %1, [%2], #1\n" \ + ARM( "3: "ins" %1, [%2], #1\n" ) \ + THUMB( "3: "ins" %1, [%2]\n" ) \ + THUMB( " add %2, %2, #1\n" ) \ " mov %1, %1, "NEXT_BYTE"\n" \ "4: "ins" %1, [%2]\n" \ "5:\n" \ - " .section .fixup,\"ax\"\n" \ + " .pushsection .fixup,\"ax\"\n" \ " .align 2\n" \ "6: mov %0, #1\n" \ " b 5b\n" \ - " .previous\n" \ - " .section __ex_table,\"a\"\n" \ + " .popsection\n" \ + " .pushsection __ex_table,\"a\"\n" \ " .align 3\n" \ " .long 1b, 6b\n" \ " .long 2b, 6b\n" \ " .long 3b, 6b\n" \ " .long 4b, 6b\n" \ - " .previous\n" \ + " .popsection\n" \ : "=r" (err), "=&r" (v), "=&r" (a) \ : "0" (err), "1" (v), "2" (a)); \ if (err) \ @@ -329,38 +384,48 @@ do_alignment_ldrdstrd(unsigned long addr, unsigned long instr, struct pt_regs *regs) { unsigned int rd = RD_BITS(instr); - - if (((rd & 1) == 1) || (rd == 14)) + unsigned int rd2; + int load; + + if ((instr & 0xfe000000) == 0xe8000000) { + /* ARMv7 Thumb-2 32-bit LDRD/STRD */ + rd2 = (instr >> 8) & 0xf; + load = !!(LDST_L_BIT(instr)); + } else if (((rd & 1) == 1) || (rd == 14)) goto bad; + else { + load = ((instr & 0xf0) == 0xd0); + rd2 = rd + 1; + } ai_dword += 1; if (user_mode(regs)) goto user; - if ((instr & 0xf0) == 0xd0) { + if (load) { unsigned long val; get32_unaligned_check(val, addr); regs->uregs[rd] = val; get32_unaligned_check(val, addr + 4); - regs->uregs[rd + 1] = val; + regs->uregs[rd2] = val; } else { put32_unaligned_check(regs->uregs[rd], addr); - put32_unaligned_check(regs->uregs[rd + 1], addr + 4); + put32_unaligned_check(regs->uregs[rd2], addr + 4); } return TYPE_LDST; user: - if ((instr & 0xf0) == 0xd0) { + if (load) { unsigned long val; get32t_unaligned_check(val, addr); regs->uregs[rd] = val; get32t_unaligned_check(val, addr + 4); - regs->uregs[rd + 1] = val; + regs->uregs[rd2] = val; } else { put32t_unaligned_check(regs->uregs[rd], addr); - put32t_unaligned_check(regs->uregs[rd + 1], addr + 4); + put32t_unaligned_check(regs->uregs[rd2], addr + 4); } return TYPE_LDST; @@ -613,32 +678,112 @@ thumb2arm(u16 tinstr) /* Else fall through for illegal instruction case */ default: - return 0xdeadc0de; + return BAD_INSTR; } } +/* + * Convert Thumb-2 32 bit LDM, STM, LDRD, STRD to equivalent instruction + * handlable by ARM alignment handler, also find the corresponding handler, + * so that we can reuse ARM userland alignment fault fixups for Thumb. + * + * @pinstr: original Thumb-2 instruction; returns new handlable instruction + * @regs: register context. + * @poffset: return offset from faulted addr for later writeback + * + * NOTES: + * 1. Comments below refer to ARMv7 DDI0406A Thumb Instruction sections. + * 2. 
Register name Rt from ARMv7 is same as Rd from ARMv6 (Rd is Rt) + */ +static void * +do_alignment_t32_to_handler(unsigned long *pinstr, struct pt_regs *regs, + union offset_union *poffset) +{ + unsigned long instr = *pinstr; + u16 tinst1 = (instr >> 16) & 0xffff; + u16 tinst2 = instr & 0xffff; + + switch (tinst1 & 0xffe0) { + /* A6.3.5 Load/Store multiple */ + case 0xe880: /* STM/STMIA/STMEA,LDM/LDMIA, PUSH/POP T2 */ + case 0xe8a0: /* ...above writeback version */ + case 0xe900: /* STMDB/STMFD, LDMDB/LDMEA */ + case 0xe920: /* ...above writeback version */ + /* no need offset decision since handler calculates it */ + return do_alignment_ldmstm; + + case 0xf840: /* POP/PUSH T3 (single register) */ + if (RN_BITS(instr) == 13 && (tinst2 & 0x09ff) == 0x0904) { + u32 L = !!(LDST_L_BIT(instr)); + const u32 subset[2] = { + 0xe92d0000, /* STMDB sp!,{registers} */ + 0xe8bd0000, /* LDMIA sp!,{registers} */ + }; + *pinstr = subset[L] | (1<<RD_BITS(instr)); + return do_alignment_ldmstm; + } + /* Else fall through for illegal instruction case */ + break; + + /* A6.3.6 Load/store double, STRD/LDRD(immed, lit, reg) */ + case 0xe860: + case 0xe960: + case 0xe8e0: + case 0xe9e0: + poffset->un = (tinst2 & 0xff) << 2; + case 0xe940: + case 0xe9c0: + return do_alignment_ldrdstrd; + + /* + * No need to handle load/store instructions up to word size + * since ARMv6 and later CPUs can perform unaligned accesses. + */ + default: + break; + } + return NULL; +} + static int do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { - union offset_union offset; + union offset_union uninitialized_var(offset); unsigned long instr = 0, instrptr; int (*handler)(unsigned long addr, unsigned long instr, struct pt_regs *regs); unsigned int type; - mm_segment_t fs; unsigned int fault; u16 tinstr = 0; + int isize = 4; + int thumb2_32b = 0; + + if (interrupts_enabled(regs)) + local_irq_enable(); instrptr = instruction_pointer(regs); - fs = get_fs(); - set_fs(KERNEL_DS); - if (thumb_mode(regs)) { - fault = __get_user(tinstr, (u16 *)(instrptr & ~1)); - if (!(fault)) - instr = thumb2arm(tinstr); - } else - fault = __get_user(instr, (u32 *)instrptr); - set_fs(fs); + if (thumb_mode(regs)) { + u16 *ptr = (u16 *)(instrptr & ~1); + fault = probe_kernel_address(ptr, tinstr); + tinstr = __mem_to_opcode_thumb16(tinstr); + if (!fault) { + if (cpu_architecture() >= CPU_ARCH_ARMv7 && + IS_T32(tinstr)) { + /* Thumb-2 32-bit */ + u16 tinst2 = 0; + fault = probe_kernel_address(ptr + 1, tinst2); + tinst2 = __mem_to_opcode_thumb16(tinst2); + instr = __opcode_thumb32_compose(tinstr, tinst2); + thumb2_32b = 1; + } else { + isize = 2; + instr = thumb2arm(tinstr); + } + } + } else { + fault = probe_kernel_address(instrptr, instr); + instr = __mem_to_opcode_arm(instr); + } if (fault) { type = TYPE_FAULT; @@ -652,7 +797,7 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) fixup: - regs->ARM_pc += thumb_mode(regs) ? 
2 : 4; + regs->ARM_pc += isize; switch (CODING_BITS(instr)) { case 0x00000000: /* 3.13.4 load/store instruction extensions */ @@ -711,18 +856,28 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) handler = do_alignment_ldrstr; break; - case 0x08000000: /* ldm or stm */ - handler = do_alignment_ldmstm; + case 0x08000000: /* ldm or stm, or thumb-2 32bit instruction */ + if (thumb2_32b) { + offset.un = 0; + handler = do_alignment_t32_to_handler(&instr, regs, &offset); + } else { + offset.un = 0; + handler = do_alignment_ldmstm; + } break; default: goto bad; } + if (!handler) + goto bad; type = handler(addr, instr, regs); - if (type == TYPE_ERROR || type == TYPE_FAULT) + if (type == TYPE_ERROR || type == TYPE_FAULT) { + regs->ARM_pc -= isize; goto bad_or_fault; + } if (type == TYPE_LDST) do_alignment_finish_ldst(addr, instr, regs, offset); @@ -732,11 +887,10 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) bad_or_fault: if (type == TYPE_ERROR) goto bad; - regs->ARM_pc -= thumb_mode(regs) ? 2 : 4; /* * We got a fault - fix it up, or die. */ - do_bad_area(current, current->mm, addr, fsr, regs); + do_bad_area(addr, fsr, regs); return 0; swp: @@ -748,33 +902,62 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) */ printk(KERN_ERR "Alignment trap: not handling instruction " "%0*lx at [<%08lx>]\n", - thumb_mode(regs) ? 4 : 8, - thumb_mode(regs) ? tinstr : instr, instrptr); + isize << 1, + isize == 2 ? tinstr : instr, instrptr); ai_skipped += 1; return 1; user: ai_user += 1; - if (ai_usermode & 1) + if (ai_usermode & UM_WARN) printk("Alignment trap: %s (%d) PC=0x%08lx Instr=0x%0*lx " "Address=0x%08lx FSR 0x%03x\n", current->comm, - current->pid, instrptr, - thumb_mode(regs) ? 4 : 8, - thumb_mode(regs) ? tinstr : instr, + task_pid_nr(current), instrptr, + isize << 1, + isize == 2 ? tinstr : instr, addr, fsr); - if (ai_usermode & 2) + if (ai_usermode & UM_FIXUP) goto fixup; - if (ai_usermode & 4) - force_sig(SIGBUS, current); - else - set_cr(cr_no_alignment); + if (ai_usermode & UM_SIGNAL) { + siginfo_t si; + + si.si_signo = SIGBUS; + si.si_errno = 0; + si.si_code = BUS_ADRALN; + si.si_addr = (void __user *)addr; + + force_sig_info(si.si_signo, &si, current); + } else { + /* + * We're about to disable the alignment trap and return to + * user space. But if an interrupt occurs before actually + * reaching user space, then the IRQ vector entry code will + * notice that we were still in kernel space and therefore + * the alignment trap won't be re-enabled in that case as it + * is presumed to be always on from kernel space. + * Let's prevent that race by disabling interrupts here (they + * are disabled on the way back to user space anyway in + * entry-common.S) and disable the alignment trap only if + * there is no work pending for this thread. + */ + raw_local_irq_disable(); + if (!(current_thread_info()->flags & _TIF_WORK_MASK)) + set_cr(cr_no_alignment); + } return 0; } +static int __init noalign_setup(char *__unused) +{ + set_cr(__clear_cr(CR_A)); + return 1; +} +__setup("noalign", noalign_setup); + /* * This needs to be done after sysctl_init, otherwise sys/ will be * overwritten. 
Actually, this shouldn't be in sys/ at all since @@ -786,20 +969,33 @@ static int __init alignment_init(void) #ifdef CONFIG_PROC_FS struct proc_dir_entry *res; - res = proc_mkdir("cpu", NULL); + res = proc_create("cpu/alignment", S_IWUSR | S_IRUGO, NULL, + &alignment_proc_fops); if (!res) return -ENOMEM; +#endif - res = create_proc_entry("alignment", S_IWUSR | S_IRUGO, res); - if (!res) - return -ENOMEM; + if (cpu_is_v6_unaligned()) { + set_cr(__clear_cr(CR_A)); + ai_usermode = safe_usermode(ai_usermode, false); + } - res->read_proc = proc_alignment_read; - res->write_proc = proc_alignment_write; -#endif + cr_no_alignment = get_cr() & ~CR_A; - hook_fault_code(1, do_alignment, SIGILL, "alignment exception"); - hook_fault_code(3, do_alignment, SIGILL, "alignment exception"); + hook_fault_code(FAULT_CODE_ALIGNMENT, do_alignment, SIGBUS, BUS_ADRALN, + "alignment exception"); + + /* + * ARMv6K and ARMv7 use fault status 3 (0b00011) as Access Flag section + * fault, not as alignment error. + * + * TODO: handle ARMv6K properly. Runtime check for 'K' extension is + * needed. + */ + if (cpu_architecture() <= CPU_ARCH_ARMv6) { + hook_fault_code(3, do_alignment, SIGBUS, BUS_ADRALN, + "alignment exception"); + } return 0; } diff --git a/arch/arm/mm/blockops.c b/arch/arm/mm/blockops.c deleted file mode 100644 index 4f5ee2d0899..00000000000 --- a/arch/arm/mm/blockops.c +++ /dev/null @@ -1,185 +0,0 @@ -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/errno.h> -#include <linux/mm.h> - -#include <asm/memory.h> -#include <asm/ptrace.h> -#include <asm/cacheflush.h> -#include <asm/traps.h> - -extern struct cpu_cache_fns blk_cache_fns; - -#define HARVARD_CACHE - -/* - * blk_flush_kern_dcache_page(kaddr) - * - * Ensure that the data held in the page kaddr is written back - * to the page in question. - * - * - kaddr - kernel address (guaranteed to be page aligned) - */ -static void __attribute__((naked)) -blk_flush_kern_dcache_page(void *kaddr) -{ - asm( - "add r1, r0, %0 \n\ - sub r1, r1, %1 \n\ -1: .word 0xec401f0e @ mcrr p15, 0, r0, r1, c14, 0 @ blocking \n\ - mov r0, #0 \n\ - mcr p15, 0, r0, c7, c5, 0 \n\ - mcr p15, 0, r0, c7, c10, 4 \n\ - mov pc, lr" - : - : "I" (PAGE_SIZE), "I" (L1_CACHE_BYTES)); -} - -/* - * blk_dma_inv_range(start,end) - * - * Invalidate the data cache within the specified region; we will - * be performing a DMA operation in this region and we want to - * purge old data in the cache. 
- * - * - start - virtual start address of region - * - end - virtual end address of region - */ -static void __attribute__((naked)) -blk_dma_inv_range_unified(unsigned long start, unsigned long end) -{ - asm( - "tst r0, %0 \n\ - mcrne p15, 0, r0, c7, c11, 1 @ clean unified line \n\ - tst r1, %0 \n\ - mcrne p15, 0, r1, c7, c15, 1 @ clean & invalidate unified line\n\ - .word 0xec401f06 @ mcrr p15, 0, r1, r0, c6, 0 @ blocking \n\ - mov r0, #0 \n\ - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer \n\ - mov pc, lr" - : - : "I" (L1_CACHE_BYTES - 1)); -} - -static void __attribute__((naked)) -blk_dma_inv_range_harvard(unsigned long start, unsigned long end) -{ - asm( - "tst r0, %0 \n\ - mcrne p15, 0, r0, c7, c10, 1 @ clean D line \n\ - tst r1, %0 \n\ - mcrne p15, 0, r1, c7, c14, 1 @ clean & invalidate D line \n\ - .word 0xec401f06 @ mcrr p15, 0, r1, r0, c6, 0 @ blocking \n\ - mov r0, #0 \n\ - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer \n\ - mov pc, lr" - : - : "I" (L1_CACHE_BYTES - 1)); -} - -/* - * blk_dma_clean_range(start,end) - * - start - virtual start address of region - * - end - virtual end address of region - */ -static void __attribute__((naked)) -blk_dma_clean_range(unsigned long start, unsigned long end) -{ - asm( - ".word 0xec401f0c @ mcrr p15, 0, r1, r0, c12, 0 @ blocking \n\ - mov r0, #0 \n\ - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer \n\ - mov pc, lr"); -} - -/* - * blk_dma_flush_range(start,end) - * - start - virtual start address of region - * - end - virtual end address of region - */ -static void __attribute__((naked)) -blk_dma_flush_range(unsigned long start, unsigned long end) -{ - asm( - ".word 0xec401f0e @ mcrr p15, 0, r1, r0, c14, 0 @ blocking \n\ - mov pc, lr"); -} - -static int blockops_trap(struct pt_regs *regs, unsigned int instr) -{ - regs->ARM_r4 |= regs->ARM_r2; - regs->ARM_pc += 4; - return 0; -} - -static char *func[] = { - "Prefetch data range", - "Clean+Invalidate data range", - "Clean data range", - "Invalidate data range", - "Invalidate instr range" -}; - -static struct undef_hook blockops_hook __initdata = { - .instr_mask = 0x0fffffd0, - .instr_val = 0x0c401f00, - .cpsr_mask = PSR_T_BIT, - .cpsr_val = 0, - .fn = blockops_trap, -}; - -static int __init blockops_check(void) -{ - register unsigned int err asm("r4") = 0; - unsigned int err_pos = 1; - unsigned int cache_type; - int i; - - asm("mrc p15, 0, %0, c0, c0, 1" : "=r" (cache_type)); - - printk("Checking V6 block cache operations:\n"); - register_undef_hook(&blockops_hook); - - __asm__ ("mov r0, %0\n\t" - "mov r1, %1\n\t" - "mov r2, #1\n\t" - ".word 0xec401f2c @ mcrr p15, 0, r1, r0, c12, 2\n\t" - "mov r2, #2\n\t" - ".word 0xec401f0e @ mcrr p15, 0, r1, r0, c14, 0\n\t" - "mov r2, #4\n\t" - ".word 0xec401f0c @ mcrr p15, 0, r1, r0, c12, 0\n\t" - "mov r2, #8\n\t" - ".word 0xec401f06 @ mcrr p15, 0, r1, r0, c6, 0\n\t" - "mov r2, #16\n\t" - ".word 0xec401f05 @ mcrr p15, 0, r1, r0, c5, 0\n\t" - : - : "r" (PAGE_OFFSET), "r" (PAGE_OFFSET + 128) - : "r0", "r1", "r2"); - - unregister_undef_hook(&blockops_hook); - - for (i = 0; i < ARRAY_SIZE(func); i++, err_pos <<= 1) - printk("%30s: %ssupported\n", func[i], err & err_pos ? "not " : ""); - - if ((err & 8) == 0) { - printk(" --> Using %s block cache invalidate\n", - cache_type & (1 << 24) ? 
"harvard" : "unified"); - if (cache_type & (1 << 24)) - cpu_cache.dma_inv_range = blk_dma_inv_range_harvard; - else - cpu_cache.dma_inv_range = blk_dma_inv_range_unified; - } - if ((err & 4) == 0) { - printk(" --> Using block cache clean\n"); - cpu_cache.dma_clean_range = blk_dma_clean_range; - } - if ((err & 2) == 0) { - printk(" --> Using block cache clean+invalidate\n"); - cpu_cache.dma_flush_range = blk_dma_flush_range; - cpu_cache.flush_kern_dcache_page = blk_flush_kern_dcache_page; - } - - return 0; -} - -__initcall(blockops_check); diff --git a/arch/arm/mm/cache-aurora-l2.h b/arch/arm/mm/cache-aurora-l2.h new file mode 100644 index 00000000000..c8612476983 --- /dev/null +++ b/arch/arm/mm/cache-aurora-l2.h @@ -0,0 +1,55 @@ +/* + * AURORA shared L2 cache controller support + * + * Copyright (C) 2012 Marvell + * + * Yehuda Yitschak <yehuday@marvell.com> + * Gregory CLEMENT <gregory.clement@free-electrons.com> + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + */ + +#ifndef __ASM_ARM_HARDWARE_AURORA_L2_H +#define __ASM_ARM_HARDWARE_AURORA_L2_H + +#define AURORA_SYNC_REG 0x700 +#define AURORA_RANGE_BASE_ADDR_REG 0x720 +#define AURORA_FLUSH_PHY_ADDR_REG 0x7f0 +#define AURORA_INVAL_RANGE_REG 0x774 +#define AURORA_CLEAN_RANGE_REG 0x7b4 +#define AURORA_FLUSH_RANGE_REG 0x7f4 + +#define AURORA_ACR_REPLACEMENT_OFFSET 27 +#define AURORA_ACR_REPLACEMENT_MASK \ + (0x3 << AURORA_ACR_REPLACEMENT_OFFSET) +#define AURORA_ACR_REPLACEMENT_TYPE_WAYRR \ + (0 << AURORA_ACR_REPLACEMENT_OFFSET) +#define AURORA_ACR_REPLACEMENT_TYPE_LFSR \ + (1 << AURORA_ACR_REPLACEMENT_OFFSET) +#define AURORA_ACR_REPLACEMENT_TYPE_SEMIPLRU \ + (3 << AURORA_ACR_REPLACEMENT_OFFSET) + +#define AURORA_ACR_FORCE_WRITE_POLICY_OFFSET 0 +#define AURORA_ACR_FORCE_WRITE_POLICY_MASK \ + (0x3 << AURORA_ACR_FORCE_WRITE_POLICY_OFFSET) +#define AURORA_ACR_FORCE_WRITE_POLICY_DIS \ + (0 << AURORA_ACR_FORCE_WRITE_POLICY_OFFSET) +#define AURORA_ACR_FORCE_WRITE_BACK_POLICY \ + (1 << AURORA_ACR_FORCE_WRITE_POLICY_OFFSET) +#define AURORA_ACR_FORCE_WRITE_THRO_POLICY \ + (2 << AURORA_ACR_FORCE_WRITE_POLICY_OFFSET) + +#define MAX_RANGE_SIZE 1024 + +#define AURORA_WAY_SIZE_SHIFT 2 + +#define AURORA_CTRL_FW 0x100 + +/* chose a number outside L2X0_CACHE_ID_PART_MASK to be sure to make + * the distinction between a number coming from hardware and a number + * coming from the device tree */ +#define AURORA_CACHE_ID 0x100 + +#endif /* __ASM_ARM_HARDWARE_AURORA_L2_H */ diff --git a/arch/arm/mm/cache-fa.S b/arch/arm/mm/cache-fa.S new file mode 100644 index 00000000000..e505befe51b --- /dev/null +++ b/arch/arm/mm/cache-fa.S @@ -0,0 +1,249 @@ +/* + * linux/arch/arm/mm/cache-fa.S + * + * Copyright (C) 2005 Faraday Corp. + * Copyright (C) 2008-2009 Paulius Zaleckas <paulius.zaleckas@teltonika.lt> + * + * Based on cache-v4wb.S: + * Copyright (C) 1997-2002 Russell king + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Processors: FA520 FA526 FA626 + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/memory.h> +#include <asm/page.h> + +#include "proc-macros.S" + +/* + * The size of one data cache line. + */ +#define CACHE_DLINESIZE 16 + +/* + * The total size of the data cache. 
+ */ +#ifdef CONFIG_ARCH_GEMINI +#define CACHE_DSIZE 8192 +#else +#define CACHE_DSIZE 16384 +#endif + +/* FIXME: put optimal value here. Current one is just estimation */ +#define CACHE_DLIMIT (CACHE_DSIZE * 2) + +/* + * flush_icache_all() + * + * Unconditionally clean and invalidate the entire icache. + */ +ENTRY(fa_flush_icache_all) + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mov pc, lr +ENDPROC(fa_flush_icache_all) + +/* + * flush_user_cache_all() + * + * Clean and invalidate all cache entries in a particular address + * space. + */ +ENTRY(fa_flush_user_cache_all) + /* FALLTHROUGH */ +/* + * flush_kern_cache_all() + * + * Clean and invalidate the entire cache. + */ +ENTRY(fa_flush_kern_cache_all) + mov ip, #0 + mov r2, #VM_EXEC +__flush_whole_cache: + mcr p15, 0, ip, c7, c14, 0 @ clean/invalidate D cache + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcrne p15, 0, ip, c7, c5, 6 @ invalidate BTB + mcrne p15, 0, ip, c7, c10, 4 @ drain write buffer + mcrne p15, 0, ip, c7, c5, 4 @ prefetch flush + mov pc, lr + +/* + * flush_user_cache_range(start, end, flags) + * + * Invalidate a range of cache entries in the specified + * address space. + * + * - start - start address (inclusive, page aligned) + * - end - end address (exclusive, page aligned) + * - flags - vma_area_struct flags describing address space + */ +ENTRY(fa_flush_user_cache_range) + mov ip, #0 + sub r3, r1, r0 @ calculate total size + cmp r3, #CACHE_DLIMIT @ total size >= limit? + bhs __flush_whole_cache @ flush whole D cache + +1: tst r2, #VM_EXEC + mcrne p15, 0, r0, c7, c5, 1 @ invalidate I line + mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c5, 6 @ invalidate BTB + mcrne p15, 0, ip, c7, c10, 4 @ data write barrier + mcrne p15, 0, ip, c7, c5, 4 @ prefetch flush + mov pc, lr + +/* + * coherent_kern_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(fa_coherent_kern_range) + /* fall through */ + +/* + * coherent_user_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(fa_coherent_user_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry + mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c5, 6 @ invalidate BTB + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + mcr p15, 0, r0, c7, c5, 4 @ prefetch flush + mov pc, lr + +/* + * flush_kern_dcache_area(void *addr, size_t size) + * + * Ensure that the data held in the page kaddr is written back + * to the page in question. 
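+ * Each loop iteration below covers one CACHE_DLINESIZE (16 byte)
+ * D-cache line, so e.g. a 4 KB region takes 256 clean+invalidate
+ * operations.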
+ * + * - addr - kernel address + * - size - size of region + */ +ENTRY(fa_flush_kern_dcache_area) + add r1, r0, r1 +1: mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D line + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + mov pc, lr + +/* + * dma_inv_range(start, end) + * + * Invalidate (discard) the specified virtual address range. + * May not write back any entries. If 'start' or 'end' + * are not cache line aligned, those lines must be written + * back. + * + * - start - virtual start address + * - end - virtual end address + */ +fa_dma_inv_range: + tst r0, #CACHE_DLINESIZE - 1 + bic r0, r0, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r0, c7, c14, 1 @ clean & invalidate D entry + tst r1, #CACHE_DLINESIZE - 1 + bic r1, r1, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r1, c7, c14, 1 @ clean & invalidate D entry +1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + mov pc, lr + +/* + * dma_clean_range(start, end) + * + * Clean (write back) the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + */ +fa_dma_clean_range: + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + mov pc, lr + +/* + * dma_flush_range(start,end) + * - start - virtual start address of region + * - end - virtual end address of region + */ +ENTRY(fa_dma_flush_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + mov pc, lr + +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(fa_dma_map_area) + add r1, r1, r0 + cmp r2, #DMA_TO_DEVICE + beq fa_dma_clean_range + bcs fa_dma_inv_range + b fa_dma_flush_range +ENDPROC(fa_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(fa_dma_unmap_area) + mov pc, lr +ENDPROC(fa_dma_unmap_area) + + .globl fa_flush_kern_cache_louis + .equ fa_flush_kern_cache_louis, fa_flush_kern_cache_all + + __INITDATA + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions fa diff --git a/arch/arm/mm/cache-feroceon-l2.c b/arch/arm/mm/cache-feroceon-l2.c new file mode 100644 index 00000000000..e028a7f2ebc --- /dev/null +++ b/arch/arm/mm/cache-feroceon-l2.c @@ -0,0 +1,396 @@ +/* + * arch/arm/mm/cache-feroceon-l2.c - Feroceon L2 cache controller support + * + * Copyright (C) 2008 Marvell Semiconductor + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + * + * References: + * - Unified Layer 2 Cache for Feroceon CPU Cores, + * Document ID MV-S104858-00, Rev. A, October 23 2007. 
+ */ + +#include <linux/init.h> +#include <linux/of.h> +#include <linux/of_address.h> +#include <linux/highmem.h> +#include <linux/io.h> +#include <asm/cacheflush.h> +#include <asm/cp15.h> +#include <asm/hardware/cache-feroceon-l2.h> + +#define L2_WRITETHROUGH_KIRKWOOD BIT(4) + +/* + * Low-level cache maintenance operations. + * + * As well as the regular 'clean/invalidate/flush L2 cache line by + * MVA' instructions, the Feroceon L2 cache controller also features + * 'clean/invalidate L2 range by MVA' operations. + * + * Cache range operations are initiated by writing the start and + * end addresses to successive cp15 registers, and process every + * cache line whose first byte address lies in the inclusive range + * [start:end]. + * + * The cache range operations stall the CPU pipeline until completion. + * + * The range operations require two successive cp15 writes, in + * between which we don't want to be preempted. + */ + +static inline unsigned long l2_get_va(unsigned long paddr) +{ +#ifdef CONFIG_HIGHMEM + /* + * Because range ops can't be done on physical addresses, + * we simply install a virtual mapping for it only for the + * TLB lookup to occur, hence no need to flush the untouched + * memory mapping afterwards (note: a cache flush may happen + * in some circumstances depending on the path taken in kunmap_atomic). + */ + void *vaddr = kmap_atomic_pfn(paddr >> PAGE_SHIFT); + return (unsigned long)vaddr + (paddr & ~PAGE_MASK); +#else + return __phys_to_virt(paddr); +#endif +} + +static inline void l2_put_va(unsigned long vaddr) +{ +#ifdef CONFIG_HIGHMEM + kunmap_atomic((void *)vaddr); +#endif +} + +static inline void l2_clean_pa(unsigned long addr) +{ + __asm__("mcr p15, 1, %0, c15, c9, 3" : : "r" (addr)); +} + +static inline void l2_clean_pa_range(unsigned long start, unsigned long end) +{ + unsigned long va_start, va_end, flags; + + /* + * Make sure 'start' and 'end' reference the same page, as + * L2 is PIPT and range operations only do a TLB lookup on + * the start address. + */ + BUG_ON((start ^ end) >> PAGE_SHIFT); + + va_start = l2_get_va(start); + va_end = va_start + (end - start); + raw_local_irq_save(flags); + __asm__("mcr p15, 1, %0, c15, c9, 4\n\t" + "mcr p15, 1, %1, c15, c9, 5" + : : "r" (va_start), "r" (va_end)); + raw_local_irq_restore(flags); + l2_put_va(va_start); +} + +static inline void l2_clean_inv_pa(unsigned long addr) +{ + __asm__("mcr p15, 1, %0, c15, c10, 3" : : "r" (addr)); +} + +static inline void l2_inv_pa(unsigned long addr) +{ + __asm__("mcr p15, 1, %0, c15, c11, 3" : : "r" (addr)); +} + +static inline void l2_inv_pa_range(unsigned long start, unsigned long end) +{ + unsigned long va_start, va_end, flags; + + /* + * Make sure 'start' and 'end' reference the same page, as + * L2 is PIPT and range operations only do a TLB lookup on + * the start address. + */ + BUG_ON((start ^ end) >> PAGE_SHIFT); + + va_start = l2_get_va(start); + va_end = va_start + (end - start); + raw_local_irq_save(flags); + __asm__("mcr p15, 1, %0, c15, c11, 4\n\t" + "mcr p15, 1, %1, c15, c11, 5" + : : "r" (va_start), "r" (va_end)); + raw_local_irq_restore(flags); + l2_put_va(va_start); +} + +static inline void l2_inv_all(void) +{ + __asm__("mcr p15, 1, %0, c15, c11, 0" : : "r" (0)); +} + +/* + * Linux primitives. + * + * Note that the end addresses passed to Linux primitives are + * noninclusive, while the hardware cache range operations use + * inclusive start and end addresses. 
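+ *
+ * For example, one hardware range operation cleaning N bytes starting
+ * at a line-aligned physical address 'paddr' (N bounded by
+ * MAX_RANGE_SIZE and the containing page) is issued as:
+ *
+ *	l2_clean_pa_range(paddr, paddr + N - CACHE_LINE_SIZE);
+ *
+ * i.e. the 'end' argument names the first byte of the last line to be
+ * processed, which is why the range loops below subtract
+ * CACHE_LINE_SIZE from their noninclusive range_end.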
+ */ +#define CACHE_LINE_SIZE 32 +#define MAX_RANGE_SIZE 1024 + +static int l2_wt_override; + +static unsigned long calc_range_end(unsigned long start, unsigned long end) +{ + unsigned long range_end; + + BUG_ON(start & (CACHE_LINE_SIZE - 1)); + BUG_ON(end & (CACHE_LINE_SIZE - 1)); + + /* + * Try to process all cache lines between 'start' and 'end'. + */ + range_end = end; + + /* + * Limit the number of cache lines processed at once, + * since cache range operations stall the CPU pipeline + * until completion. + */ + if (range_end > start + MAX_RANGE_SIZE) + range_end = start + MAX_RANGE_SIZE; + + /* + * Cache range operations can't straddle a page boundary. + */ + if (range_end > (start | (PAGE_SIZE - 1)) + 1) + range_end = (start | (PAGE_SIZE - 1)) + 1; + + return range_end; +} + +static void feroceon_l2_inv_range(unsigned long start, unsigned long end) +{ + /* + * Clean and invalidate partial first cache line. + */ + if (start & (CACHE_LINE_SIZE - 1)) { + l2_clean_inv_pa(start & ~(CACHE_LINE_SIZE - 1)); + start = (start | (CACHE_LINE_SIZE - 1)) + 1; + } + + /* + * Clean and invalidate partial last cache line. + */ + if (start < end && end & (CACHE_LINE_SIZE - 1)) { + l2_clean_inv_pa(end & ~(CACHE_LINE_SIZE - 1)); + end &= ~(CACHE_LINE_SIZE - 1); + } + + /* + * Invalidate all full cache lines between 'start' and 'end'. + */ + while (start < end) { + unsigned long range_end = calc_range_end(start, end); + l2_inv_pa_range(start, range_end - CACHE_LINE_SIZE); + start = range_end; + } + + dsb(); +} + +static void feroceon_l2_clean_range(unsigned long start, unsigned long end) +{ + /* + * If L2 is forced to WT, the L2 will always be clean and we + * don't need to do anything here. + */ + if (!l2_wt_override) { + start &= ~(CACHE_LINE_SIZE - 1); + end = (end + CACHE_LINE_SIZE - 1) & ~(CACHE_LINE_SIZE - 1); + while (start != end) { + unsigned long range_end = calc_range_end(start, end); + l2_clean_pa_range(start, range_end - CACHE_LINE_SIZE); + start = range_end; + } + } + + dsb(); +} + +static void feroceon_l2_flush_range(unsigned long start, unsigned long end) +{ + start &= ~(CACHE_LINE_SIZE - 1); + end = (end + CACHE_LINE_SIZE - 1) & ~(CACHE_LINE_SIZE - 1); + while (start != end) { + unsigned long range_end = calc_range_end(start, end); + if (!l2_wt_override) + l2_clean_pa_range(start, range_end - CACHE_LINE_SIZE); + l2_inv_pa_range(start, range_end - CACHE_LINE_SIZE); + start = range_end; + } + + dsb(); +} + + +/* + * Routines to disable and re-enable the D-cache and I-cache at run + * time. These are necessary because the L2 cache can only be enabled + * or disabled while the L1 Dcache and Icache are both disabled. 
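+ *
+ * The L2 turn-on in enable_l2() below is therefore bracketed by these
+ * helpers, in this order:
+ *
+ *	d = flush_and_disable_dcache();
+ *	i = invalidate_and_disable_icache();
+ *	l2_inv_all();
+ *	write_extra_features(u | 0x00400000);	<- the L2 enable bit
+ *	if (i)
+ *		enable_icache();
+ *	if (d)
+ *		enable_dcache();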
+ */ +static int __init flush_and_disable_dcache(void) +{ + u32 cr; + + cr = get_cr(); + if (cr & CR_C) { + unsigned long flags; + + raw_local_irq_save(flags); + flush_cache_all(); + set_cr(cr & ~CR_C); + raw_local_irq_restore(flags); + return 1; + } + return 0; +} + +static void __init enable_dcache(void) +{ + u32 cr; + + cr = get_cr(); + set_cr(cr | CR_C); +} + +static void __init __invalidate_icache(void) +{ + __asm__("mcr p15, 0, %0, c7, c5, 0" : : "r" (0)); +} + +static int __init invalidate_and_disable_icache(void) +{ + u32 cr; + + cr = get_cr(); + if (cr & CR_I) { + set_cr(cr & ~CR_I); + __invalidate_icache(); + return 1; + } + return 0; +} + +static void __init enable_icache(void) +{ + u32 cr; + + cr = get_cr(); + set_cr(cr | CR_I); +} + +static inline u32 read_extra_features(void) +{ + u32 u; + + __asm__("mrc p15, 1, %0, c15, c1, 0" : "=r" (u)); + + return u; +} + +static inline void write_extra_features(u32 u) +{ + __asm__("mcr p15, 1, %0, c15, c1, 0" : : "r" (u)); +} + +static void __init disable_l2_prefetch(void) +{ + u32 u; + + /* + * Read the CPU Extra Features register and verify that the + * Disable L2 Prefetch bit is set. + */ + u = read_extra_features(); + if (!(u & 0x01000000)) { + printk(KERN_INFO "Feroceon L2: Disabling L2 prefetch.\n"); + write_extra_features(u | 0x01000000); + } +} + +static void __init enable_l2(void) +{ + u32 u; + + u = read_extra_features(); + if (!(u & 0x00400000)) { + int i, d; + + printk(KERN_INFO "Feroceon L2: Enabling L2\n"); + + d = flush_and_disable_dcache(); + i = invalidate_and_disable_icache(); + l2_inv_all(); + write_extra_features(u | 0x00400000); + if (i) + enable_icache(); + if (d) + enable_dcache(); + } else + pr_err(FW_BUG + "Feroceon L2: bootloader left the L2 cache on!\n"); +} + +void __init feroceon_l2_init(int __l2_wt_override) +{ + l2_wt_override = __l2_wt_override; + + disable_l2_prefetch(); + + outer_cache.inv_range = feroceon_l2_inv_range; + outer_cache.clean_range = feroceon_l2_clean_range; + outer_cache.flush_range = feroceon_l2_flush_range; + + enable_l2(); + + printk(KERN_INFO "Feroceon L2: Cache support initialised%s.\n", + l2_wt_override ? 
", in WT override mode" : ""); +} +#ifdef CONFIG_OF +static const struct of_device_id feroceon_ids[] __initconst = { + { .compatible = "marvell,kirkwood-cache"}, + { .compatible = "marvell,feroceon-cache"}, + {} +}; + +int __init feroceon_of_init(void) +{ + struct device_node *node; + void __iomem *base; + bool l2_wt_override = false; + struct resource res; + +#if defined(CONFIG_CACHE_FEROCEON_L2_WRITETHROUGH) + l2_wt_override = true; +#endif + + node = of_find_matching_node(NULL, feroceon_ids); + if (node && of_device_is_compatible(node, "marvell,kirkwood-cache")) { + if (of_address_to_resource(node, 0, &res)) + return -ENODEV; + + base = ioremap(res.start, resource_size(&res)); + if (!base) + return -ENOMEM; + + if (l2_wt_override) + writel(readl(base) | L2_WRITETHROUGH_KIRKWOOD, base); + else + writel(readl(base) & ~L2_WRITETHROUGH_KIRKWOOD, base); + } + + feroceon_l2_init(l2_wt_override); + + return 0; +} +#endif diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c new file mode 100644 index 00000000000..7c3fb41a462 --- /dev/null +++ b/arch/arm/mm/cache-l2x0.c @@ -0,0 +1,1547 @@ +/* + * arch/arm/mm/cache-l2x0.c - L210/L220 cache controller support + * + * Copyright (C) 2007 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include <linux/cpu.h> +#include <linux/err.h> +#include <linux/init.h> +#include <linux/smp.h> +#include <linux/spinlock.h> +#include <linux/io.h> +#include <linux/of.h> +#include <linux/of_address.h> + +#include <asm/cacheflush.h> +#include <asm/cp15.h> +#include <asm/cputype.h> +#include <asm/hardware/cache-l2x0.h> +#include "cache-tauros3.h" +#include "cache-aurora-l2.h" + +struct l2c_init_data { + const char *type; + unsigned way_size_0; + unsigned num_lock; + void (*of_parse)(const struct device_node *, u32 *, u32 *); + void (*enable)(void __iomem *, u32, unsigned); + void (*fixup)(void __iomem *, u32, struct outer_cache_fns *); + void (*save)(void __iomem *); + struct outer_cache_fns outer_cache; +}; + +#define CACHE_LINE_SIZE 32 + +static void __iomem *l2x0_base; +static DEFINE_RAW_SPINLOCK(l2x0_lock); +static u32 l2x0_way_mask; /* Bitmask of active ways */ +static u32 l2x0_size; +static unsigned long sync_reg_offset = L2X0_CACHE_SYNC; + +struct l2x0_regs l2x0_saved_regs; + +/* + * Common code for all cache controllers. + */ +static inline void l2c_wait_mask(void __iomem *reg, unsigned long mask) +{ + /* wait for cache operation by line or way to complete */ + while (readl_relaxed(reg) & mask) + cpu_relax(); +} + +/* + * By default, we write directly to secure registers. Platforms must + * override this if they are running non-secure. 
+ */ +static void l2c_write_sec(unsigned long val, void __iomem *base, unsigned reg) +{ + if (val == readl_relaxed(base + reg)) + return; + if (outer_cache.write_sec) + outer_cache.write_sec(val, reg); + else + writel_relaxed(val, base + reg); +} + +/* + * This should only be called when we have a requirement that the + * register be written due to a work-around, as platforms running + * in non-secure mode may not be able to access this register. + */ +static inline void l2c_set_debug(void __iomem *base, unsigned long val) +{ + l2c_write_sec(val, base, L2X0_DEBUG_CTRL); +} + +static void __l2c_op_way(void __iomem *reg) +{ + writel_relaxed(l2x0_way_mask, reg); + l2c_wait_mask(reg, l2x0_way_mask); +} + +static inline void l2c_unlock(void __iomem *base, unsigned num) +{ + unsigned i; + + for (i = 0; i < num; i++) { + writel_relaxed(0, base + L2X0_LOCKDOWN_WAY_D_BASE + + i * L2X0_LOCKDOWN_STRIDE); + writel_relaxed(0, base + L2X0_LOCKDOWN_WAY_I_BASE + + i * L2X0_LOCKDOWN_STRIDE); + } +} + +/* + * Enable the L2 cache controller. This function must only be + * called when the cache controller is known to be disabled. + */ +static void l2c_enable(void __iomem *base, u32 aux, unsigned num_lock) +{ + unsigned long flags; + + l2c_write_sec(aux, base, L2X0_AUX_CTRL); + + l2c_unlock(base, num_lock); + + local_irq_save(flags); + __l2c_op_way(base + L2X0_INV_WAY); + writel_relaxed(0, base + sync_reg_offset); + l2c_wait_mask(base + sync_reg_offset, 1); + local_irq_restore(flags); + + l2c_write_sec(L2X0_CTRL_EN, base, L2X0_CTRL); +} + +static void l2c_disable(void) +{ + void __iomem *base = l2x0_base; + + outer_cache.flush_all(); + l2c_write_sec(0, base, L2X0_CTRL); + dsb(st); +} + +#ifdef CONFIG_CACHE_PL310 +static inline void cache_wait(void __iomem *reg, unsigned long mask) +{ + /* cache operations by line are atomic on PL310 */ +} +#else +#define cache_wait l2c_wait_mask +#endif + +static inline void cache_sync(void) +{ + void __iomem *base = l2x0_base; + + writel_relaxed(0, base + sync_reg_offset); + cache_wait(base + L2X0_CACHE_SYNC, 1); +} + +#if defined(CONFIG_PL310_ERRATA_588369) || defined(CONFIG_PL310_ERRATA_727915) +static inline void debug_writel(unsigned long val) +{ + l2c_set_debug(l2x0_base, val); +} +#else +/* Optimised out for non-errata case */ +static inline void debug_writel(unsigned long val) +{ +} +#endif + +static void l2x0_cache_sync(void) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&l2x0_lock, flags); + cache_sync(); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); +} + +static void __l2x0_flush_all(void) +{ + debug_writel(0x03); + __l2c_op_way(l2x0_base + L2X0_CLEAN_INV_WAY); + cache_sync(); + debug_writel(0x00); +} + +static void l2x0_flush_all(void) +{ + unsigned long flags; + + /* clean all ways */ + raw_spin_lock_irqsave(&l2x0_lock, flags); + __l2x0_flush_all(); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); +} + +static void l2x0_disable(void) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&l2x0_lock, flags); + __l2x0_flush_all(); + l2c_write_sec(0, l2x0_base, L2X0_CTRL); + dsb(st); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); +} + +static void l2c_save(void __iomem *base) +{ + l2x0_saved_regs.aux_ctrl = readl_relaxed(l2x0_base + L2X0_AUX_CTRL); +} + +/* + * L2C-210 specific code. + * + * The L2C-2x0 PA, set/way and sync operations are atomic, but we must + * ensure that no background operation is running. The way operations + * are all background tasks. 
+ * + * While a background operation is in progress, any new operation is + * ignored (unspecified whether this causes an error.) Thankfully, not + * used on SMP. + * + * Never has a different sync register other than L2X0_CACHE_SYNC, but + * we use sync_reg_offset here so we can share some of this with L2C-310. + */ +static void __l2c210_cache_sync(void __iomem *base) +{ + writel_relaxed(0, base + sync_reg_offset); +} + +static void __l2c210_op_pa_range(void __iomem *reg, unsigned long start, + unsigned long end) +{ + while (start < end) { + writel_relaxed(start, reg); + start += CACHE_LINE_SIZE; + } +} + +static void l2c210_inv_range(unsigned long start, unsigned long end) +{ + void __iomem *base = l2x0_base; + + if (start & (CACHE_LINE_SIZE - 1)) { + start &= ~(CACHE_LINE_SIZE - 1); + writel_relaxed(start, base + L2X0_CLEAN_INV_LINE_PA); + start += CACHE_LINE_SIZE; + } + + if (end & (CACHE_LINE_SIZE - 1)) { + end &= ~(CACHE_LINE_SIZE - 1); + writel_relaxed(end, base + L2X0_CLEAN_INV_LINE_PA); + } + + __l2c210_op_pa_range(base + L2X0_INV_LINE_PA, start, end); + __l2c210_cache_sync(base); +} + +static void l2c210_clean_range(unsigned long start, unsigned long end) +{ + void __iomem *base = l2x0_base; + + start &= ~(CACHE_LINE_SIZE - 1); + __l2c210_op_pa_range(base + L2X0_CLEAN_LINE_PA, start, end); + __l2c210_cache_sync(base); +} + +static void l2c210_flush_range(unsigned long start, unsigned long end) +{ + void __iomem *base = l2x0_base; + + start &= ~(CACHE_LINE_SIZE - 1); + __l2c210_op_pa_range(base + L2X0_CLEAN_INV_LINE_PA, start, end); + __l2c210_cache_sync(base); +} + +static void l2c210_flush_all(void) +{ + void __iomem *base = l2x0_base; + + BUG_ON(!irqs_disabled()); + + __l2c_op_way(base + L2X0_CLEAN_INV_WAY); + __l2c210_cache_sync(base); +} + +static void l2c210_sync(void) +{ + __l2c210_cache_sync(l2x0_base); +} + +static void l2c210_resume(void) +{ + void __iomem *base = l2x0_base; + + if (!(readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN)) + l2c_enable(base, l2x0_saved_regs.aux_ctrl, 1); +} + +static const struct l2c_init_data l2c210_data __initconst = { + .type = "L2C-210", + .way_size_0 = SZ_8K, + .num_lock = 1, + .enable = l2c_enable, + .save = l2c_save, + .outer_cache = { + .inv_range = l2c210_inv_range, + .clean_range = l2c210_clean_range, + .flush_range = l2c210_flush_range, + .flush_all = l2c210_flush_all, + .disable = l2c_disable, + .sync = l2c210_sync, + .resume = l2c210_resume, + }, +}; + +/* + * L2C-220 specific code. + * + * All operations are background operations: they have to be waited for. + * Conflicting requests generate a slave error (which will cause an + * imprecise abort.) Never uses sync_reg_offset, so we hard-code the + * sync register here. + * + * However, we can re-use the l2c210_resume call. 
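+ *
+ * Each line operation below is therefore issued with a wait-then-kick
+ * pattern, roughly:
+ *
+ *	l2c_wait_mask(reg, 1);		wait for the previous op
+ *	writel_relaxed(start, reg);	then start the next one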
+ */ +static inline void __l2c220_cache_sync(void __iomem *base) +{ + writel_relaxed(0, base + L2X0_CACHE_SYNC); + l2c_wait_mask(base + L2X0_CACHE_SYNC, 1); +} + +static void l2c220_op_way(void __iomem *base, unsigned reg) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&l2x0_lock, flags); + __l2c_op_way(base + reg); + __l2c220_cache_sync(base); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); +} + +static unsigned long l2c220_op_pa_range(void __iomem *reg, unsigned long start, + unsigned long end, unsigned long flags) +{ + raw_spinlock_t *lock = &l2x0_lock; + + while (start < end) { + unsigned long blk_end = start + min(end - start, 4096UL); + + while (start < blk_end) { + l2c_wait_mask(reg, 1); + writel_relaxed(start, reg); + start += CACHE_LINE_SIZE; + } + + if (blk_end < end) { + raw_spin_unlock_irqrestore(lock, flags); + raw_spin_lock_irqsave(lock, flags); + } + } + + return flags; +} + +static void l2c220_inv_range(unsigned long start, unsigned long end) +{ + void __iomem *base = l2x0_base; + unsigned long flags; + + raw_spin_lock_irqsave(&l2x0_lock, flags); + if ((start | end) & (CACHE_LINE_SIZE - 1)) { + if (start & (CACHE_LINE_SIZE - 1)) { + start &= ~(CACHE_LINE_SIZE - 1); + writel_relaxed(start, base + L2X0_CLEAN_INV_LINE_PA); + start += CACHE_LINE_SIZE; + } + + if (end & (CACHE_LINE_SIZE - 1)) { + end &= ~(CACHE_LINE_SIZE - 1); + l2c_wait_mask(base + L2X0_CLEAN_INV_LINE_PA, 1); + writel_relaxed(end, base + L2X0_CLEAN_INV_LINE_PA); + } + } + + flags = l2c220_op_pa_range(base + L2X0_INV_LINE_PA, + start, end, flags); + l2c_wait_mask(base + L2X0_INV_LINE_PA, 1); + __l2c220_cache_sync(base); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); +} + +static void l2c220_clean_range(unsigned long start, unsigned long end) +{ + void __iomem *base = l2x0_base; + unsigned long flags; + + start &= ~(CACHE_LINE_SIZE - 1); + if ((end - start) >= l2x0_size) { + l2c220_op_way(base, L2X0_CLEAN_WAY); + return; + } + + raw_spin_lock_irqsave(&l2x0_lock, flags); + flags = l2c220_op_pa_range(base + L2X0_CLEAN_LINE_PA, + start, end, flags); + l2c_wait_mask(base + L2X0_CLEAN_INV_LINE_PA, 1); + __l2c220_cache_sync(base); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); +} + +static void l2c220_flush_range(unsigned long start, unsigned long end) +{ + void __iomem *base = l2x0_base; + unsigned long flags; + + start &= ~(CACHE_LINE_SIZE - 1); + if ((end - start) >= l2x0_size) { + l2c220_op_way(base, L2X0_CLEAN_INV_WAY); + return; + } + + raw_spin_lock_irqsave(&l2x0_lock, flags); + flags = l2c220_op_pa_range(base + L2X0_CLEAN_INV_LINE_PA, + start, end, flags); + l2c_wait_mask(base + L2X0_CLEAN_INV_LINE_PA, 1); + __l2c220_cache_sync(base); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); +} + +static void l2c220_flush_all(void) +{ + l2c220_op_way(l2x0_base, L2X0_CLEAN_INV_WAY); +} + +static void l2c220_sync(void) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&l2x0_lock, flags); + __l2c220_cache_sync(l2x0_base); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); +} + +static void l2c220_enable(void __iomem *base, u32 aux, unsigned num_lock) +{ + /* + * Always enable non-secure access to the lockdown registers - + * we write to them as part of the L2C enable sequence so they + * need to be accessible. 
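+ * The bit is folded into 'aux' here so that it is already in force
+ * by the time l2c_enable() writes the lockdown registers from
+ * l2c_unlock() with plain writel_relaxed().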
+ */ + aux |= L220_AUX_CTRL_NS_LOCKDOWN; + + l2c_enable(base, aux, num_lock); +} + +static const struct l2c_init_data l2c220_data = { + .type = "L2C-220", + .way_size_0 = SZ_8K, + .num_lock = 1, + .enable = l2c220_enable, + .save = l2c_save, + .outer_cache = { + .inv_range = l2c220_inv_range, + .clean_range = l2c220_clean_range, + .flush_range = l2c220_flush_range, + .flush_all = l2c220_flush_all, + .disable = l2c_disable, + .sync = l2c220_sync, + .resume = l2c210_resume, + }, +}; + +/* + * L2C-310 specific code. + * + * Very similar to L2C-210, the PA, set/way and sync operations are atomic, + * and the way operations are all background tasks. However, issuing an + * operation while a background operation is in progress results in a + * SLVERR response. We can reuse: + * + * __l2c210_cache_sync (using sync_reg_offset) + * l2c210_sync + * l2c210_inv_range (if 588369 is not applicable) + * l2c210_clean_range + * l2c210_flush_range (if 588369 is not applicable) + * l2c210_flush_all (if 727915 is not applicable) + * + * Errata: + * 588369: PL310 R0P0->R1P0, fixed R2P0. + * Affects: all clean+invalidate operations + * clean and invalidate skips the invalidate step, so we need to issue + * separate operations. We also require the above debug workaround + * enclosing this code fragment on affected parts. On unaffected parts, + * we must not use this workaround without the debug register writes + * to avoid exposing a problem similar to 727915. + * + * 727915: PL310 R2P0->R3P0, fixed R3P1. + * Affects: clean+invalidate by way + * clean and invalidate by way runs in the background, and a store can + * hit the line between the clean operation and invalidate operation, + * resulting in the store being lost. + * + * 752271: PL310 R3P0->R3P1-50REL0, fixed R3P2. + * Affects: 8x64-bit (double fill) line fetches + * double fill line fetches can fail to cause dirty data to be evicted + * from the cache before the new data overwrites the second line. + * + * 753970: PL310 R3P0, fixed R3P1. + * Affects: sync + * prevents merging writes after the sync operation, until another L2C + * operation is performed (or a number of other conditions.) + * + * 769419: PL310 R0P0->R3P1, fixed R3P2. + * Affects: store buffer + * store buffer is not automatically drained. 
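+ *
+ * For 588369 the workaround below therefore splits each partial-line
+ * clean+invalidate into separate clean and invalidate writes inside
+ * the debug-register bracket, per affected line:
+ *
+ *	l2c_set_debug(base, 0x03);
+ *	writel_relaxed(addr, base + L2X0_CLEAN_LINE_PA);
+ *	writel_relaxed(addr, base + L2X0_INV_LINE_PA);
+ *	l2c_set_debug(base, 0x00);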
+ */ +static void l2c310_inv_range_erratum(unsigned long start, unsigned long end) +{ + void __iomem *base = l2x0_base; + + if ((start | end) & (CACHE_LINE_SIZE - 1)) { + unsigned long flags; + + /* Erratum 588369 for both clean+invalidate operations */ + raw_spin_lock_irqsave(&l2x0_lock, flags); + l2c_set_debug(base, 0x03); + + if (start & (CACHE_LINE_SIZE - 1)) { + start &= ~(CACHE_LINE_SIZE - 1); + writel_relaxed(start, base + L2X0_CLEAN_LINE_PA); + writel_relaxed(start, base + L2X0_INV_LINE_PA); + start += CACHE_LINE_SIZE; + } + + if (end & (CACHE_LINE_SIZE - 1)) { + end &= ~(CACHE_LINE_SIZE - 1); + writel_relaxed(end, base + L2X0_CLEAN_LINE_PA); + writel_relaxed(end, base + L2X0_INV_LINE_PA); + } + + l2c_set_debug(base, 0x00); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); + } + + __l2c210_op_pa_range(base + L2X0_INV_LINE_PA, start, end); + __l2c210_cache_sync(base); +} + +static void l2c310_flush_range_erratum(unsigned long start, unsigned long end) +{ + raw_spinlock_t *lock = &l2x0_lock; + unsigned long flags; + void __iomem *base = l2x0_base; + + raw_spin_lock_irqsave(lock, flags); + while (start < end) { + unsigned long blk_end = start + min(end - start, 4096UL); + + l2c_set_debug(base, 0x03); + while (start < blk_end) { + writel_relaxed(start, base + L2X0_CLEAN_LINE_PA); + writel_relaxed(start, base + L2X0_INV_LINE_PA); + start += CACHE_LINE_SIZE; + } + l2c_set_debug(base, 0x00); + + if (blk_end < end) { + raw_spin_unlock_irqrestore(lock, flags); + raw_spin_lock_irqsave(lock, flags); + } + } + raw_spin_unlock_irqrestore(lock, flags); + __l2c210_cache_sync(base); +} + +static void l2c310_flush_all_erratum(void) +{ + void __iomem *base = l2x0_base; + unsigned long flags; + + raw_spin_lock_irqsave(&l2x0_lock, flags); + l2c_set_debug(base, 0x03); + __l2c_op_way(base + L2X0_CLEAN_INV_WAY); + l2c_set_debug(base, 0x00); + __l2c210_cache_sync(base); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); +} + +static void __init l2c310_save(void __iomem *base) +{ + unsigned revision; + + l2c_save(base); + + l2x0_saved_regs.tag_latency = readl_relaxed(base + + L310_TAG_LATENCY_CTRL); + l2x0_saved_regs.data_latency = readl_relaxed(base + + L310_DATA_LATENCY_CTRL); + l2x0_saved_regs.filter_end = readl_relaxed(base + + L310_ADDR_FILTER_END); + l2x0_saved_regs.filter_start = readl_relaxed(base + + L310_ADDR_FILTER_START); + + revision = readl_relaxed(base + L2X0_CACHE_ID) & + L2X0_CACHE_ID_RTL_MASK; + + /* From r2p0, there is Prefetch offset/control register */ + if (revision >= L310_CACHE_ID_RTL_R2P0) + l2x0_saved_regs.prefetch_ctrl = readl_relaxed(base + + L310_PREFETCH_CTRL); + + /* From r3p0, there is Power control register */ + if (revision >= L310_CACHE_ID_RTL_R3P0) + l2x0_saved_regs.pwr_ctrl = readl_relaxed(base + + L310_POWER_CTRL); +} + +static void l2c310_resume(void) +{ + void __iomem *base = l2x0_base; + + if (!(readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN)) { + unsigned revision; + + /* restore pl310 setup */ + writel_relaxed(l2x0_saved_regs.tag_latency, + base + L310_TAG_LATENCY_CTRL); + writel_relaxed(l2x0_saved_regs.data_latency, + base + L310_DATA_LATENCY_CTRL); + writel_relaxed(l2x0_saved_regs.filter_end, + base + L310_ADDR_FILTER_END); + writel_relaxed(l2x0_saved_regs.filter_start, + base + L310_ADDR_FILTER_START); + + revision = readl_relaxed(base + L2X0_CACHE_ID) & + L2X0_CACHE_ID_RTL_MASK; + + if (revision >= L310_CACHE_ID_RTL_R2P0) + l2c_write_sec(l2x0_saved_regs.prefetch_ctrl, base, + L310_PREFETCH_CTRL); + if (revision >= L310_CACHE_ID_RTL_R3P0) + 
l2c_write_sec(l2x0_saved_regs.pwr_ctrl, base, + L310_POWER_CTRL); + + l2c_enable(base, l2x0_saved_regs.aux_ctrl, 8); + + /* Re-enable full-line-of-zeros for Cortex-A9 */ + if (l2x0_saved_regs.aux_ctrl & L310_AUX_CTRL_FULL_LINE_ZERO) + set_auxcr(get_auxcr() | BIT(3) | BIT(2) | BIT(1)); + } +} + +static int l2c310_cpu_enable_flz(struct notifier_block *nb, unsigned long act, void *data) +{ + switch (act & ~CPU_TASKS_FROZEN) { + case CPU_STARTING: + set_auxcr(get_auxcr() | BIT(3) | BIT(2) | BIT(1)); + break; + case CPU_DYING: + set_auxcr(get_auxcr() & ~(BIT(3) | BIT(2) | BIT(1))); + break; + } + return NOTIFY_OK; +} + +static void __init l2c310_enable(void __iomem *base, u32 aux, unsigned num_lock) +{ + unsigned rev = readl_relaxed(base + L2X0_CACHE_ID) & L2X0_CACHE_ID_RTL_MASK; + bool cortex_a9 = read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9; + + if (rev >= L310_CACHE_ID_RTL_R2P0) { + if (cortex_a9) { + aux |= L310_AUX_CTRL_EARLY_BRESP; + pr_info("L2C-310 enabling early BRESP for Cortex-A9\n"); + } else if (aux & L310_AUX_CTRL_EARLY_BRESP) { + pr_warn("L2C-310 early BRESP only supported with Cortex-A9\n"); + aux &= ~L310_AUX_CTRL_EARLY_BRESP; + } + } + + if (cortex_a9) { + u32 aux_cur = readl_relaxed(base + L2X0_AUX_CTRL); + u32 acr = get_auxcr(); + + pr_debug("Cortex-A9 ACR=0x%08x\n", acr); + + if (acr & BIT(3) && !(aux_cur & L310_AUX_CTRL_FULL_LINE_ZERO)) + pr_err("L2C-310: full line of zeros enabled in Cortex-A9 but not L2C-310 - invalid\n"); + + if (aux & L310_AUX_CTRL_FULL_LINE_ZERO && !(acr & BIT(3))) + pr_err("L2C-310: enabling full line of zeros but not enabled in Cortex-A9\n"); + + if (!(aux & L310_AUX_CTRL_FULL_LINE_ZERO) && !outer_cache.write_sec) { + aux |= L310_AUX_CTRL_FULL_LINE_ZERO; + pr_info("L2C-310 full line of zeros enabled for Cortex-A9\n"); + } + } else if (aux & (L310_AUX_CTRL_FULL_LINE_ZERO | L310_AUX_CTRL_EARLY_BRESP)) { + pr_err("L2C-310: disabling Cortex-A9 specific feature bits\n"); + aux &= ~(L310_AUX_CTRL_FULL_LINE_ZERO | L310_AUX_CTRL_EARLY_BRESP); + } + + if (aux & (L310_AUX_CTRL_DATA_PREFETCH | L310_AUX_CTRL_INSTR_PREFETCH)) { + u32 prefetch = readl_relaxed(base + L310_PREFETCH_CTRL); + + pr_info("L2C-310 %s%s prefetch enabled, offset %u lines\n", + aux & L310_AUX_CTRL_INSTR_PREFETCH ? "I" : "", + aux & L310_AUX_CTRL_DATA_PREFETCH ? "D" : "", + 1 + (prefetch & L310_PREFETCH_CTRL_OFFSET_MASK)); + } + + /* r3p0 or later has power control register */ + if (rev >= L310_CACHE_ID_RTL_R3P0) { + u32 power_ctrl; + + l2c_write_sec(L310_DYNAMIC_CLK_GATING_EN | L310_STNDBY_MODE_EN, + base, L310_POWER_CTRL); + power_ctrl = readl_relaxed(base + L310_POWER_CTRL); + pr_info("L2C-310 dynamic clock gating %sabled, standby mode %sabled\n", + power_ctrl & L310_DYNAMIC_CLK_GATING_EN ? "en" : "dis", + power_ctrl & L310_STNDBY_MODE_EN ? "en" : "dis"); + } + + /* + * Always enable non-secure access to the lockdown registers - + * we write to them as part of the L2C enable sequence so they + * need to be accessible. 
+ */ + aux |= L310_AUX_CTRL_NS_LOCKDOWN; + + l2c_enable(base, aux, num_lock); + + if (aux & L310_AUX_CTRL_FULL_LINE_ZERO) { + set_auxcr(get_auxcr() | BIT(3) | BIT(2) | BIT(1)); + cpu_notifier(l2c310_cpu_enable_flz, 0); + } +} + +static void __init l2c310_fixup(void __iomem *base, u32 cache_id, + struct outer_cache_fns *fns) +{ + unsigned revision = cache_id & L2X0_CACHE_ID_RTL_MASK; + const char *errata[8]; + unsigned n = 0; + + if (IS_ENABLED(CONFIG_PL310_ERRATA_588369) && + revision < L310_CACHE_ID_RTL_R2P0 && + /* For bcm compatibility */ + fns->inv_range == l2c210_inv_range) { + fns->inv_range = l2c310_inv_range_erratum; + fns->flush_range = l2c310_flush_range_erratum; + errata[n++] = "588369"; + } + + if (IS_ENABLED(CONFIG_PL310_ERRATA_727915) && + revision >= L310_CACHE_ID_RTL_R2P0 && + revision < L310_CACHE_ID_RTL_R3P1) { + fns->flush_all = l2c310_flush_all_erratum; + errata[n++] = "727915"; + } + + if (revision >= L310_CACHE_ID_RTL_R3P0 && + revision < L310_CACHE_ID_RTL_R3P2) { + u32 val = readl_relaxed(base + L310_PREFETCH_CTRL); + /* I don't think bit23 is required here... but iMX6 does so */ + if (val & (BIT(30) | BIT(23))) { + val &= ~(BIT(30) | BIT(23)); + l2c_write_sec(val, base, L310_PREFETCH_CTRL); + errata[n++] = "752271"; + } + } + + if (IS_ENABLED(CONFIG_PL310_ERRATA_753970) && + revision == L310_CACHE_ID_RTL_R3P0) { + sync_reg_offset = L2X0_DUMMY_REG; + errata[n++] = "753970"; + } + + if (IS_ENABLED(CONFIG_PL310_ERRATA_769419)) + errata[n++] = "769419"; + + if (n) { + unsigned i; + + pr_info("L2C-310 errat%s", n > 1 ? "a" : "um"); + for (i = 0; i < n; i++) + pr_cont(" %s", errata[i]); + pr_cont(" enabled\n"); + } +} + +static void l2c310_disable(void) +{ + /* + * If full-line-of-zeros is enabled, we must first disable it in the + * Cortex-A9 auxiliary control register before disabling the L2 cache. + */ + if (l2x0_saved_regs.aux_ctrl & L310_AUX_CTRL_FULL_LINE_ZERO) + set_auxcr(get_auxcr() & ~(BIT(3) | BIT(2) | BIT(1))); + + l2c_disable(); +} + +static const struct l2c_init_data l2c310_init_fns __initconst = { + .type = "L2C-310", + .way_size_0 = SZ_8K, + .num_lock = 8, + .enable = l2c310_enable, + .fixup = l2c310_fixup, + .save = l2c310_save, + .outer_cache = { + .inv_range = l2c210_inv_range, + .clean_range = l2c210_clean_range, + .flush_range = l2c210_flush_range, + .flush_all = l2c210_flush_all, + .disable = l2c310_disable, + .sync = l2c210_sync, + .resume = l2c310_resume, + }, +}; + +static void __init __l2c_init(const struct l2c_init_data *data, + u32 aux_val, u32 aux_mask, u32 cache_id) +{ + struct outer_cache_fns fns; + unsigned way_size_bits, ways; + u32 aux, old_aux; + + /* + * Sanity check the aux values. aux_mask is the bits we preserve + * from reading the hardware register, and aux_val is the bits we + * set. 
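+ *
+ * For example, forcing data prefetch on while keeping every other
+ * bit as the boot code left it would be:
+ *
+ *	aux_val  = L310_AUX_CTRL_DATA_PREFETCH;
+ *	aux_mask = ~L310_AUX_CTRL_DATA_PREFETCH;
+ *
+ * giving aux = (hw_value & aux_mask) | aux_val, and keeping
+ * aux_val & aux_mask == 0 so the check below stays quiet.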
+ */ + if (aux_val & aux_mask) + pr_alert("L2C: platform provided aux values permit register corruption.\n"); + + old_aux = aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL); + aux &= aux_mask; + aux |= aux_val; + + if (old_aux != aux) + pr_warn("L2C: DT/platform modifies aux control register: 0x%08x -> 0x%08x\n", + old_aux, aux); + + /* Determine the number of ways */ + switch (cache_id & L2X0_CACHE_ID_PART_MASK) { + case L2X0_CACHE_ID_PART_L310: + if ((aux_val | ~aux_mask) & (L2C_AUX_CTRL_WAY_SIZE_MASK | L310_AUX_CTRL_ASSOCIATIVITY_16)) + pr_warn("L2C: DT/platform tries to modify or specify cache size\n"); + if (aux & (1 << 16)) + ways = 16; + else + ways = 8; + break; + + case L2X0_CACHE_ID_PART_L210: + case L2X0_CACHE_ID_PART_L220: + ways = (aux >> 13) & 0xf; + break; + + case AURORA_CACHE_ID: + ways = (aux >> 13) & 0xf; + ways = 2 << ((ways + 1) >> 2); + break; + + default: + /* Assume unknown chips have 8 ways */ + ways = 8; + break; + } + + l2x0_way_mask = (1 << ways) - 1; + + /* + * way_size_0 is the size that a way_size value of zero would be + * given the calculation: way_size = way_size_0 << way_size_bits. + * So, if way_size_bits=0 is reserved, but way_size_bits=1 is 16k, + * then way_size_0 would be 8k. + * + * L2 cache size = number of ways * way size. + */ + way_size_bits = (aux & L2C_AUX_CTRL_WAY_SIZE_MASK) >> + L2C_AUX_CTRL_WAY_SIZE_SHIFT; + l2x0_size = ways * (data->way_size_0 << way_size_bits); + + fns = data->outer_cache; + fns.write_sec = outer_cache.write_sec; + if (data->fixup) + data->fixup(l2x0_base, cache_id, &fns); + + /* + * Check if l2x0 controller is already enabled. If we are booting + * in non-secure mode accessing the below registers will fault. + */ + if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) + data->enable(l2x0_base, aux, data->num_lock); + + outer_cache = fns; + + /* + * It is strange to save the register state before initialisation, + * but hey, this is what the DT implementations decided to do. + */ + if (data->save) + data->save(l2x0_base); + + /* Re-read it in case some bits are reserved. 
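
As an editorial aside, not part of the patch: a runnable sketch of the way/size decoding performed above for the L2C-310 case, where bit 16 of the auxiliary control value selects 16-way associativity and bits [19:17] give the way size as a power-of-two multiple of way_size_0 (8 KiB for this controller). The mask/shift constants mirror the driver's definitions; the aux value itself is made up:

#include <stdint.h>
#include <stdio.h>

#define WAY_SIZE_SHIFT  17                      /* L2C_AUX_CTRL_WAY_SIZE_SHIFT */
#define WAY_SIZE_MASK   (7u << WAY_SIZE_SHIFT)  /* L2C_AUX_CTRL_WAY_SIZE_MASK */
#define ASSOC_16        (1u << 16)              /* L310_AUX_CTRL_ASSOCIATIVITY_16 */

int main(void)
{
        uint32_t aux = ASSOC_16 | (3u << WAY_SIZE_SHIFT);       /* hypothetical */
        unsigned way_size_0 = 8192;                             /* L2C-310 */

        unsigned ways = (aux & ASSOC_16) ? 16 : 8;
        unsigned way_size_bits = (aux & WAY_SIZE_MASK) >> WAY_SIZE_SHIFT;
        unsigned l2x0_size = ways * (way_size_0 << way_size_bits);

        /* 16 ways * (8 KiB << 3) = 16 * 64 KiB = 1024 kB */
        printf("%u ways, %u kB\n", ways, l2x0_size >> 10);
        return 0;
}
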
*/ + aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL); + + pr_info("%s cache controller enabled, %d ways, %d kB\n", + data->type, ways, l2x0_size >> 10); + pr_info("%s: CACHE_ID 0x%08x, AUX_CTRL 0x%08x\n", + data->type, cache_id, aux); +} + +void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask) +{ + const struct l2c_init_data *data; + u32 cache_id; + + l2x0_base = base; + + cache_id = readl_relaxed(base + L2X0_CACHE_ID); + + switch (cache_id & L2X0_CACHE_ID_PART_MASK) { + default: + case L2X0_CACHE_ID_PART_L210: + data = &l2c210_data; + break; + + case L2X0_CACHE_ID_PART_L220: + data = &l2c220_data; + break; + + case L2X0_CACHE_ID_PART_L310: + data = &l2c310_init_fns; + break; + } + + __l2c_init(data, aux_val, aux_mask, cache_id); +} + +#ifdef CONFIG_OF +static int l2_wt_override; + +/* Aurora don't have the cache ID register available, so we have to + * pass it though the device tree */ +static u32 cache_id_part_number_from_dt; + +static void __init l2x0_of_parse(const struct device_node *np, + u32 *aux_val, u32 *aux_mask) +{ + u32 data[2] = { 0, 0 }; + u32 tag = 0; + u32 dirty = 0; + u32 val = 0, mask = 0; + + of_property_read_u32(np, "arm,tag-latency", &tag); + if (tag) { + mask |= L2X0_AUX_CTRL_TAG_LATENCY_MASK; + val |= (tag - 1) << L2X0_AUX_CTRL_TAG_LATENCY_SHIFT; + } + + of_property_read_u32_array(np, "arm,data-latency", + data, ARRAY_SIZE(data)); + if (data[0] && data[1]) { + mask |= L2X0_AUX_CTRL_DATA_RD_LATENCY_MASK | + L2X0_AUX_CTRL_DATA_WR_LATENCY_MASK; + val |= ((data[0] - 1) << L2X0_AUX_CTRL_DATA_RD_LATENCY_SHIFT) | + ((data[1] - 1) << L2X0_AUX_CTRL_DATA_WR_LATENCY_SHIFT); + } + + of_property_read_u32(np, "arm,dirty-latency", &dirty); + if (dirty) { + mask |= L2X0_AUX_CTRL_DIRTY_LATENCY_MASK; + val |= (dirty - 1) << L2X0_AUX_CTRL_DIRTY_LATENCY_SHIFT; + } + + *aux_val &= ~mask; + *aux_val |= val; + *aux_mask &= ~mask; +} + +static const struct l2c_init_data of_l2c210_data __initconst = { + .type = "L2C-210", + .way_size_0 = SZ_8K, + .num_lock = 1, + .of_parse = l2x0_of_parse, + .enable = l2c_enable, + .save = l2c_save, + .outer_cache = { + .inv_range = l2c210_inv_range, + .clean_range = l2c210_clean_range, + .flush_range = l2c210_flush_range, + .flush_all = l2c210_flush_all, + .disable = l2c_disable, + .sync = l2c210_sync, + .resume = l2c210_resume, + }, +}; + +static const struct l2c_init_data of_l2c220_data __initconst = { + .type = "L2C-220", + .way_size_0 = SZ_8K, + .num_lock = 1, + .of_parse = l2x0_of_parse, + .enable = l2c220_enable, + .save = l2c_save, + .outer_cache = { + .inv_range = l2c220_inv_range, + .clean_range = l2c220_clean_range, + .flush_range = l2c220_flush_range, + .flush_all = l2c220_flush_all, + .disable = l2c_disable, + .sync = l2c220_sync, + .resume = l2c210_resume, + }, +}; + +static void __init l2c310_of_parse(const struct device_node *np, + u32 *aux_val, u32 *aux_mask) +{ + u32 data[3] = { 0, 0, 0 }; + u32 tag[3] = { 0, 0, 0 }; + u32 filter[2] = { 0, 0 }; + + of_property_read_u32_array(np, "arm,tag-latency", tag, ARRAY_SIZE(tag)); + if (tag[0] && tag[1] && tag[2]) + writel_relaxed( + L310_LATENCY_CTRL_RD(tag[0] - 1) | + L310_LATENCY_CTRL_WR(tag[1] - 1) | + L310_LATENCY_CTRL_SETUP(tag[2] - 1), + l2x0_base + L310_TAG_LATENCY_CTRL); + + of_property_read_u32_array(np, "arm,data-latency", + data, ARRAY_SIZE(data)); + if (data[0] && data[1] && data[2]) + writel_relaxed( + L310_LATENCY_CTRL_RD(data[0] - 1) | + L310_LATENCY_CTRL_WR(data[1] - 1) | + L310_LATENCY_CTRL_SETUP(data[2] - 1), + l2x0_base + L310_DATA_LATENCY_CTRL); + + 
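        /*
         * (Editorial note, not part of this patch: the latencies above are
         * given in cycles in the device tree, while the RD/WR/SETUP register
         * fields are zero-based, hence the "- 1" applied to each element.
         * For example arm,tag-latency = <2 2 1> programs read = 1, write = 1,
         * setup = 0.  Each property is applied only when all of its elements
         * are non-zero, so partially specified latencies are ignored.)
         */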
of_property_read_u32_array(np, "arm,filter-ranges", + filter, ARRAY_SIZE(filter)); + if (filter[1]) { + writel_relaxed(ALIGN(filter[0] + filter[1], SZ_1M), + l2x0_base + L310_ADDR_FILTER_END); + writel_relaxed((filter[0] & ~(SZ_1M - 1)) | L310_ADDR_FILTER_EN, + l2x0_base + L310_ADDR_FILTER_START); + } +} + +static const struct l2c_init_data of_l2c310_data __initconst = { + .type = "L2C-310", + .way_size_0 = SZ_8K, + .num_lock = 8, + .of_parse = l2c310_of_parse, + .enable = l2c310_enable, + .fixup = l2c310_fixup, + .save = l2c310_save, + .outer_cache = { + .inv_range = l2c210_inv_range, + .clean_range = l2c210_clean_range, + .flush_range = l2c210_flush_range, + .flush_all = l2c210_flush_all, + .disable = l2c310_disable, + .sync = l2c210_sync, + .resume = l2c310_resume, + }, +}; + +/* + * This is a variant of the of_l2c310_data with .sync set to + * NULL. Outer sync operations are not needed when the system is I/O + * coherent, and potentially harmful in certain situations (PCIe/PL310 + * deadlock on Armada 375/38x due to hardware I/O coherency). The + * other operations are kept because they are infrequent (therefore do + * not cause the deadlock in practice) and needed for secondary CPU + * boot and other power management activities. + */ +static const struct l2c_init_data of_l2c310_coherent_data __initconst = { + .type = "L2C-310 Coherent", + .way_size_0 = SZ_8K, + .num_lock = 8, + .of_parse = l2c310_of_parse, + .enable = l2c310_enable, + .fixup = l2c310_fixup, + .save = l2c310_save, + .outer_cache = { + .inv_range = l2c210_inv_range, + .clean_range = l2c210_clean_range, + .flush_range = l2c210_flush_range, + .flush_all = l2c210_flush_all, + .disable = l2c310_disable, + .resume = l2c310_resume, + }, +}; + +/* + * Note that the end addresses passed to Linux primitives are + * noninclusive, while the hardware cache range operations use + * inclusive start and end addresses. + */ +static unsigned long calc_range_end(unsigned long start, unsigned long end) +{ + /* + * Limit the number of cache lines processed at once, + * since cache range operations stall the CPU pipeline + * until completion. + */ + if (end > start + MAX_RANGE_SIZE) + end = start + MAX_RANGE_SIZE; + + /* + * Cache range operations can't straddle a page boundary. + */ + if (end > PAGE_ALIGN(start+1)) + end = PAGE_ALIGN(start+1); + + return end; +} + +/* + * Make sure 'start' and 'end' reference the same page, as L2 is PIPT + * and range operations only do a TLB lookup on the start address. + */ +static void aurora_pa_range(unsigned long start, unsigned long end, + unsigned long offset) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&l2x0_lock, flags); + writel_relaxed(start, l2x0_base + AURORA_RANGE_BASE_ADDR_REG); + writel_relaxed(end, l2x0_base + offset); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); + + cache_sync(); +} + +static void aurora_inv_range(unsigned long start, unsigned long end) +{ + /* + * round start and end adresses up to cache line size + */ + start &= ~(CACHE_LINE_SIZE - 1); + end = ALIGN(end, CACHE_LINE_SIZE); + + /* + * Invalidate all full cache lines between 'start' and 'end'. + */ + while (start < end) { + unsigned long range_end = calc_range_end(start, end); + aurora_pa_range(start, range_end - CACHE_LINE_SIZE, + AURORA_INVAL_RANGE_REG); + start = range_end; + } +} + +static void aurora_clean_range(unsigned long start, unsigned long end) +{ + /* + * If L2 is forced to WT, the L2 will always be clean and we + * don't need to do anything here. 
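
As an editorial aside, not part of the patch: a runnable sketch of how calc_range_end() above chops a large operation into bounded, page-confined chunks, each handed to aurora_pa_range() with an inclusive end address (hence the "- CACHE_LINE_SIZE"). MAX_RANGE_SIZE and the addresses below are assumed values, for illustration only:

#include <stdio.h>

#define CACHE_LINE_SIZE 32
#define PAGE_SIZE       4096
#define MAX_RANGE_SIZE  1024            /* assumed limit, for illustration */
#define PAGE_ALIGN(a)   (((a) + PAGE_SIZE - 1) & ~(unsigned long)(PAGE_SIZE - 1))

static unsigned long calc_range_end(unsigned long start, unsigned long end)
{
        if (end > start + MAX_RANGE_SIZE)
                end = start + MAX_RANGE_SIZE;
        if (end > PAGE_ALIGN(start + 1))
                end = PAGE_ALIGN(start + 1);
        return end;
}

int main(void)
{
        unsigned long start = 0x1000fe0, end = 0x1001440;       /* line aligned */

        while (start < end) {
                unsigned long range_end = calc_range_end(start, end);

                /* one aurora_pa_range() call, inclusive end address */
                printf("op 0x%08lx..0x%08lx\n", start,
                       range_end - CACHE_LINE_SIZE);
                start = range_end;
        }
        return 0;
}
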
+ */ + if (!l2_wt_override) { + start &= ~(CACHE_LINE_SIZE - 1); + end = ALIGN(end, CACHE_LINE_SIZE); + while (start != end) { + unsigned long range_end = calc_range_end(start, end); + aurora_pa_range(start, range_end - CACHE_LINE_SIZE, + AURORA_CLEAN_RANGE_REG); + start = range_end; + } + } +} + +static void aurora_flush_range(unsigned long start, unsigned long end) +{ + start &= ~(CACHE_LINE_SIZE - 1); + end = ALIGN(end, CACHE_LINE_SIZE); + while (start != end) { + unsigned long range_end = calc_range_end(start, end); + /* + * If L2 is forced to WT, the L2 will always be clean and we + * just need to invalidate. + */ + if (l2_wt_override) + aurora_pa_range(start, range_end - CACHE_LINE_SIZE, + AURORA_INVAL_RANGE_REG); + else + aurora_pa_range(start, range_end - CACHE_LINE_SIZE, + AURORA_FLUSH_RANGE_REG); + start = range_end; + } +} + +static void aurora_save(void __iomem *base) +{ + l2x0_saved_regs.ctrl = readl_relaxed(base + L2X0_CTRL); + l2x0_saved_regs.aux_ctrl = readl_relaxed(base + L2X0_AUX_CTRL); +} + +static void aurora_resume(void) +{ + void __iomem *base = l2x0_base; + + if (!(readl(base + L2X0_CTRL) & L2X0_CTRL_EN)) { + writel_relaxed(l2x0_saved_regs.aux_ctrl, base + L2X0_AUX_CTRL); + writel_relaxed(l2x0_saved_regs.ctrl, base + L2X0_CTRL); + } +} + +/* + * For Aurora cache in no outer mode, enable via the CP15 coprocessor + * broadcasting of cache commands to L2. + */ +static void __init aurora_enable_no_outer(void __iomem *base, u32 aux, + unsigned num_lock) +{ + u32 u; + + asm volatile("mrc p15, 1, %0, c15, c2, 0" : "=r" (u)); + u |= AURORA_CTRL_FW; /* Set the FW bit */ + asm volatile("mcr p15, 1, %0, c15, c2, 0" : : "r" (u)); + + isb(); + + l2c_enable(base, aux, num_lock); +} + +static void __init aurora_fixup(void __iomem *base, u32 cache_id, + struct outer_cache_fns *fns) +{ + sync_reg_offset = AURORA_SYNC_REG; +} + +static void __init aurora_of_parse(const struct device_node *np, + u32 *aux_val, u32 *aux_mask) +{ + u32 val = AURORA_ACR_REPLACEMENT_TYPE_SEMIPLRU; + u32 mask = AURORA_ACR_REPLACEMENT_MASK; + + of_property_read_u32(np, "cache-id-part", + &cache_id_part_number_from_dt); + + /* Determine and save the write policy */ + l2_wt_override = of_property_read_bool(np, "wt-override"); + + if (l2_wt_override) { + val |= AURORA_ACR_FORCE_WRITE_THRO_POLICY; + mask |= AURORA_ACR_FORCE_WRITE_POLICY_MASK; + } + + *aux_val &= ~mask; + *aux_val |= val; + *aux_mask &= ~mask; +} + +static const struct l2c_init_data of_aurora_with_outer_data __initconst = { + .type = "Aurora", + .way_size_0 = SZ_4K, + .num_lock = 4, + .of_parse = aurora_of_parse, + .enable = l2c_enable, + .fixup = aurora_fixup, + .save = aurora_save, + .outer_cache = { + .inv_range = aurora_inv_range, + .clean_range = aurora_clean_range, + .flush_range = aurora_flush_range, + .flush_all = l2x0_flush_all, + .disable = l2x0_disable, + .sync = l2x0_cache_sync, + .resume = aurora_resume, + }, +}; + +static const struct l2c_init_data of_aurora_no_outer_data __initconst = { + .type = "Aurora", + .way_size_0 = SZ_4K, + .num_lock = 4, + .of_parse = aurora_of_parse, + .enable = aurora_enable_no_outer, + .fixup = aurora_fixup, + .save = aurora_save, + .outer_cache = { + .resume = aurora_resume, + }, +}; + +/* + * For certain Broadcom SoCs, depending on the address range, different offsets + * need to be added to the address before passing it to L2 for + * invalidation/clean/flush + * + * Section Address Range Offset EMI + * 1 0x00000000 - 0x3FFFFFFF 0x80000000 VC + * 2 0x40000000 - 0xBFFFFFFF 0x40000000 SYS + * 3 0xC0000000 
- 0xFFFFFFFF 0x80000000 VC + * + * When the start and end addresses have crossed two different sections, we + * need to break the L2 operation into two, each within its own section. + * For example, if we need to invalidate addresses starts at 0xBFFF0000 and + * ends at 0xC0001000, we need do invalidate 1) 0xBFFF0000 - 0xBFFFFFFF and 2) + * 0xC0000000 - 0xC0001000 + * + * Note 1: + * By breaking a single L2 operation into two, we may potentially suffer some + * performance hit, but keep in mind the cross section case is very rare + * + * Note 2: + * We do not need to handle the case when the start address is in + * Section 1 and the end address is in Section 3, since it is not a valid use + * case + * + * Note 3: + * Section 1 in practical terms can no longer be used on rev A2. Because of + * that the code does not need to handle section 1 at all. + * + */ +#define BCM_SYS_EMI_START_ADDR 0x40000000UL +#define BCM_VC_EMI_SEC3_START_ADDR 0xC0000000UL + +#define BCM_SYS_EMI_OFFSET 0x40000000UL +#define BCM_VC_EMI_OFFSET 0x80000000UL + +static inline int bcm_addr_is_sys_emi(unsigned long addr) +{ + return (addr >= BCM_SYS_EMI_START_ADDR) && + (addr < BCM_VC_EMI_SEC3_START_ADDR); +} + +static inline unsigned long bcm_l2_phys_addr(unsigned long addr) +{ + if (bcm_addr_is_sys_emi(addr)) + return addr + BCM_SYS_EMI_OFFSET; + else + return addr + BCM_VC_EMI_OFFSET; +} + +static void bcm_inv_range(unsigned long start, unsigned long end) +{ + unsigned long new_start, new_end; + + BUG_ON(start < BCM_SYS_EMI_START_ADDR); + + if (unlikely(end <= start)) + return; + + new_start = bcm_l2_phys_addr(start); + new_end = bcm_l2_phys_addr(end); + + /* normal case, no cross section between start and end */ + if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) { + l2c210_inv_range(new_start, new_end); + return; + } + + /* They cross sections, so it can only be a cross from section + * 2 to section 3 + */ + l2c210_inv_range(new_start, + bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1)); + l2c210_inv_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR), + new_end); +} + +static void bcm_clean_range(unsigned long start, unsigned long end) +{ + unsigned long new_start, new_end; + + BUG_ON(start < BCM_SYS_EMI_START_ADDR); + + if (unlikely(end <= start)) + return; + + new_start = bcm_l2_phys_addr(start); + new_end = bcm_l2_phys_addr(end); + + /* normal case, no cross section between start and end */ + if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) { + l2c210_clean_range(new_start, new_end); + return; + } + + /* They cross sections, so it can only be a cross from section + * 2 to section 3 + */ + l2c210_clean_range(new_start, + bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1)); + l2c210_clean_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR), + new_end); +} + +static void bcm_flush_range(unsigned long start, unsigned long end) +{ + unsigned long new_start, new_end; + + BUG_ON(start < BCM_SYS_EMI_START_ADDR); + + if (unlikely(end <= start)) + return; + + if ((end - start) >= l2x0_size) { + outer_cache.flush_all(); + return; + } + + new_start = bcm_l2_phys_addr(start); + new_end = bcm_l2_phys_addr(end); + + /* normal case, no cross section between start and end */ + if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) { + l2c210_flush_range(new_start, new_end); + return; + } + + /* They cross sections, so it can only be a cross from section + * 2 to section 3 + */ + l2c210_flush_range(new_start, + bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1)); + 
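	/*
	 * (Editorial note, not part of this patch: this is the second half of
	 * a flush whose range crosses the 0xC0000000 section boundary.  The
	 * call above covers the tail of section 2 and the call below covers
	 * the head of section 3, each after bcm_l2_phys_addr() has added that
	 * section's EMI offset; e.g. 0xBFFF0000-0xC0001000 is issued as
	 * 0xBFFF0000-0xBFFFFFFF plus 0xC0000000-0xC0001000.)
	 */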
l2c210_flush_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR), + new_end); +} + +/* Broadcom L2C-310 start from ARMs R3P2 or later, and require no fixups */ +static const struct l2c_init_data of_bcm_l2x0_data __initconst = { + .type = "BCM-L2C-310", + .way_size_0 = SZ_8K, + .num_lock = 8, + .of_parse = l2c310_of_parse, + .enable = l2c310_enable, + .save = l2c310_save, + .outer_cache = { + .inv_range = bcm_inv_range, + .clean_range = bcm_clean_range, + .flush_range = bcm_flush_range, + .flush_all = l2c210_flush_all, + .disable = l2c310_disable, + .sync = l2c210_sync, + .resume = l2c310_resume, + }, +}; + +static void __init tauros3_save(void __iomem *base) +{ + l2c_save(base); + + l2x0_saved_regs.aux2_ctrl = + readl_relaxed(base + TAUROS3_AUX2_CTRL); + l2x0_saved_regs.prefetch_ctrl = + readl_relaxed(base + L310_PREFETCH_CTRL); +} + +static void tauros3_resume(void) +{ + void __iomem *base = l2x0_base; + + if (!(readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN)) { + writel_relaxed(l2x0_saved_regs.aux2_ctrl, + base + TAUROS3_AUX2_CTRL); + writel_relaxed(l2x0_saved_regs.prefetch_ctrl, + base + L310_PREFETCH_CTRL); + + l2c_enable(base, l2x0_saved_regs.aux_ctrl, 8); + } +} + +static const struct l2c_init_data of_tauros3_data __initconst = { + .type = "Tauros3", + .way_size_0 = SZ_8K, + .num_lock = 8, + .enable = l2c_enable, + .save = tauros3_save, + /* Tauros3 broadcasts L1 cache operations to L2 */ + .outer_cache = { + .resume = tauros3_resume, + }, +}; + +#define L2C_ID(name, fns) { .compatible = name, .data = (void *)&fns } +static const struct of_device_id l2x0_ids[] __initconst = { + L2C_ID("arm,l210-cache", of_l2c210_data), + L2C_ID("arm,l220-cache", of_l2c220_data), + L2C_ID("arm,pl310-cache", of_l2c310_data), + L2C_ID("brcm,bcm11351-a2-pl310-cache", of_bcm_l2x0_data), + L2C_ID("marvell,aurora-outer-cache", of_aurora_with_outer_data), + L2C_ID("marvell,aurora-system-cache", of_aurora_no_outer_data), + L2C_ID("marvell,tauros3-cache", of_tauros3_data), + /* Deprecated IDs */ + L2C_ID("bcm,bcm11351-a2-pl310-cache", of_bcm_l2x0_data), + {} +}; + +int __init l2x0_of_init(u32 aux_val, u32 aux_mask) +{ + const struct l2c_init_data *data; + struct device_node *np; + struct resource res; + u32 cache_id, old_aux; + + np = of_find_matching_node(NULL, l2x0_ids); + if (!np) + return -ENODEV; + + if (of_address_to_resource(np, 0, &res)) + return -ENODEV; + + l2x0_base = ioremap(res.start, resource_size(&res)); + if (!l2x0_base) + return -ENOMEM; + + l2x0_saved_regs.phy_base = res.start; + + data = of_match_node(l2x0_ids, np)->data; + + if (of_device_is_compatible(np, "arm,pl310-cache") && + of_property_read_bool(np, "arm,io-coherent")) + data = &of_l2c310_coherent_data; + + old_aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL); + if (old_aux != ((old_aux & aux_mask) | aux_val)) { + pr_warn("L2C: platform modifies aux control register: 0x%08x -> 0x%08x\n", + old_aux, (old_aux & aux_mask) | aux_val); + } else if (aux_mask != ~0U && aux_val != 0) { + pr_alert("L2C: platform provided aux values match the hardware, so have no effect. 
Please remove them.\n"); + } + + /* All L2 caches are unified, so this property should be specified */ + if (!of_property_read_bool(np, "cache-unified")) + pr_err("L2C: device tree omits to specify unified cache\n"); + + /* L2 configuration can only be changed if the cache is disabled */ + if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) + if (data->of_parse) + data->of_parse(np, &aux_val, &aux_mask); + + if (cache_id_part_number_from_dt) + cache_id = cache_id_part_number_from_dt; + else + cache_id = readl_relaxed(l2x0_base + L2X0_CACHE_ID); + + __l2c_init(data, aux_val, aux_mask, cache_id); + + return 0; +} +#endif diff --git a/arch/arm/mm/cache-nop.S b/arch/arm/mm/cache-nop.S new file mode 100644 index 00000000000..8e12ddca003 --- /dev/null +++ b/arch/arm/mm/cache-nop.S @@ -0,0 +1,50 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <linux/init.h> + +#include "proc-macros.S" + +ENTRY(nop_flush_icache_all) + mov pc, lr +ENDPROC(nop_flush_icache_all) + + .globl nop_flush_kern_cache_all + .equ nop_flush_kern_cache_all, nop_flush_icache_all + + .globl nop_flush_kern_cache_louis + .equ nop_flush_kern_cache_louis, nop_flush_icache_all + + .globl nop_flush_user_cache_all + .equ nop_flush_user_cache_all, nop_flush_icache_all + + .globl nop_flush_user_cache_range + .equ nop_flush_user_cache_range, nop_flush_icache_all + + .globl nop_coherent_kern_range + .equ nop_coherent_kern_range, nop_flush_icache_all + +ENTRY(nop_coherent_user_range) + mov r0, 0 + mov pc, lr +ENDPROC(nop_coherent_user_range) + + .globl nop_flush_kern_dcache_area + .equ nop_flush_kern_dcache_area, nop_flush_icache_all + + .globl nop_dma_flush_range + .equ nop_dma_flush_range, nop_flush_icache_all + + .globl nop_dma_map_area + .equ nop_dma_map_area, nop_flush_icache_all + + .globl nop_dma_unmap_area + .equ nop_dma_unmap_area, nop_flush_icache_all + + __INITDATA + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions nop diff --git a/arch/arm/mm/cache-tauros2.c b/arch/arm/mm/cache-tauros2.c new file mode 100644 index 00000000000..b273739e635 --- /dev/null +++ b/arch/arm/mm/cache-tauros2.c @@ -0,0 +1,302 @@ +/* + * arch/arm/mm/cache-tauros2.c - Tauros2 L2 cache controller support + * + * Copyright (C) 2008 Marvell Semiconductor + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + * + * References: + * - PJ1 CPU Core Datasheet, + * Document ID MV-S104837-01, Rev 0.7, January 24 2008. + * - PJ4 CPU Core Datasheet, + * Document ID MV-S105190-00, Rev 0.7, March 14 2008. + */ + +#include <linux/init.h> +#include <linux/of.h> +#include <linux/of_address.h> +#include <asm/cacheflush.h> +#include <asm/cp15.h> +#include <asm/cputype.h> +#include <asm/hardware/cache-tauros2.h> + + +/* + * When Tauros2 is used on a CPU that supports the v7 hierarchical + * cache operations, the cache handling code in proc-v7.S takes care + * of everything, including handling DMA coherency. + * + * So, we only need to register outer cache operations here if we're + * being used on a pre-v7 CPU, and we only need to build support for + * outer cache operations into the kernel image if the kernel has been + * configured to support a pre-v7 CPU. 
+ */ +#ifdef CONFIG_CPU_32v5 +/* + * Low-level cache maintenance operations. + */ +static inline void tauros2_clean_pa(unsigned long addr) +{ + __asm__("mcr p15, 1, %0, c7, c11, 3" : : "r" (addr)); +} + +static inline void tauros2_clean_inv_pa(unsigned long addr) +{ + __asm__("mcr p15, 1, %0, c7, c15, 3" : : "r" (addr)); +} + +static inline void tauros2_inv_pa(unsigned long addr) +{ + __asm__("mcr p15, 1, %0, c7, c7, 3" : : "r" (addr)); +} + + +/* + * Linux primitives. + * + * Note that the end addresses passed to Linux primitives are + * noninclusive. + */ +#define CACHE_LINE_SIZE 32 + +static void tauros2_inv_range(unsigned long start, unsigned long end) +{ + /* + * Clean and invalidate partial first cache line. + */ + if (start & (CACHE_LINE_SIZE - 1)) { + tauros2_clean_inv_pa(start & ~(CACHE_LINE_SIZE - 1)); + start = (start | (CACHE_LINE_SIZE - 1)) + 1; + } + + /* + * Clean and invalidate partial last cache line. + */ + if (end & (CACHE_LINE_SIZE - 1)) { + tauros2_clean_inv_pa(end & ~(CACHE_LINE_SIZE - 1)); + end &= ~(CACHE_LINE_SIZE - 1); + } + + /* + * Invalidate all full cache lines between 'start' and 'end'. + */ + while (start < end) { + tauros2_inv_pa(start); + start += CACHE_LINE_SIZE; + } + + dsb(); +} + +static void tauros2_clean_range(unsigned long start, unsigned long end) +{ + start &= ~(CACHE_LINE_SIZE - 1); + while (start < end) { + tauros2_clean_pa(start); + start += CACHE_LINE_SIZE; + } + + dsb(); +} + +static void tauros2_flush_range(unsigned long start, unsigned long end) +{ + start &= ~(CACHE_LINE_SIZE - 1); + while (start < end) { + tauros2_clean_inv_pa(start); + start += CACHE_LINE_SIZE; + } + + dsb(); +} + +static void tauros2_disable(void) +{ + __asm__ __volatile__ ( + "mcr p15, 1, %0, c7, c11, 0 @L2 Cache Clean All\n\t" + "mrc p15, 0, %0, c1, c0, 0\n\t" + "bic %0, %0, #(1 << 26)\n\t" + "mcr p15, 0, %0, c1, c0, 0 @Disable L2 Cache\n\t" + : : "r" (0x0)); +} + +static void tauros2_resume(void) +{ + __asm__ __volatile__ ( + "mcr p15, 1, %0, c7, c7, 0 @L2 Cache Invalidate All\n\t" + "mrc p15, 0, %0, c1, c0, 0\n\t" + "orr %0, %0, #(1 << 26)\n\t" + "mcr p15, 0, %0, c1, c0, 0 @Enable L2 Cache\n\t" + : : "r" (0x0)); +} +#endif + +static inline u32 __init read_extra_features(void) +{ + u32 u; + + __asm__("mrc p15, 1, %0, c15, c1, 0" : "=r" (u)); + + return u; +} + +static inline void __init write_extra_features(u32 u) +{ + __asm__("mcr p15, 1, %0, c15, c1, 0" : : "r" (u)); +} + +static inline int __init cpuid_scheme(void) +{ + return !!((processor_id & 0x000f0000) == 0x000f0000); +} + +static inline u32 __init read_mmfr3(void) +{ + u32 mmfr3; + + __asm__("mrc p15, 0, %0, c0, c1, 7\n" : "=r" (mmfr3)); + + return mmfr3; +} + +static inline u32 __init read_actlr(void) +{ + u32 actlr; + + __asm__("mrc p15, 0, %0, c1, c0, 1\n" : "=r" (actlr)); + + return actlr; +} + +static inline void __init write_actlr(u32 actlr) +{ + __asm__("mcr p15, 0, %0, c1, c0, 1\n" : : "r" (actlr)); +} + +static void enable_extra_feature(unsigned int features) +{ + u32 u; + + u = read_extra_features(); + + if (features & CACHE_TAUROS2_PREFETCH_ON) + u &= ~0x01000000; + else + u |= 0x01000000; + printk(KERN_INFO "Tauros2: %s L2 prefetch.\n", + (features & CACHE_TAUROS2_PREFETCH_ON) + ? "Enabling" : "Disabling"); + + if (features & CACHE_TAUROS2_LINEFILL_BURST8) + u |= 0x00100000; + else + u &= ~0x00100000; + printk(KERN_INFO "Tauros2: %s line fill burt8.\n", + (features & CACHE_TAUROS2_LINEFILL_BURST8) + ? 
"Enabling" : "Disabling"); + + write_extra_features(u); +} + +static void __init tauros2_internal_init(unsigned int features) +{ + char *mode = NULL; + + enable_extra_feature(features); + +#ifdef CONFIG_CPU_32v5 + if ((processor_id & 0xff0f0000) == 0x56050000) { + u32 feat; + + /* + * v5 CPUs with Tauros2 have the L2 cache enable bit + * located in the CPU Extra Features register. + */ + feat = read_extra_features(); + if (!(feat & 0x00400000)) { + printk(KERN_INFO "Tauros2: Enabling L2 cache.\n"); + write_extra_features(feat | 0x00400000); + } + + mode = "ARMv5"; + outer_cache.inv_range = tauros2_inv_range; + outer_cache.clean_range = tauros2_clean_range; + outer_cache.flush_range = tauros2_flush_range; + outer_cache.disable = tauros2_disable; + outer_cache.resume = tauros2_resume; + } +#endif + +#ifdef CONFIG_CPU_32v7 + /* + * Check whether this CPU has support for the v7 hierarchical + * cache ops. (PJ4 is in its v7 personality mode if the MMFR3 + * register indicates support for the v7 hierarchical cache + * ops.) + * + * (Although strictly speaking there may exist CPUs that + * implement the v7 cache ops but are only ARMv6 CPUs (due to + * not complying with all of the other ARMv7 requirements), + * there are no real-life examples of Tauros2 being used on + * such CPUs as of yet.) + */ + if (cpuid_scheme() && (read_mmfr3() & 0xf) == 1) { + u32 actlr; + + /* + * When Tauros2 is used in an ARMv7 system, the L2 + * enable bit is located in the Auxiliary System Control + * Register (which is the only register allowed by the + * ARMv7 spec to contain fine-grained cache control bits). + */ + actlr = read_actlr(); + if (!(actlr & 0x00000002)) { + printk(KERN_INFO "Tauros2: Enabling L2 cache.\n"); + write_actlr(actlr | 0x00000002); + } + + mode = "ARMv7"; + } +#endif + + if (mode == NULL) { + printk(KERN_CRIT "Tauros2: Unable to detect CPU mode.\n"); + return; + } + + printk(KERN_INFO "Tauros2: L2 cache support initialised " + "in %s mode.\n", mode); +} + +#ifdef CONFIG_OF +static const struct of_device_id tauros2_ids[] __initconst = { + { .compatible = "marvell,tauros2-cache"}, + {} +}; +#endif + +void __init tauros2_init(unsigned int features) +{ +#ifdef CONFIG_OF + struct device_node *node; + int ret; + unsigned int f; + + node = of_find_matching_node(NULL, tauros2_ids); + if (!node) { + pr_info("Not found marvell,tauros2-cache, disable it\n"); + return; + } + + ret = of_property_read_u32(node, "marvell,tauros2-cache-features", &f); + if (ret) { + pr_info("Not found marvell,tauros-cache-features property, " + "disable extra features\n"); + features = 0; + } else + features = f; +#endif + tauros2_internal_init(features); +} diff --git a/arch/arm/mm/cache-tauros3.h b/arch/arm/mm/cache-tauros3.h new file mode 100644 index 00000000000..02c0a97cbc0 --- /dev/null +++ b/arch/arm/mm/cache-tauros3.h @@ -0,0 +1,41 @@ +/* + * Marvell Tauros3 cache controller includes + * + * Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com> + * + * based on GPL'ed 2.6 kernel sources + * (c) Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef __ASM_ARM_HARDWARE_TAUROS3_H +#define __ASM_ARM_HARDWARE_TAUROS3_H + +/* + * Marvell Tauros3 L2CC is compatible with PL310 r0p0 + * but with PREFETCH_CTRL (r2p0) and an additional event counter. + * Also, there is AUX2_CTRL for some Marvell specific control. + */ + +#define TAUROS3_EVENT_CNT2_CFG 0x224 +#define TAUROS3_EVENT_CNT2_VAL 0x228 +#define TAUROS3_INV_ALL 0x780 +#define TAUROS3_CLEAN_ALL 0x784 +#define TAUROS3_AUX2_CTRL 0x820 + +/* Registers shifts and masks */ +#define TAUROS3_AUX2_CTRL_LINEFILL_BURST8_EN (1 << 2) + +#endif diff --git a/arch/arm/mm/cache-v3.S b/arch/arm/mm/cache-v3.S deleted file mode 100644 index e1994788cf0..00000000000 --- a/arch/arm/mm/cache-v3.S +++ /dev/null @@ -1,137 +0,0 @@ -/* - * linux/arch/arm/mm/cache-v3.S - * - * Copyright (C) 1997-2002 Russell king - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#include <linux/linkage.h> -#include <linux/init.h> -#include <asm/hardware.h> -#include <asm/page.h> -#include "proc-macros.S" - -/* - * flush_user_cache_all() - * - * Invalidate all cache entries in a particular address - * space. - * - * - mm - mm_struct describing address space - */ -ENTRY(v3_flush_user_cache_all) - /* FALLTHROUGH */ -/* - * flush_kern_cache_all() - * - * Clean and invalidate the entire cache. - */ -ENTRY(v3_flush_kern_cache_all) - /* FALLTHROUGH */ - -/* - * flush_user_cache_range(start, end, flags) - * - * Invalidate a range of cache entries in the specified - * address space. - * - * - start - start address (may not be aligned) - * - end - end address (exclusive, may not be aligned) - * - flags - vma_area_struct flags describing address space - */ -ENTRY(v3_flush_user_cache_range) - mov ip, #0 - mcreq p15, 0, ip, c7, c0, 0 @ flush ID cache - mov pc, lr - -/* - * coherent_kern_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start. If you have non-snooping - * Harvard caches, you need to implement this function. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(v3_coherent_kern_range) - /* FALLTHROUGH */ - -/* - * coherent_user_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start. If you have non-snooping - * Harvard caches, you need to implement this function. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(v3_coherent_user_range) - mov pc, lr - -/* - * flush_kern_dcache_page(void *page) - * - * Ensure no D cache aliasing occurs, either with itself or - * the I cache - * - * - addr - page aligned address - */ -ENTRY(v3_flush_kern_dcache_page) - /* FALLTHROUGH */ - -/* - * dma_inv_range(start, end) - * - * Invalidate (discard) the specified virtual address range. - * May not write back any entries. If 'start' or 'end' - * are not cache line aligned, those lines must be written - * back. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(v3_dma_inv_range) - /* FALLTHROUGH */ - -/* - * dma_flush_range(start, end) - * - * Clean and invalidate the specified virtual address range. 
- * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(v3_dma_flush_range) - mov r0, #0 - mcr p15, 0, r0, c7, c0, 0 @ flush ID cache - /* FALLTHROUGH */ - -/* - * dma_clean_range(start, end) - * - * Clean (write back) the specified virtual address range. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(v3_dma_clean_range) - mov pc, lr - - __INITDATA - - .type v3_cache_fns, #object -ENTRY(v3_cache_fns) - .long v3_flush_kern_cache_all - .long v3_flush_user_cache_all - .long v3_flush_user_cache_range - .long v3_coherent_kern_range - .long v3_coherent_user_range - .long v3_flush_kern_dcache_page - .long v3_dma_inv_range - .long v3_dma_clean_range - .long v3_dma_flush_range - .size v3_cache_fns, . - v3_cache_fns diff --git a/arch/arm/mm/cache-v4.S b/arch/arm/mm/cache-v4.S index b8ad5d58ebe..a7ba68f59f0 100644 --- a/arch/arm/mm/cache-v4.S +++ b/arch/arm/mm/cache-v4.S @@ -9,11 +9,19 @@ */ #include <linux/linkage.h> #include <linux/init.h> -#include <asm/hardware.h> #include <asm/page.h> #include "proc-macros.S" /* + * flush_icache_all() + * + * Unconditionally clean and invalidate the entire icache. + */ +ENTRY(v4_flush_icache_all) + mov pc, lr +ENDPROC(v4_flush_icache_all) + +/* * flush_user_cache_all() * * Invalidate all cache entries in a particular address @@ -29,9 +37,13 @@ ENTRY(v4_flush_user_cache_all) * Clean and invalidate the entire cache. */ ENTRY(v4_flush_kern_cache_all) +#ifdef CONFIG_CPU_CP15 mov r0, #0 mcr p15, 0, r0, c7, c7, 0 @ flush ID cache mov pc, lr +#else + /* FALLTHROUGH */ +#endif /* * flush_user_cache_range(start, end, flags) @@ -44,9 +56,13 @@ ENTRY(v4_flush_kern_cache_all) * - flags - vma_area_struct flags describing address space */ ENTRY(v4_flush_user_cache_range) +#ifdef CONFIG_CPU_CP15 mov ip, #0 - mcreq p15, 0, ip, c7, c7, 0 @ flush ID cache + mcr p15, 0, ip, c7, c7, 0 @ flush ID cache mov pc, lr +#else + /* FALLTHROUGH */ +#endif /* * coherent_kern_range(start, end) @@ -72,31 +88,19 @@ ENTRY(v4_coherent_kern_range) * - end - virtual end address */ ENTRY(v4_coherent_user_range) + mov r0, #0 mov pc, lr /* - * flush_kern_dcache_page(void *page) + * flush_kern_dcache_area(void *addr, size_t size) * * Ensure no D cache aliasing occurs, either with itself or * the I cache * - * - addr - page aligned address + * - addr - kernel address + * - size - region size */ -ENTRY(v4_flush_kern_dcache_page) - /* FALLTHROUGH */ - -/* - * dma_inv_range(start, end) - * - * Invalidate (discard) the specified virtual address range. - * May not write back any entries. If 'start' or 'end' - * are not cache line aligned, those lines must be written - * back. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(v4_dma_inv_range) +ENTRY(v4_flush_kern_dcache_area) /* FALLTHROUGH */ /* @@ -108,32 +112,38 @@ ENTRY(v4_dma_inv_range) * - end - virtual end address */ ENTRY(v4_dma_flush_range) +#ifdef CONFIG_CPU_CP15 mov r0, #0 mcr p15, 0, r0, c7, c7, 0 @ flush ID cache +#endif + mov pc, lr + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(v4_dma_unmap_area) + teq r2, #DMA_TO_DEVICE + bne v4_dma_flush_range /* FALLTHROUGH */ /* - * dma_clean_range(start, end) - * - * Clean (write back) the specified virtual address range. 
- * - * - start - virtual start address - * - end - virtual end address + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction */ -ENTRY(v4_dma_clean_range) +ENTRY(v4_dma_map_area) mov pc, lr +ENDPROC(v4_dma_unmap_area) +ENDPROC(v4_dma_map_area) + + .globl v4_flush_kern_cache_louis + .equ v4_flush_kern_cache_louis, v4_flush_kern_cache_all __INITDATA - .type v4_cache_fns, #object -ENTRY(v4_cache_fns) - .long v4_flush_kern_cache_all - .long v4_flush_user_cache_all - .long v4_flush_user_cache_range - .long v4_coherent_kern_range - .long v4_coherent_user_range - .long v4_flush_kern_dcache_page - .long v4_dma_inv_range - .long v4_dma_clean_range - .long v4_dma_flush_range - .size v4_cache_fns, . - v4_cache_fns + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions v4 diff --git a/arch/arm/mm/cache-v4wb.S b/arch/arm/mm/cache-v4wb.S index 5c4055b62d9..cd494532140 100644 --- a/arch/arm/mm/cache-v4wb.S +++ b/arch/arm/mm/cache-v4wb.S @@ -7,10 +7,9 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ -#include <linux/config.h> #include <linux/linkage.h> #include <linux/init.h> -#include <asm/hardware.h> +#include <asm/memory.h> #include <asm/page.h> #include "proc-macros.S" @@ -33,7 +32,7 @@ /* * This is the size at which it becomes more efficient to * clean the whole cache, rather than using the individual - * cache line maintainence instructions. + * cache line maintenance instructions. * * Size Clean (ticks) Dirty (ticks) * 4096 21 20 21 53 55 54 @@ -46,6 +45,22 @@ */ #define CACHE_DLIMIT (CACHE_DSIZE * 4) + .data +flush_base: + .long FLUSH_BASE + .text + +/* + * flush_icache_all() + * + * Unconditionally clean and invalidate the entire icache. + */ +ENTRY(v4wb_flush_icache_all) + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mov pc, lr +ENDPROC(v4wb_flush_icache_all) + /* * flush_user_cache_all() * @@ -63,11 +78,21 @@ ENTRY(v4wb_flush_kern_cache_all) mov ip, #0 mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache __flush_whole_cache: - mov r0, #FLUSH_BASE - add r1, r0, #CACHE_DSIZE -1: ldr r2, [r0], #32 - cmp r0, r1 + ldr r3, =flush_base + ldr r1, [r3, #0] + eor r1, r1, #CACHE_DSIZE + str r1, [r3, #0] + add r2, r1, #CACHE_DSIZE +1: ldr r3, [r1], #32 + cmp r1, r2 + blo 1b +#ifdef FLUSH_BASE_MINICACHE + add r2, r2, #FLUSH_BASE_MINICACHE - FLUSH_BASE + sub r1, r2, #512 @ only 512 bytes +1: ldr r3, [r1], #32 + cmp r1, r2 blo 1b +#endif mcr p15, 0, ip, c7, c10, 4 @ drain write buffer mov pc, lr @@ -82,6 +107,7 @@ __flush_whole_cache: * - flags - vma_area_struct flags describing address space */ ENTRY(v4wb_flush_user_cache_range) + mov ip, #0 sub r3, r1, r0 @ calculate total size tst r2, #VM_EXEC @ executable region? 
mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache @@ -99,15 +125,16 @@ ENTRY(v4wb_flush_user_cache_range) mov pc, lr /* - * flush_kern_dcache_page(void *page) + * flush_kern_dcache_area(void *addr, size_t size) * * Ensure no D cache aliasing occurs, either with itself or * the I cache * - * - addr - page aligned address + * - addr - kernel address + * - size - region size */ -ENTRY(v4wb_flush_kern_dcache_page) - add r1, r0, #PAGE_SZ +ENTRY(v4wb_flush_kern_dcache_area) + add r1, r0, r1 /* fall through */ /* @@ -140,9 +167,9 @@ ENTRY(v4wb_coherent_user_range) add r0, r0, #CACHE_DLINESIZE cmp r0, r1 blo 1b - mov ip, #0 - mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache - mcr p15, 0, ip, c7, c10, 4 @ drain WB + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mcr p15, 0, r0, c7, c10, 4 @ drain WB mov pc, lr @@ -157,7 +184,7 @@ ENTRY(v4wb_coherent_user_range) * - start - virtual start address * - end - virtual end address */ -ENTRY(v4wb_dma_inv_range) +v4wb_dma_inv_range: tst r0, #CACHE_DLINESIZE - 1 bic r0, r0, #CACHE_DLINESIZE - 1 mcrne p15, 0, r0, c7, c10, 1 @ clean D entry @@ -178,7 +205,7 @@ ENTRY(v4wb_dma_inv_range) * - start - virtual start address * - end - virtual end address */ -ENTRY(v4wb_dma_clean_range) +v4wb_dma_clean_range: bic r0, r0, #CACHE_DLINESIZE - 1 1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry add r0, r0, #CACHE_DLINESIZE @@ -200,17 +227,34 @@ ENTRY(v4wb_dma_clean_range) .globl v4wb_dma_flush_range .set v4wb_dma_flush_range, v4wb_coherent_kern_range +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(v4wb_dma_map_area) + add r1, r1, r0 + cmp r2, #DMA_TO_DEVICE + beq v4wb_dma_clean_range + bcs v4wb_dma_inv_range + b v4wb_dma_flush_range +ENDPROC(v4wb_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(v4wb_dma_unmap_area) + mov pc, lr +ENDPROC(v4wb_dma_unmap_area) + + .globl v4wb_flush_kern_cache_louis + .equ v4wb_flush_kern_cache_louis, v4wb_flush_kern_cache_all + __INITDATA - .type v4wb_cache_fns, #object -ENTRY(v4wb_cache_fns) - .long v4wb_flush_kern_cache_all - .long v4wb_flush_user_cache_all - .long v4wb_flush_user_cache_range - .long v4wb_coherent_kern_range - .long v4wb_coherent_user_range - .long v4wb_flush_kern_dcache_page - .long v4wb_dma_inv_range - .long v4wb_dma_clean_range - .long v4wb_dma_flush_range - .size v4wb_cache_fns, . - v4wb_cache_fns + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions v4wb diff --git a/arch/arm/mm/cache-v4wt.S b/arch/arm/mm/cache-v4wt.S index 9bcabd86c6f..11e5e5838bc 100644 --- a/arch/arm/mm/cache-v4wt.S +++ b/arch/arm/mm/cache-v4wt.S @@ -13,7 +13,6 @@ */ #include <linux/linkage.h> #include <linux/init.h> -#include <asm/hardware.h> #include <asm/page.h> #include "proc-macros.S" @@ -35,13 +34,24 @@ /* * This is the size at which it becomes more efficient to * clean the whole cache, rather than using the individual - * cache line maintainence instructions. + * cache line maintenance instructions. * * *** This needs benchmarking */ #define CACHE_DLIMIT 16384 /* + * flush_icache_all() + * + * Unconditionally clean and invalidate the entire icache. 
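
As an editorial aside, not part of the patch: the v4wb_dma_map_area entry added above dispatches on the DMA direction — clean for DMA_TO_DEVICE, invalidate for DMA_FROM_DEVICE, clean-and-invalidate otherwise — while v4wb_dma_unmap_area is a no-op. A C rendering of that dispatch, with printf stand-ins for the real range operations:

#include <stdio.h>

enum dma_data_direction {       /* same numeric values as the kernel's enum */
        DMA_BIDIRECTIONAL = 0,
        DMA_TO_DEVICE = 1,
        DMA_FROM_DEVICE = 2,
};

/* Stand-ins for v4wb_dma_clean_range/_inv_range/_flush_range. */
static void clean_range(unsigned long s, unsigned long e) { printf("clean 0x%lx-0x%lx\n", s, e); }
static void inv_range(unsigned long s, unsigned long e)   { printf("inv   0x%lx-0x%lx\n", s, e); }
static void flush_range(unsigned long s, unsigned long e) { printf("flush 0x%lx-0x%lx\n", s, e); }

static void dma_map_area(unsigned long start, unsigned long size,
                         enum dma_data_direction dir)
{
        unsigned long end = start + size;       /* add r1, r1, r0 */

        if (dir == DMA_TO_DEVICE)
                clean_range(start, end);        /* write dirty lines to RAM */
        else if (dir == DMA_FROM_DEVICE)
                inv_range(start, end);          /* drop soon-to-be-stale lines */
        else
                flush_range(start, end);        /* bidirectional: clean + inv */
}

int main(void)
{
        dma_map_area(0x8000, 0x1000, DMA_FROM_DEVICE);
        return 0;
}
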
+ */ +ENTRY(v4wt_flush_icache_all) + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mov pc, lr +ENDPROC(v4wt_flush_icache_all) + +/* * flush_user_cache_all() * * Invalidate all cache entries in a particular address @@ -115,20 +125,22 @@ ENTRY(v4wt_coherent_user_range) add r0, r0, #CACHE_DLINESIZE cmp r0, r1 blo 1b + mov r0, #0 mov pc, lr /* - * flush_kern_dcache_page(void *page) + * flush_kern_dcache_area(void *addr, size_t size) * * Ensure no D cache aliasing occurs, either with itself or * the I cache * - * - addr - page aligned address + * - addr - kernel address + * - size - region size */ -ENTRY(v4wt_flush_kern_dcache_page) +ENTRY(v4wt_flush_kern_dcache_area) mov r2, #0 mcr p15, 0, r2, c7, c5, 0 @ invalidate I cache - add r1, r0, #PAGE_SZ + add r1, r0, r1 /* fallthrough */ /* @@ -142,23 +154,12 @@ ENTRY(v4wt_flush_kern_dcache_page) * - start - virtual start address * - end - virtual end address */ -ENTRY(v4wt_dma_inv_range) +v4wt_dma_inv_range: bic r0, r0, #CACHE_DLINESIZE - 1 1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry add r0, r0, #CACHE_DLINESIZE cmp r0, r1 blo 1b - /* FALLTHROUGH */ - -/* - * dma_clean_range(start, end) - * - * Clean the specified virtual address range. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(v4wt_dma_clean_range) mov pc, lr /* @@ -172,17 +173,33 @@ ENTRY(v4wt_dma_clean_range) .globl v4wt_dma_flush_range .equ v4wt_dma_flush_range, v4wt_dma_inv_range +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(v4wt_dma_unmap_area) + add r1, r1, r0 + teq r2, #DMA_TO_DEVICE + bne v4wt_dma_inv_range + /* FALLTHROUGH */ + +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(v4wt_dma_map_area) + mov pc, lr +ENDPROC(v4wt_dma_unmap_area) +ENDPROC(v4wt_dma_map_area) + + .globl v4wt_flush_kern_cache_louis + .equ v4wt_flush_kern_cache_louis, v4wt_flush_kern_cache_all + __INITDATA - .type v4wt_cache_fns, #object -ENTRY(v4wt_cache_fns) - .long v4wt_flush_kern_cache_all - .long v4wt_flush_user_cache_all - .long v4wt_flush_user_cache_range - .long v4wt_coherent_kern_range - .long v4wt_coherent_user_range - .long v4wt_flush_kern_dcache_page - .long v4wt_dma_inv_range - .long v4wt_dma_clean_range - .long v4wt_dma_flush_range - .size v4wt_cache_fns, . - v4wt_cache_fns + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions v4wt diff --git a/arch/arm/mm/cache-v6.S b/arch/arm/mm/cache-v6.S index 72966d90e95..d8fd4d4bd3d 100644 --- a/arch/arm/mm/cache-v6.S +++ b/arch/arm/mm/cache-v6.S @@ -12,6 +12,8 @@ #include <linux/linkage.h> #include <linux/init.h> #include <asm/assembler.h> +#include <asm/errno.h> +#include <asm/unwind.h> #include "proc-macros.S" @@ -21,6 +23,38 @@ #define BTB_FLUSH_SIZE 8 /* + * v6_flush_icache_all() + * + * Flush the whole I-cache. + * + * ARM1136 erratum 411920 - Invalidate Instruction Cache operation can fail. + * This erratum is present in 1136, 1156 and 1176. It does not affect the + * MPCore. 
+ * + * Registers: + * r0 - set to 0 + * r1 - corrupted + */ +ENTRY(v6_flush_icache_all) + mov r0, #0 +#ifdef CONFIG_ARM_ERRATA_411920 + mrs r1, cpsr + cpsid ifa @ disable interrupts + mcr p15, 0, r0, c7, c5, 0 @ invalidate entire I-cache + mcr p15, 0, r0, c7, c5, 0 @ invalidate entire I-cache + mcr p15, 0, r0, c7, c5, 0 @ invalidate entire I-cache + mcr p15, 0, r0, c7, c5, 0 @ invalidate entire I-cache + msr cpsr_cx, r1 @ restore interrupts + .rept 11 @ ARM Ltd recommends at least + nop @ 11 NOPs + .endr +#else + mcr p15, 0, r0, c7, c5, 0 @ invalidate I-cache +#endif + mov pc, lr +ENDPROC(v6_flush_icache_all) + +/* * v6_flush_cache_all() * * Flush the entire cache. @@ -31,8 +65,12 @@ ENTRY(v6_flush_kern_cache_all) mov r0, #0 #ifdef HARVARD_CACHE mcr p15, 0, r0, c7, c14, 0 @ D cache clean+invalidate +#ifndef CONFIG_ARM_ERRATA_411920 mcr p15, 0, r0, c7, c5, 0 @ I+BTB cache invalidate #else + b v6_flush_icache_all +#endif +#else mcr p15, 0, r0, c7, c15, 0 @ Cache clean+invalidate #endif mov pc, lr @@ -92,38 +130,51 @@ ENTRY(v6_coherent_kern_range) * - the Icache does not read data from the write buffer */ ENTRY(v6_coherent_user_range) + UNWIND(.fnstart ) +#ifdef HARVARD_CACHE bic r0, r0, #CACHE_LINE_SIZE - 1 1: -#ifdef HARVARD_CACHE - mcr p15, 0, r0, c7, c10, 1 @ clean D line - mcr p15, 0, r0, c7, c5, 1 @ invalidate I line -#endif - mcr p15, 0, r0, c7, c5, 7 @ invalidate BTB entry - add r0, r0, #BTB_FLUSH_SIZE - mcr p15, 0, r0, c7, c5, 7 @ invalidate BTB entry - add r0, r0, #BTB_FLUSH_SIZE - mcr p15, 0, r0, c7, c5, 7 @ invalidate BTB entry - add r0, r0, #BTB_FLUSH_SIZE - mcr p15, 0, r0, c7, c5, 7 @ invalidate BTB entry - add r0, r0, #BTB_FLUSH_SIZE + USER( mcr p15, 0, r0, c7, c10, 1 ) @ clean D line + add r0, r0, #CACHE_LINE_SIZE cmp r0, r1 blo 1b -#ifdef HARVARD_CACHE +#endif mov r0, #0 +#ifdef HARVARD_CACHE mcr p15, 0, r0, c7, c10, 4 @ drain write buffer +#ifndef CONFIG_ARM_ERRATA_411920 + mcr p15, 0, r0, c7, c5, 0 @ I+BTB cache invalidate +#else + b v6_flush_icache_all +#endif +#else + mcr p15, 0, r0, c7, c5, 6 @ invalidate BTB #endif mov pc, lr /* - * v6_flush_kern_dcache_page(kaddr) + * Fault handling for the cache operation above. If the virtual address in r0 + * isn't mapped, fail with -EFAULT. + */ +9001: + mov r0, #-EFAULT + mov pc, lr + UNWIND(.fnend ) +ENDPROC(v6_coherent_user_range) +ENDPROC(v6_coherent_kern_range) + +/* + * v6_flush_kern_dcache_area(void *addr, size_t size) * * Ensure that the data held in the page kaddr is written back * to the page in question. 
* - * - kaddr - kernel address (guaranteed to be page aligned) + * - addr - kernel address + * - size - region size */ -ENTRY(v6_flush_kern_dcache_page) - add r1, r0, #PAGE_SZ +ENTRY(v6_flush_kern_dcache_area) + add r1, r0, r1 + bic r0, r0, #D_CACHE_LINE_SIZE - 1 1: #ifdef HARVARD_CACHE mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D line @@ -150,7 +201,11 @@ ENTRY(v6_flush_kern_dcache_page) * - start - virtual start address of region * - end - virtual end address of region */ -ENTRY(v6_dma_inv_range) +v6_dma_inv_range: +#ifdef CONFIG_DMA_CACHE_RWFO + ldrb r2, [r0] @ read for ownership + strb r2, [r0] @ write for ownership +#endif tst r0, #D_CACHE_LINE_SIZE - 1 bic r0, r0, #D_CACHE_LINE_SIZE - 1 #ifdef HARVARD_CACHE @@ -159,6 +214,10 @@ ENTRY(v6_dma_inv_range) mcrne p15, 0, r0, c7, c11, 1 @ clean unified line #endif tst r1, #D_CACHE_LINE_SIZE - 1 +#ifdef CONFIG_DMA_CACHE_RWFO + ldrneb r2, [r1, #-1] @ read for ownership + strneb r2, [r1, #-1] @ write for ownership +#endif bic r1, r1, #D_CACHE_LINE_SIZE - 1 #ifdef HARVARD_CACHE mcrne p15, 0, r1, c7, c14, 1 @ clean & invalidate D line @@ -173,6 +232,10 @@ ENTRY(v6_dma_inv_range) #endif add r0, r0, #D_CACHE_LINE_SIZE cmp r0, r1 +#ifdef CONFIG_DMA_CACHE_RWFO + ldrlo r2, [r0] @ read for ownership + strlo r2, [r0] @ write for ownership +#endif blo 1b mov r0, #0 mcr p15, 0, r0, c7, c10, 4 @ drain write buffer @@ -183,9 +246,12 @@ ENTRY(v6_dma_inv_range) * - start - virtual start address of region * - end - virtual end address of region */ -ENTRY(v6_dma_clean_range) +v6_dma_clean_range: bic r0, r0, #D_CACHE_LINE_SIZE - 1 1: +#ifdef CONFIG_DMA_CACHE_RWFO + ldr r2, [r0] @ read for ownership +#endif #ifdef HARVARD_CACHE mcr p15, 0, r0, c7, c10, 1 @ clean D line #else @@ -204,6 +270,10 @@ ENTRY(v6_dma_clean_range) * - end - virtual end address of region */ ENTRY(v6_dma_flush_range) +#ifdef CONFIG_DMA_CACHE_RWFO + ldrb r2, [r0] @ read for ownership + strb r2, [r0] @ write for ownership +#endif bic r0, r0, #D_CACHE_LINE_SIZE - 1 1: #ifdef HARVARD_CACHE @@ -213,22 +283,53 @@ ENTRY(v6_dma_flush_range) #endif add r0, r0, #D_CACHE_LINE_SIZE cmp r0, r1 +#ifdef CONFIG_DMA_CACHE_RWFO + ldrlob r2, [r0] @ read for ownership + strlob r2, [r0] @ write for ownership +#endif blo 1b mov r0, #0 mcr p15, 0, r0, c7, c10, 4 @ drain write buffer mov pc, lr +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(v6_dma_map_area) + add r1, r1, r0 + teq r2, #DMA_FROM_DEVICE + beq v6_dma_inv_range +#ifndef CONFIG_DMA_CACHE_RWFO + b v6_dma_clean_range +#else + teq r2, #DMA_TO_DEVICE + beq v6_dma_clean_range + b v6_dma_flush_range +#endif +ENDPROC(v6_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(v6_dma_unmap_area) +#ifndef CONFIG_DMA_CACHE_RWFO + add r1, r1, r0 + teq r2, #DMA_TO_DEVICE + bne v6_dma_inv_range +#endif + mov pc, lr +ENDPROC(v6_dma_unmap_area) + + .globl v6_flush_kern_cache_louis + .equ v6_flush_kern_cache_louis, v6_flush_kern_cache_all + __INITDATA - .type v6_cache_fns, #object -ENTRY(v6_cache_fns) - .long v6_flush_kern_cache_all - .long v6_flush_user_cache_all - .long v6_flush_user_cache_range - .long v6_coherent_kern_range - .long v6_coherent_user_range - .long v6_flush_kern_dcache_page - .long v6_dma_inv_range - .long v6_dma_clean_range - .long v6_dma_flush_range - .size v6_cache_fns, . 
- v6_cache_fns + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions v6 diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S new file mode 100644 index 00000000000..615c99e38ba --- /dev/null +++ b/arch/arm/mm/cache-v7.S @@ -0,0 +1,448 @@ +/* + * linux/arch/arm/mm/cache-v7.S + * + * Copyright (C) 2001 Deep Blue Solutions Ltd. + * Copyright (C) 2005 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This is the "shell" of the ARMv7 processor support. + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/assembler.h> +#include <asm/errno.h> +#include <asm/unwind.h> + +#include "proc-macros.S" + +/* + * The secondary kernel init calls v7_flush_dcache_all before it enables + * the L1; however, the L1 comes out of reset in an undefined state, so + * the clean + invalidate performed by v7_flush_dcache_all causes a bunch + * of cache lines with uninitialized data and uninitialized tags to get + * written out to memory, which does really unpleasant things to the main + * processor. We fix this by performing an invalidate, rather than a + * clean + invalidate, before jumping into the kernel. + * + * This function is cloned from arch/arm/mach-tegra/headsmp.S, and needs + * to be called for both secondary cores startup and primary core resume + * procedures. + */ +ENTRY(v7_invalidate_l1) + mov r0, #0 + mcr p15, 2, r0, c0, c0, 0 + mrc p15, 1, r0, c0, c0, 0 + + ldr r1, =0x7fff + and r2, r1, r0, lsr #13 + + ldr r1, =0x3ff + + and r3, r1, r0, lsr #3 @ NumWays - 1 + add r2, r2, #1 @ NumSets + + and r0, r0, #0x7 + add r0, r0, #4 @ SetShift + + clz r1, r3 @ WayShift + add r4, r3, #1 @ NumWays +1: sub r2, r2, #1 @ NumSets-- + mov r3, r4 @ Temp = NumWays +2: subs r3, r3, #1 @ Temp-- + mov r5, r3, lsl r1 + mov r6, r2, lsl r0 + orr r5, r5, r6 @ Reg = (Temp<<WayShift)|(NumSets<<SetShift) + mcr p15, 0, r5, c7, c6, 2 + bgt 2b + cmp r2, #0 + bgt 1b + dsb st + isb + mov pc, lr +ENDPROC(v7_invalidate_l1) + +/* + * v7_flush_icache_all() + * + * Flush the whole I-cache. + * + * Registers: + * r0 - set to 0 + */ +ENTRY(v7_flush_icache_all) + mov r0, #0 + ALT_SMP(mcr p15, 0, r0, c7, c1, 0) @ invalidate I-cache inner shareable + ALT_UP(mcr p15, 0, r0, c7, c5, 0) @ I+BTB cache invalidate + mov pc, lr +ENDPROC(v7_flush_icache_all) + + /* + * v7_flush_dcache_louis() + * + * Flush the D-cache up to the Level of Unification Inner Shareable + * + * Corrupted registers: r0-r7, r9-r11 (r6 only in Thumb mode) + */ + +ENTRY(v7_flush_dcache_louis) + dmb @ ensure ordering with previous memory accesses + mrc p15, 1, r0, c0, c0, 1 @ read clidr, r0 = clidr + ALT_SMP(ands r3, r0, #(7 << 21)) @ extract LoUIS from clidr + ALT_UP(ands r3, r0, #(7 << 27)) @ extract LoUU from clidr +#ifdef CONFIG_ARM_ERRATA_643719 + ALT_SMP(mrceq p15, 0, r2, c0, c0, 0) @ read main ID register + ALT_UP(moveq pc, lr) @ LoUU is zero, so nothing to do + ldreq r1, =0x410fc090 @ ID of ARM Cortex A9 r0p? + biceq r2, r2, #0x0000000f @ clear minor revision number + teqeq r2, r1 @ test for errata affected core and if so... 
+ orreqs r3, #(1 << 21) @ fix LoUIS value (and set flags state to 'ne') +#endif + ALT_SMP(mov r3, r3, lsr #20) @ r3 = LoUIS * 2 + ALT_UP(mov r3, r3, lsr #26) @ r3 = LoUU * 2 + moveq pc, lr @ return if level == 0 + mov r10, #0 @ r10 (starting level) = 0 + b flush_levels @ start flushing cache levels +ENDPROC(v7_flush_dcache_louis) + +/* + * v7_flush_dcache_all() + * + * Flush the whole D-cache. + * + * Corrupted registers: r0-r7, r9-r11 (r6 only in Thumb mode) + * + * - mm - mm_struct describing address space + */ +ENTRY(v7_flush_dcache_all) + dmb @ ensure ordering with previous memory accesses + mrc p15, 1, r0, c0, c0, 1 @ read clidr + ands r3, r0, #0x7000000 @ extract loc from clidr + mov r3, r3, lsr #23 @ left align loc bit field + beq finished @ if loc is 0, then no need to clean + mov r10, #0 @ start clean at cache level 0 +flush_levels: + add r2, r10, r10, lsr #1 @ work out 3x current cache level + mov r1, r0, lsr r2 @ extract cache type bits from clidr + and r1, r1, #7 @ mask of the bits for current cache only + cmp r1, #2 @ see what cache we have at this level + blt skip @ skip if no cache, or just i-cache +#ifdef CONFIG_PREEMPT + save_and_disable_irqs_notrace r9 @ make cssr&csidr read atomic +#endif + mcr p15, 2, r10, c0, c0, 0 @ select current cache level in cssr + isb @ isb to sych the new cssr&csidr + mrc p15, 1, r1, c0, c0, 0 @ read the new csidr +#ifdef CONFIG_PREEMPT + restore_irqs_notrace r9 +#endif + and r2, r1, #7 @ extract the length of the cache lines + add r2, r2, #4 @ add 4 (line length offset) + ldr r4, =0x3ff + ands r4, r4, r1, lsr #3 @ find maximum number on the way size + clz r5, r4 @ find bit position of way size increment + ldr r7, =0x7fff + ands r7, r7, r1, lsr #13 @ extract max number of the index size +loop1: + mov r9, r7 @ create working copy of max index +loop2: + ARM( orr r11, r10, r4, lsl r5 ) @ factor way and cache number into r11 + THUMB( lsl r6, r4, r5 ) + THUMB( orr r11, r10, r6 ) @ factor way and cache number into r11 + ARM( orr r11, r11, r9, lsl r2 ) @ factor index number into r11 + THUMB( lsl r6, r9, r2 ) + THUMB( orr r11, r11, r6 ) @ factor index number into r11 + mcr p15, 0, r11, c7, c14, 2 @ clean & invalidate by set/way + subs r9, r9, #1 @ decrement the index + bge loop2 + subs r4, r4, #1 @ decrement the way + bge loop1 +skip: + add r10, r10, #2 @ increment cache number + cmp r3, r10 + bgt flush_levels +finished: + mov r10, #0 @ swith back to cache level 0 + mcr p15, 2, r10, c0, c0, 0 @ select current cache level in cssr + dsb st + isb + mov pc, lr +ENDPROC(v7_flush_dcache_all) + +/* + * v7_flush_cache_all() + * + * Flush the entire cache system. + * The data cache flush is now achieved using atomic clean / invalidates + * working outwards from L1 cache. This is done using Set/Way based cache + * maintenance instructions. + * The instruction cache can still be invalidated back to the point of + * unification in a single instruction. + * + */ +ENTRY(v7_flush_kern_cache_all) + ARM( stmfd sp!, {r4-r5, r7, r9-r11, lr} ) + THUMB( stmfd sp!, {r4-r7, r9-r11, lr} ) + bl v7_flush_dcache_all + mov r0, #0 + ALT_SMP(mcr p15, 0, r0, c7, c1, 0) @ invalidate I-cache inner shareable + ALT_UP(mcr p15, 0, r0, c7, c5, 0) @ I+BTB cache invalidate + ARM( ldmfd sp!, {r4-r5, r7, r9-r11, lr} ) + THUMB( ldmfd sp!, {r4-r7, r9-r11, lr} ) + mov pc, lr +ENDPROC(v7_flush_kern_cache_all) + + /* + * v7_flush_kern_cache_louis(void) + * + * Flush the data cache up to Level of Unification Inner Shareable. + * Invalidate the I-cache to the point of unification. 
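
As an editorial aside, not part of the patch: the flush_levels loop above packs (way << WayShift) | (set << SetShift) | (level << 1) into the operand of the clean-and-invalidate-by-set/way instruction, where WayShift is clz(ways - 1) and SetShift is log2 of the line size in bytes. A standalone sketch with a hypothetical 32 KB, 4-way, 32-byte-line cache at level 0:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        unsigned ways = 4, sets = 256;  /* 4 * 256 * 32 B = 32 KB */
        unsigned set_shift = 5;         /* log2(32-byte line) */
        unsigned way_shift = 30;        /* clz(ways - 1) = clz(3) */
        unsigned level = 0;

        for (unsigned way = 0; way < ways; way++) {
                for (unsigned set = 0; set < sets; set++) {
                        uint32_t op = ((uint32_t)way << way_shift) |
                                      ((uint32_t)set << set_shift) |
                                      (level << 1);
                        /* the kernel issues: mcr p15, 0, op, c7, c14, 2 */
                        if (way == 2 && set == 10)
                                printf("sample operand: 0x%08x\n", op);
                }
        }
        return 0;
}
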
+ */ +ENTRY(v7_flush_kern_cache_louis) + ARM( stmfd sp!, {r4-r5, r7, r9-r11, lr} ) + THUMB( stmfd sp!, {r4-r7, r9-r11, lr} ) + bl v7_flush_dcache_louis + mov r0, #0 + ALT_SMP(mcr p15, 0, r0, c7, c1, 0) @ invalidate I-cache inner shareable + ALT_UP(mcr p15, 0, r0, c7, c5, 0) @ I+BTB cache invalidate + ARM( ldmfd sp!, {r4-r5, r7, r9-r11, lr} ) + THUMB( ldmfd sp!, {r4-r7, r9-r11, lr} ) + mov pc, lr +ENDPROC(v7_flush_kern_cache_louis) + +/* + * v7_flush_cache_all() + * + * Flush all TLB entries in a particular address space + * + * - mm - mm_struct describing address space + */ +ENTRY(v7_flush_user_cache_all) + /*FALLTHROUGH*/ + +/* + * v7_flush_cache_range(start, end, flags) + * + * Flush a range of TLB entries in the specified address space. + * + * - start - start address (may not be aligned) + * - end - end address (exclusive, may not be aligned) + * - flags - vm_area_struct flags describing address space + * + * It is assumed that: + * - we have a VIPT cache. + */ +ENTRY(v7_flush_user_cache_range) + mov pc, lr +ENDPROC(v7_flush_user_cache_all) +ENDPROC(v7_flush_user_cache_range) + +/* + * v7_coherent_kern_range(start,end) + * + * Ensure that the I and D caches are coherent within specified + * region. This is typically used when code has been written to + * a memory region, and will be executed. + * + * - start - virtual start address of region + * - end - virtual end address of region + * + * It is assumed that: + * - the Icache does not read data from the write buffer + */ +ENTRY(v7_coherent_kern_range) + /* FALLTHROUGH */ + +/* + * v7_coherent_user_range(start,end) + * + * Ensure that the I and D caches are coherent within specified + * region. This is typically used when code has been written to + * a memory region, and will be executed. + * + * - start - virtual start address of region + * - end - virtual end address of region + * + * It is assumed that: + * - the Icache does not read data from the write buffer + */ +ENTRY(v7_coherent_user_range) + UNWIND(.fnstart ) + dcache_line_size r2, r3 + sub r3, r2, #1 + bic r12, r0, r3 +#ifdef CONFIG_ARM_ERRATA_764369 + ALT_SMP(W(dsb)) + ALT_UP(W(nop)) +#endif +1: + USER( mcr p15, 0, r12, c7, c11, 1 ) @ clean D line to the point of unification + add r12, r12, r2 + cmp r12, r1 + blo 1b + dsb ishst + icache_line_size r2, r3 + sub r3, r2, #1 + bic r12, r0, r3 +2: + USER( mcr p15, 0, r12, c7, c5, 1 ) @ invalidate I line + add r12, r12, r2 + cmp r12, r1 + blo 2b + mov r0, #0 + ALT_SMP(mcr p15, 0, r0, c7, c1, 6) @ invalidate BTB Inner Shareable + ALT_UP(mcr p15, 0, r0, c7, c5, 6) @ invalidate BTB + dsb ishst + isb + mov pc, lr + +/* + * Fault handling for the cache operation above. If the virtual address in r0 + * isn't mapped, fail with -EFAULT. + */ +9001: +#ifdef CONFIG_ARM_ERRATA_775420 + dsb +#endif + mov r0, #-EFAULT + mov pc, lr + UNWIND(.fnend ) +ENDPROC(v7_coherent_kern_range) +ENDPROC(v7_coherent_user_range) + +/* + * v7_flush_kern_dcache_area(void *addr, size_t size) + * + * Ensure that the data held in the page kaddr is written back + * to the page in question. 
+ * + * - addr - kernel address + * - size - region size + */ +ENTRY(v7_flush_kern_dcache_area) + dcache_line_size r2, r3 + add r1, r0, r1 + sub r3, r2, #1 + bic r0, r0, r3 +#ifdef CONFIG_ARM_ERRATA_764369 + ALT_SMP(W(dsb)) + ALT_UP(W(nop)) +#endif +1: + mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D line / unified line + add r0, r0, r2 + cmp r0, r1 + blo 1b + dsb st + mov pc, lr +ENDPROC(v7_flush_kern_dcache_area) + +/* + * v7_dma_inv_range(start,end) + * + * Invalidate the data cache within the specified region; we will + * be performing a DMA operation in this region and we want to + * purge old data in the cache. + * + * - start - virtual start address of region + * - end - virtual end address of region + */ +v7_dma_inv_range: + dcache_line_size r2, r3 + sub r3, r2, #1 + tst r0, r3 + bic r0, r0, r3 +#ifdef CONFIG_ARM_ERRATA_764369 + ALT_SMP(W(dsb)) + ALT_UP(W(nop)) +#endif + mcrne p15, 0, r0, c7, c14, 1 @ clean & invalidate D / U line + + tst r1, r3 + bic r1, r1, r3 + mcrne p15, 0, r1, c7, c14, 1 @ clean & invalidate D / U line +1: + mcr p15, 0, r0, c7, c6, 1 @ invalidate D / U line + add r0, r0, r2 + cmp r0, r1 + blo 1b + dsb st + mov pc, lr +ENDPROC(v7_dma_inv_range) + +/* + * v7_dma_clean_range(start,end) + * - start - virtual start address of region + * - end - virtual end address of region + */ +v7_dma_clean_range: + dcache_line_size r2, r3 + sub r3, r2, #1 + bic r0, r0, r3 +#ifdef CONFIG_ARM_ERRATA_764369 + ALT_SMP(W(dsb)) + ALT_UP(W(nop)) +#endif +1: + mcr p15, 0, r0, c7, c10, 1 @ clean D / U line + add r0, r0, r2 + cmp r0, r1 + blo 1b + dsb st + mov pc, lr +ENDPROC(v7_dma_clean_range) + +/* + * v7_dma_flush_range(start,end) + * - start - virtual start address of region + * - end - virtual end address of region + */ +ENTRY(v7_dma_flush_range) + dcache_line_size r2, r3 + sub r3, r2, #1 + bic r0, r0, r3 +#ifdef CONFIG_ARM_ERRATA_764369 + ALT_SMP(W(dsb)) + ALT_UP(W(nop)) +#endif +1: + mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D / U line + add r0, r0, r2 + cmp r0, r1 + blo 1b + dsb st + mov pc, lr +ENDPROC(v7_dma_flush_range) + +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(v7_dma_map_area) + add r1, r1, r0 + teq r2, #DMA_FROM_DEVICE + beq v7_dma_inv_range + b v7_dma_clean_range +ENDPROC(v7_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(v7_dma_unmap_area) + add r1, r1, r0 + teq r2, #DMA_TO_DEVICE + bne v7_dma_inv_range + mov pc, lr +ENDPROC(v7_dma_unmap_area) + + __INITDATA + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions v7 diff --git a/arch/arm/mm/cache-xsc3l2.c b/arch/arm/mm/cache-xsc3l2.c new file mode 100644 index 00000000000..6c3edeb66e7 --- /dev/null +++ b/arch/arm/mm/cache-xsc3l2.c @@ -0,0 +1,220 @@ +/* + * arch/arm/mm/cache-xsc3l2.c - XScale3 L2 cache controller support + * + * Copyright (C) 2007 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include <linux/init.h> +#include <linux/highmem.h> +#include <asm/cp15.h> +#include <asm/cputype.h> +#include <asm/cacheflush.h> + +#define CR_L2 (1 << 26) + +#define CACHE_LINE_SIZE 32 +#define CACHE_LINE_SHIFT 5 +#define CACHE_WAY_PER_SET 8 + +#define CACHE_WAY_SIZE(l2ctype) (8192 << (((l2ctype) >> 8) & 0xf)) +#define CACHE_SET_SIZE(l2ctype) (CACHE_WAY_SIZE(l2ctype) >> CACHE_LINE_SHIFT) + +static inline int xsc3_l2_present(void) +{ + unsigned long l2ctype; + + __asm__("mrc p15, 1, %0, c0, c0, 1" : "=r" (l2ctype)); + + return !!(l2ctype & 0xf8); +} + +static inline void xsc3_l2_clean_mva(unsigned long addr) +{ + __asm__("mcr p15, 1, %0, c7, c11, 1" : : "r" (addr)); +} + +static inline void xsc3_l2_inv_mva(unsigned long addr) +{ + __asm__("mcr p15, 1, %0, c7, c7, 1" : : "r" (addr)); +} + +static inline void xsc3_l2_inv_all(void) +{ + unsigned long l2ctype, set_way; + int set, way; + + __asm__("mrc p15, 1, %0, c0, c0, 1" : "=r" (l2ctype)); + + for (set = 0; set < CACHE_SET_SIZE(l2ctype); set++) { + for (way = 0; way < CACHE_WAY_PER_SET; way++) { + set_way = (way << 29) | (set << 5); + __asm__("mcr p15, 1, %0, c7, c11, 2" : : "r"(set_way)); + } + } + + dsb(); +} + +static inline void l2_unmap_va(unsigned long va) +{ +#ifdef CONFIG_HIGHMEM + if (va != -1) + kunmap_atomic((void *)va); +#endif +} + +static inline unsigned long l2_map_va(unsigned long pa, unsigned long prev_va) +{ +#ifdef CONFIG_HIGHMEM + unsigned long va = prev_va & PAGE_MASK; + unsigned long pa_offset = pa << (32 - PAGE_SHIFT); + if (unlikely(pa_offset < (prev_va << (32 - PAGE_SHIFT)))) { + /* + * Switching to a new page. Because cache ops are + * using virtual addresses only, we must put a mapping + * in place for it. + */ + l2_unmap_va(prev_va); + va = (unsigned long)kmap_atomic_pfn(pa >> PAGE_SHIFT); + } + return va + (pa_offset >> (32 - PAGE_SHIFT)); +#else + return __phys_to_virt(pa); +#endif +} + +static void xsc3_l2_inv_range(unsigned long start, unsigned long end) +{ + unsigned long vaddr; + + if (start == 0 && end == -1ul) { + xsc3_l2_inv_all(); + return; + } + + vaddr = -1; /* to force the first mapping */ + + /* + * Clean and invalidate partial first cache line. + */ + if (start & (CACHE_LINE_SIZE - 1)) { + vaddr = l2_map_va(start & ~(CACHE_LINE_SIZE - 1), vaddr); + xsc3_l2_clean_mva(vaddr); + xsc3_l2_inv_mva(vaddr); + start = (start | (CACHE_LINE_SIZE - 1)) + 1; + } + + /* + * Invalidate all full cache lines between 'start' and 'end'. + */ + while (start < (end & ~(CACHE_LINE_SIZE - 1))) { + vaddr = l2_map_va(start, vaddr); + xsc3_l2_inv_mva(vaddr); + start += CACHE_LINE_SIZE; + } + + /* + * Clean and invalidate partial last cache line. 
+ */ + if (start < end) { + vaddr = l2_map_va(start, vaddr); + xsc3_l2_clean_mva(vaddr); + xsc3_l2_inv_mva(vaddr); + } + + l2_unmap_va(vaddr); + + dsb(); +} + +static void xsc3_l2_clean_range(unsigned long start, unsigned long end) +{ + unsigned long vaddr; + + vaddr = -1; /* to force the first mapping */ + + start &= ~(CACHE_LINE_SIZE - 1); + while (start < end) { + vaddr = l2_map_va(start, vaddr); + xsc3_l2_clean_mva(vaddr); + start += CACHE_LINE_SIZE; + } + + l2_unmap_va(vaddr); + + dsb(); +} + +/* + * optimize L2 flush all operation by set/way format + */ +static inline void xsc3_l2_flush_all(void) +{ + unsigned long l2ctype, set_way; + int set, way; + + __asm__("mrc p15, 1, %0, c0, c0, 1" : "=r" (l2ctype)); + + for (set = 0; set < CACHE_SET_SIZE(l2ctype); set++) { + for (way = 0; way < CACHE_WAY_PER_SET; way++) { + set_way = (way << 29) | (set << 5); + __asm__("mcr p15, 1, %0, c7, c15, 2" : : "r"(set_way)); + } + } + + dsb(); +} + +static void xsc3_l2_flush_range(unsigned long start, unsigned long end) +{ + unsigned long vaddr; + + if (start == 0 && end == -1ul) { + xsc3_l2_flush_all(); + return; + } + + vaddr = -1; /* to force the first mapping */ + + start &= ~(CACHE_LINE_SIZE - 1); + while (start < end) { + vaddr = l2_map_va(start, vaddr); + xsc3_l2_clean_mva(vaddr); + xsc3_l2_inv_mva(vaddr); + start += CACHE_LINE_SIZE; + } + + l2_unmap_va(vaddr); + + dsb(); +} + +static int __init xsc3_l2_init(void) +{ + if (!cpu_is_xsc3() || !xsc3_l2_present()) + return 0; + + if (get_cr() & CR_L2) { + pr_info("XScale3 L2 cache enabled.\n"); + xsc3_l2_inv_all(); + + outer_cache.inv_range = xsc3_l2_inv_range; + outer_cache.clean_range = xsc3_l2_clean_range; + outer_cache.flush_range = xsc3_l2_flush_range; + } + + return 0; +} +core_initcall(xsc3_l2_init); diff --git a/arch/arm/mm/consistent.c b/arch/arm/mm/consistent.c deleted file mode 100644 index 47b0b767f08..00000000000 --- a/arch/arm/mm/consistent.c +++ /dev/null @@ -1,447 +0,0 @@ -/* - * linux/arch/arm/mm/consistent.c - * - * Copyright (C) 2000-2004 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * DMA uncached mapping support. - */ -#include <linux/module.h> -#include <linux/mm.h> -#include <linux/slab.h> -#include <linux/errno.h> -#include <linux/list.h> -#include <linux/init.h> -#include <linux/device.h> -#include <linux/dma-mapping.h> - -#include <asm/cacheflush.h> -#include <asm/io.h> -#include <asm/tlbflush.h> - -#define CONSISTENT_BASE (0xffc00000) -#define CONSISTENT_END (0xffe00000) -#define CONSISTENT_OFFSET(x) (((unsigned long)(x) - CONSISTENT_BASE) >> PAGE_SHIFT) - -/* - * This is the page table (2MB) covering uncached, DMA consistent allocations - */ -static pte_t *consistent_pte; -static DEFINE_SPINLOCK(consistent_lock); - -/* - * VM region handling support. - * - * This should become something generic, handling VM region allocations for - * vmalloc and similar (ioremap, module space, etc). 
- * - * I envisage vmalloc()'s supporting vm_struct becoming: - * - * struct vm_struct { - * struct vm_region region; - * unsigned long flags; - * struct page **pages; - * unsigned int nr_pages; - * unsigned long phys_addr; - * }; - * - * get_vm_area() would then call vm_region_alloc with an appropriate - * struct vm_region head (eg): - * - * struct vm_region vmalloc_head = { - * .vm_list = LIST_HEAD_INIT(vmalloc_head.vm_list), - * .vm_start = VMALLOC_START, - * .vm_end = VMALLOC_END, - * }; - * - * However, vmalloc_head.vm_start is variable (typically, it is dependent on - * the amount of RAM found at boot time.) I would imagine that get_vm_area() - * would have to initialise this each time prior to calling vm_region_alloc(). - */ -struct vm_region { - struct list_head vm_list; - unsigned long vm_start; - unsigned long vm_end; - struct page *vm_pages; -}; - -static struct vm_region consistent_head = { - .vm_list = LIST_HEAD_INIT(consistent_head.vm_list), - .vm_start = CONSISTENT_BASE, - .vm_end = CONSISTENT_END, -}; - -static struct vm_region * -vm_region_alloc(struct vm_region *head, size_t size, gfp_t gfp) -{ - unsigned long addr = head->vm_start, end = head->vm_end - size; - unsigned long flags; - struct vm_region *c, *new; - - new = kmalloc(sizeof(struct vm_region), gfp); - if (!new) - goto out; - - spin_lock_irqsave(&consistent_lock, flags); - - list_for_each_entry(c, &head->vm_list, vm_list) { - if ((addr + size) < addr) - goto nospc; - if ((addr + size) <= c->vm_start) - goto found; - addr = c->vm_end; - if (addr > end) - goto nospc; - } - - found: - /* - * Insert this entry _before_ the one we found. - */ - list_add_tail(&new->vm_list, &c->vm_list); - new->vm_start = addr; - new->vm_end = addr + size; - - spin_unlock_irqrestore(&consistent_lock, flags); - return new; - - nospc: - spin_unlock_irqrestore(&consistent_lock, flags); - kfree(new); - out: - return NULL; -} - -static struct vm_region *vm_region_find(struct vm_region *head, unsigned long addr) -{ - struct vm_region *c; - - list_for_each_entry(c, &head->vm_list, vm_list) { - if (c->vm_start == addr) - goto out; - } - c = NULL; - out: - return c; -} - -#ifdef CONFIG_HUGETLB_PAGE -#error ARM Coherent DMA allocator does not (yet) support huge TLB -#endif - -static void * -__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, - pgprot_t prot) -{ - struct page *page; - struct vm_region *c; - unsigned long order; - u64 mask = ISA_DMA_THRESHOLD, limit; - - if (!consistent_pte) { - printk(KERN_ERR "%s: not initialised\n", __func__); - dump_stack(); - return NULL; - } - - if (dev) { - mask = dev->coherent_dma_mask; - - /* - * Sanity check the DMA mask - it must be non-zero, and - * must be able to be satisfied by a DMA allocation. - */ - if (mask == 0) { - dev_warn(dev, "coherent DMA mask is unset\n"); - goto no_page; - } - - if ((~mask) & ISA_DMA_THRESHOLD) { - dev_warn(dev, "coherent DMA mask %#llx is smaller " - "than system GFP_DMA mask %#llx\n", - mask, (unsigned long long)ISA_DMA_THRESHOLD); - goto no_page; - } - } - - /* - * Sanity check the allocation size. 
- */ - size = PAGE_ALIGN(size); - limit = (mask + 1) & ~mask; - if ((limit && size >= limit) || - size >= (CONSISTENT_END - CONSISTENT_BASE)) { - printk(KERN_WARNING "coherent allocation too big " - "(requested %#x mask %#llx)\n", size, mask); - goto no_page; - } - - order = get_order(size); - - if (mask != 0xffffffff) - gfp |= GFP_DMA; - - page = alloc_pages(gfp, order); - if (!page) - goto no_page; - - /* - * Invalidate any data that might be lurking in the - * kernel direct-mapped region for device DMA. - */ - { - unsigned long kaddr = (unsigned long)page_address(page); - memset(page_address(page), 0, size); - dmac_flush_range(kaddr, kaddr + size); - } - - /* - * Allocate a virtual address in the consistent mapping region. - */ - c = vm_region_alloc(&consistent_head, size, - gfp & ~(__GFP_DMA | __GFP_HIGHMEM)); - if (c) { - pte_t *pte = consistent_pte + CONSISTENT_OFFSET(c->vm_start); - struct page *end = page + (1 << order); - - c->vm_pages = page; - - /* - * Set the "dma handle" - */ - *handle = page_to_dma(dev, page); - - do { - BUG_ON(!pte_none(*pte)); - - set_page_count(page, 1); - /* - * x86 does not mark the pages reserved... - */ - SetPageReserved(page); - set_pte(pte, mk_pte(page, prot)); - page++; - pte++; - } while (size -= PAGE_SIZE); - - /* - * Free the otherwise unused pages. - */ - while (page < end) { - set_page_count(page, 1); - __free_page(page); - page++; - } - - return (void *)c->vm_start; - } - - if (page) - __free_pages(page, order); - no_page: - *handle = ~0; - return NULL; -} - -/* - * Allocate DMA-coherent memory space and return both the kernel remapped - * virtual and bus address for that space. - */ -void * -dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp) -{ - return __dma_alloc(dev, size, handle, gfp, - pgprot_noncached(pgprot_kernel)); -} -EXPORT_SYMBOL(dma_alloc_coherent); - -/* - * Allocate a writecombining region, in much the same way as - * dma_alloc_coherent above. 
- */ -void * -dma_alloc_writecombine(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp) -{ - return __dma_alloc(dev, size, handle, gfp, - pgprot_writecombine(pgprot_kernel)); -} -EXPORT_SYMBOL(dma_alloc_writecombine); - -static int dma_mmap(struct device *dev, struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t dma_addr, size_t size) -{ - unsigned long flags, user_size, kern_size; - struct vm_region *c; - int ret = -ENXIO; - - user_size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; - - spin_lock_irqsave(&consistent_lock, flags); - c = vm_region_find(&consistent_head, (unsigned long)cpu_addr); - spin_unlock_irqrestore(&consistent_lock, flags); - - if (c) { - unsigned long off = vma->vm_pgoff; - - kern_size = (c->vm_end - c->vm_start) >> PAGE_SHIFT; - - if (off < kern_size && - user_size <= (kern_size - off)) { - vma->vm_flags |= VM_RESERVED; - ret = remap_pfn_range(vma, vma->vm_start, - page_to_pfn(c->vm_pages) + off, - user_size << PAGE_SHIFT, - vma->vm_page_prot); - } - } - - return ret; -} - -int dma_mmap_coherent(struct device *dev, struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t dma_addr, size_t size) -{ - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - return dma_mmap(dev, vma, cpu_addr, dma_addr, size); -} -EXPORT_SYMBOL(dma_mmap_coherent); - -int dma_mmap_writecombine(struct device *dev, struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t dma_addr, size_t size) -{ - vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); - return dma_mmap(dev, vma, cpu_addr, dma_addr, size); -} -EXPORT_SYMBOL(dma_mmap_writecombine); - -/* - * free a page as defined by the above mapping. - */ -void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr_t handle) -{ - struct vm_region *c; - unsigned long flags, addr; - pte_t *ptep; - - size = PAGE_ALIGN(size); - - spin_lock_irqsave(&consistent_lock, flags); - - c = vm_region_find(&consistent_head, (unsigned long)cpu_addr); - if (!c) - goto no_area; - - if ((c->vm_end - c->vm_start) != size) { - printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n", - __func__, c->vm_end - c->vm_start, size); - dump_stack(); - size = c->vm_end - c->vm_start; - } - - ptep = consistent_pte + CONSISTENT_OFFSET(c->vm_start); - addr = c->vm_start; - do { - pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep); - unsigned long pfn; - - ptep++; - addr += PAGE_SIZE; - - if (!pte_none(pte) && pte_present(pte)) { - pfn = pte_pfn(pte); - - if (pfn_valid(pfn)) { - struct page *page = pfn_to_page(pfn); - - /* - * x86 does not mark the pages reserved... - */ - ClearPageReserved(page); - - __free_page(page); - continue; - } - } - - printk(KERN_CRIT "%s: bad page in kernel page table\n", - __func__); - } while (size -= PAGE_SIZE); - - flush_tlb_kernel_range(c->vm_start, c->vm_end); - - list_del(&c->vm_list); - - spin_unlock_irqrestore(&consistent_lock, flags); - - kfree(c); - return; - - no_area: - spin_unlock_irqrestore(&consistent_lock, flags); - printk(KERN_ERR "%s: trying to free invalid coherent area: %p\n", - __func__, cpu_addr); - dump_stack(); -} -EXPORT_SYMBOL(dma_free_coherent); - -/* - * Initialise the consistent memory allocation. 
- */ -static int __init consistent_init(void) -{ - pgd_t *pgd; - pmd_t *pmd; - pte_t *pte; - int ret = 0; - - do { - pgd = pgd_offset(&init_mm, CONSISTENT_BASE); - pmd = pmd_alloc(&init_mm, pgd, CONSISTENT_BASE); - if (!pmd) { - printk(KERN_ERR "%s: no pmd tables\n", __func__); - ret = -ENOMEM; - break; - } - WARN_ON(!pmd_none(*pmd)); - - pte = pte_alloc_kernel(pmd, CONSISTENT_BASE); - if (!pte) { - printk(KERN_ERR "%s: no pte tables\n", __func__); - ret = -ENOMEM; - break; - } - - consistent_pte = pte; - } while (0); - - return ret; -} - -core_initcall(consistent_init); - -/* - * Make an area consistent for devices. - */ -void consistent_sync(void *vaddr, size_t size, int direction) -{ - unsigned long start = (unsigned long)vaddr; - unsigned long end = start + size; - - switch (direction) { - case DMA_FROM_DEVICE: /* invalidate only */ - dmac_inv_range(start, end); - break; - case DMA_TO_DEVICE: /* writeback only */ - dmac_clean_range(start, end); - break; - case DMA_BIDIRECTIONAL: /* writeback and invalidate */ - dmac_flush_range(start, end); - break; - default: - BUG(); - } -} -EXPORT_SYMBOL(consistent_sync); diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c new file mode 100644 index 00000000000..6eb97b3a748 --- /dev/null +++ b/arch/arm/mm/context.c @@ -0,0 +1,260 @@ +/* + * linux/arch/arm/mm/context.c + * + * Copyright (C) 2002-2003 Deep Blue Solutions Ltd, all rights reserved. + * Copyright (C) 2012 ARM Limited + * + * Author: Will Deacon <will.deacon@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/init.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/percpu.h> + +#include <asm/mmu_context.h> +#include <asm/smp_plat.h> +#include <asm/thread_notify.h> +#include <asm/tlbflush.h> +#include <asm/proc-fns.h> + +/* + * On ARMv6, we have the following structure in the Context ID: + * + * 31 7 0 + * +-------------------------+-----------+ + * | process ID | ASID | + * +-------------------------+-----------+ + * | context ID | + * +-------------------------------------+ + * + * The ASID is used to tag entries in the CPU caches and TLBs. + * The context ID is used by debuggers and trace logic, and + * should be unique within all running processes. + * + * In big endian operation, the two 32 bit words are swapped if accessed + * by non-64-bit operations. + */ +#define ASID_FIRST_VERSION (1ULL << ASID_BITS) +#define NUM_USER_ASIDS ASID_FIRST_VERSION + +static DEFINE_RAW_SPINLOCK(cpu_asid_lock); +static atomic64_t asid_generation = ATOMIC64_INIT(ASID_FIRST_VERSION); +static DECLARE_BITMAP(asid_map, NUM_USER_ASIDS); + +static DEFINE_PER_CPU(atomic64_t, active_asids); +static DEFINE_PER_CPU(u64, reserved_asids); +static cpumask_t tlb_flush_pending; + +#ifdef CONFIG_ARM_ERRATA_798181 +void a15_erratum_get_cpumask(int this_cpu, struct mm_struct *mm, + cpumask_t *mask) +{ + int cpu; + unsigned long flags; + u64 context_id, asid; + + raw_spin_lock_irqsave(&cpu_asid_lock, flags); + context_id = mm->context.id.counter; + for_each_online_cpu(cpu) { + if (cpu == this_cpu) + continue; + /* + * We only need to send an IPI if the other CPUs are + * running the same ASID as the one being invalidated. 
+ */ + asid = per_cpu(active_asids, cpu).counter; + if (asid == 0) + asid = per_cpu(reserved_asids, cpu); + if (context_id == asid) + cpumask_set_cpu(cpu, mask); + } + raw_spin_unlock_irqrestore(&cpu_asid_lock, flags); +} +#endif + +#ifdef CONFIG_ARM_LPAE +/* + * With LPAE, the ASID and page tables are updated atomicly, so there is + * no need for a reserved set of tables (the active ASID tracking prevents + * any issues across a rollover). + */ +#define cpu_set_reserved_ttbr0() +#else +static void cpu_set_reserved_ttbr0(void) +{ + u32 ttb; + /* + * Copy TTBR1 into TTBR0. + * This points at swapper_pg_dir, which contains only global + * entries so any speculative walks are perfectly safe. + */ + asm volatile( + " mrc p15, 0, %0, c2, c0, 1 @ read TTBR1\n" + " mcr p15, 0, %0, c2, c0, 0 @ set TTBR0\n" + : "=r" (ttb)); + isb(); +} +#endif + +#ifdef CONFIG_PID_IN_CONTEXTIDR +static int contextidr_notifier(struct notifier_block *unused, unsigned long cmd, + void *t) +{ + u32 contextidr; + pid_t pid; + struct thread_info *thread = t; + + if (cmd != THREAD_NOTIFY_SWITCH) + return NOTIFY_DONE; + + pid = task_pid_nr(thread->task) << ASID_BITS; + asm volatile( + " mrc p15, 0, %0, c13, c0, 1\n" + " and %0, %0, %2\n" + " orr %0, %0, %1\n" + " mcr p15, 0, %0, c13, c0, 1\n" + : "=r" (contextidr), "+r" (pid) + : "I" (~ASID_MASK)); + isb(); + + return NOTIFY_OK; +} + +static struct notifier_block contextidr_notifier_block = { + .notifier_call = contextidr_notifier, +}; + +static int __init contextidr_notifier_init(void) +{ + return thread_register_notifier(&contextidr_notifier_block); +} +arch_initcall(contextidr_notifier_init); +#endif + +static void flush_context(unsigned int cpu) +{ + int i; + u64 asid; + + /* Update the list of reserved ASIDs and the ASID bitmap. */ + bitmap_clear(asid_map, 0, NUM_USER_ASIDS); + for_each_possible_cpu(i) { + if (i == cpu) { + asid = 0; + } else { + asid = atomic64_xchg(&per_cpu(active_asids, i), 0); + /* + * If this CPU has already been through a + * rollover, but hasn't run another task in + * the meantime, we must preserve its reserved + * ASID, as this is the only trace we have of + * the process it is still running. + */ + if (asid == 0) + asid = per_cpu(reserved_asids, i); + __set_bit(asid & ~ASID_MASK, asid_map); + } + per_cpu(reserved_asids, i) = asid; + } + + /* Queue a TLB invalidate and flush the I-cache if necessary. */ + cpumask_setall(&tlb_flush_pending); + + if (icache_is_vivt_asid_tagged()) + __flush_icache_all(); +} + +static int is_reserved_asid(u64 asid) +{ + int cpu; + for_each_possible_cpu(cpu) + if (per_cpu(reserved_asids, cpu) == asid) + return 1; + return 0; +} + +static u64 new_context(struct mm_struct *mm, unsigned int cpu) +{ + static u32 cur_idx = 1; + u64 asid = atomic64_read(&mm->context.id); + u64 generation = atomic64_read(&asid_generation); + + if (asid != 0 && is_reserved_asid(asid)) { + /* + * Our current ASID was active during a rollover, we can + * continue to use it and this was just a false alarm. + */ + asid = generation | (asid & ~ASID_MASK); + } else { + /* + * Allocate a free ASID. If we can't find one, take a + * note of the currently active ASIDs and mark the TLBs + * as requiring flushes. We always count from ASID #1, + * as we reserve ASID #0 to switch via TTBR0 and to + * avoid speculative page table walks from hitting in + * any partial walk caches, which could be populated + * from overlapping level-1 descriptors used to map both + * the module area and the userspace stack. 
+ */ + asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, cur_idx); + if (asid == NUM_USER_ASIDS) { + generation = atomic64_add_return(ASID_FIRST_VERSION, + &asid_generation); + flush_context(cpu); + asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1); + } + __set_bit(asid, asid_map); + cur_idx = asid; + asid |= generation; + cpumask_clear(mm_cpumask(mm)); + } + + return asid; +} + +void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk) +{ + unsigned long flags; + unsigned int cpu = smp_processor_id(); + u64 asid; + + if (unlikely(mm->context.vmalloc_seq != init_mm.context.vmalloc_seq)) + __check_vmalloc_seq(mm); + + /* + * We cannot update the pgd and the ASID atomicly with classic + * MMU, so switch exclusively to global mappings to avoid + * speculative page table walking with the wrong TTBR. + */ + cpu_set_reserved_ttbr0(); + + asid = atomic64_read(&mm->context.id); + if (!((asid ^ atomic64_read(&asid_generation)) >> ASID_BITS) + && atomic64_xchg(&per_cpu(active_asids, cpu), asid)) + goto switch_mm_fastpath; + + raw_spin_lock_irqsave(&cpu_asid_lock, flags); + /* Check that our ASID belongs to the current generation. */ + asid = atomic64_read(&mm->context.id); + if ((asid ^ atomic64_read(&asid_generation)) >> ASID_BITS) { + asid = new_context(mm, cpu); + atomic64_set(&mm->context.id, asid); + } + + if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending)) { + local_flush_bp_all(); + local_flush_tlb_all(); + } + + atomic64_set(&per_cpu(active_asids, cpu), asid); + cpumask_set_cpu(cpu, mm_cpumask(mm)); + raw_spin_unlock_irqrestore(&cpu_asid_lock, flags); + +switch_mm_fastpath: + cpu_switch_mm(mm->pgd, mm); +} diff --git a/arch/arm/mm/copypage-fa.c b/arch/arm/mm/copypage-fa.c new file mode 100644 index 00000000000..d130a5ece5d --- /dev/null +++ b/arch/arm/mm/copypage-fa.c @@ -0,0 +1,86 @@ +/* + * linux/arch/arm/lib/copypage-fa.S + * + * Copyright (C) 2005 Faraday Corp. + * Copyright (C) 2008-2009 Paulius Zaleckas <paulius.zaleckas@teltonika.lt> + * + * Based on copypage-v4wb.S: + * Copyright (C) 1995-1999 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/init.h> +#include <linux/highmem.h> + +/* + * Faraday optimised copy_user_page + */ +static void __naked +fa_copy_user_page(void *kto, const void *kfrom) +{ + asm("\ + stmfd sp!, {r4, lr} @ 2\n\ + mov r2, %0 @ 1\n\ +1: ldmia r1!, {r3, r4, ip, lr} @ 4\n\ + stmia r0, {r3, r4, ip, lr} @ 4\n\ + mcr p15, 0, r0, c7, c14, 1 @ 1 clean and invalidate D line\n\ + add r0, r0, #16 @ 1\n\ + ldmia r1!, {r3, r4, ip, lr} @ 4\n\ + stmia r0, {r3, r4, ip, lr} @ 4\n\ + mcr p15, 0, r0, c7, c14, 1 @ 1 clean and invalidate D line\n\ + add r0, r0, #16 @ 1\n\ + subs r2, r2, #1 @ 1\n\ + bne 1b @ 1\n\ + mcr p15, 0, r2, c7, c10, 4 @ 1 drain WB\n\ + ldmfd sp!, {r4, pc} @ 3" + : + : "I" (PAGE_SIZE / 32)); +} + +void fa_copy_user_highpage(struct page *to, struct page *from, + unsigned long vaddr, struct vm_area_struct *vma) +{ + void *kto, *kfrom; + + kto = kmap_atomic(to); + kfrom = kmap_atomic(from); + fa_copy_user_page(kto, kfrom); + kunmap_atomic(kfrom); + kunmap_atomic(kto); +} + +/* + * Faraday optimised clear_user_page + * + * Same story as above. 
+ */ +void fa_clear_user_highpage(struct page *page, unsigned long vaddr) +{ + void *ptr, *kaddr = kmap_atomic(page); + asm volatile("\ + mov r1, %2 @ 1\n\ + mov r2, #0 @ 1\n\ + mov r3, #0 @ 1\n\ + mov ip, #0 @ 1\n\ + mov lr, #0 @ 1\n\ +1: stmia %0, {r2, r3, ip, lr} @ 4\n\ + mcr p15, 0, %0, c7, c14, 1 @ 1 clean and invalidate D line\n\ + add %0, %0, #16 @ 1\n\ + stmia %0, {r2, r3, ip, lr} @ 4\n\ + mcr p15, 0, %0, c7, c14, 1 @ 1 clean and invalidate D line\n\ + add %0, %0, #16 @ 1\n\ + subs r1, r1, #1 @ 1\n\ + bne 1b @ 1\n\ + mcr p15, 0, r1, c7, c10, 4 @ 1 drain WB" + : "=r" (ptr) + : "0" (kaddr), "I" (PAGE_SIZE / 32) + : "r1", "r2", "r3", "ip", "lr"); + kunmap_atomic(kaddr); +} + +struct cpu_user_fns fa_user_fns __initdata = { + .cpu_clear_user_highpage = fa_clear_user_highpage, + .cpu_copy_user_highpage = fa_copy_user_highpage, +}; diff --git a/arch/arm/mm/copypage-feroceon.c b/arch/arm/mm/copypage-feroceon.c new file mode 100644 index 00000000000..49ee0c1a720 --- /dev/null +++ b/arch/arm/mm/copypage-feroceon.c @@ -0,0 +1,112 @@ +/* + * linux/arch/arm/mm/copypage-feroceon.S + * + * Copyright (C) 2008 Marvell Semiconductors + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This handles copy_user_highpage and clear_user_page on Feroceon + * more optimally than the generic implementations. + */ +#include <linux/init.h> +#include <linux/highmem.h> + +static void __naked +feroceon_copy_user_page(void *kto, const void *kfrom) +{ + asm("\ + stmfd sp!, {r4-r9, lr} \n\ + mov ip, %2 \n\ +1: mov lr, r1 \n\ + ldmia r1!, {r2 - r9} \n\ + pld [lr, #32] \n\ + pld [lr, #64] \n\ + pld [lr, #96] \n\ + pld [lr, #128] \n\ + pld [lr, #160] \n\ + pld [lr, #192] \n\ + pld [lr, #224] \n\ + stmia r0, {r2 - r9} \n\ + ldmia r1!, {r2 - r9} \n\ + mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D line\n\ + add r0, r0, #32 \n\ + stmia r0, {r2 - r9} \n\ + ldmia r1!, {r2 - r9} \n\ + mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D line\n\ + add r0, r0, #32 \n\ + stmia r0, {r2 - r9} \n\ + ldmia r1!, {r2 - r9} \n\ + mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D line\n\ + add r0, r0, #32 \n\ + stmia r0, {r2 - r9} \n\ + ldmia r1!, {r2 - r9} \n\ + mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D line\n\ + add r0, r0, #32 \n\ + stmia r0, {r2 - r9} \n\ + ldmia r1!, {r2 - r9} \n\ + mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D line\n\ + add r0, r0, #32 \n\ + stmia r0, {r2 - r9} \n\ + ldmia r1!, {r2 - r9} \n\ + mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D line\n\ + add r0, r0, #32 \n\ + stmia r0, {r2 - r9} \n\ + ldmia r1!, {r2 - r9} \n\ + mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D line\n\ + add r0, r0, #32 \n\ + stmia r0, {r2 - r9} \n\ + subs ip, ip, #(32 * 8) \n\ + mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D line\n\ + add r0, r0, #32 \n\ + bne 1b \n\ + mcr p15, 0, ip, c7, c10, 4 @ drain WB\n\ + ldmfd sp!, {r4-r9, pc}" + : + : "r" (kto), "r" (kfrom), "I" (PAGE_SIZE)); +} + +void feroceon_copy_user_highpage(struct page *to, struct page *from, + unsigned long vaddr, struct vm_area_struct *vma) +{ + void *kto, *kfrom; + + kto = kmap_atomic(to); + kfrom = kmap_atomic(from); + flush_cache_page(vma, vaddr, page_to_pfn(from)); + feroceon_copy_user_page(kto, kfrom); + kunmap_atomic(kfrom); + kunmap_atomic(kto); +} + +void feroceon_clear_user_highpage(struct page *page, unsigned long vaddr) +{ + void *ptr, *kaddr = kmap_atomic(page); + asm 
volatile ("\ + mov r1, %2 \n\ + mov r2, #0 \n\ + mov r3, #0 \n\ + mov r4, #0 \n\ + mov r5, #0 \n\ + mov r6, #0 \n\ + mov r7, #0 \n\ + mov ip, #0 \n\ + mov lr, #0 \n\ +1: stmia %0, {r2-r7, ip, lr} \n\ + subs r1, r1, #1 \n\ + mcr p15, 0, %0, c7, c14, 1 @ clean and invalidate D line\n\ + add %0, %0, #32 \n\ + bne 1b \n\ + mcr p15, 0, r1, c7, c10, 4 @ drain WB" + : "=r" (ptr) + : "0" (kaddr), "I" (PAGE_SIZE / 32) + : "r1", "r2", "r3", "r4", "r5", "r6", "r7", "ip", "lr"); + kunmap_atomic(kaddr); +} + +struct cpu_user_fns feroceon_user_fns __initdata = { + .cpu_clear_user_highpage = feroceon_clear_user_highpage, + .cpu_copy_user_highpage = feroceon_copy_user_highpage, +}; + diff --git a/arch/arm/mm/copypage-v3.S b/arch/arm/mm/copypage-v3.S deleted file mode 100644 index 3c58ebbf035..00000000000 --- a/arch/arm/mm/copypage-v3.S +++ /dev/null @@ -1,67 +0,0 @@ -/* - * linux/arch/arm/lib/copypage.S - * - * Copyright (C) 1995-1999 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * ASM optimised string functions - */ -#include <linux/linkage.h> -#include <linux/init.h> -#include <asm/assembler.h> -#include <asm/asm-offsets.h> - - .text - .align 5 -/* - * ARMv3 optimised copy_user_page - * - * FIXME: do we need to handle cache stuff... - */ -ENTRY(v3_copy_user_page) - stmfd sp!, {r4, lr} @ 2 - mov r2, #PAGE_SZ/64 @ 1 - ldmia r1!, {r3, r4, ip, lr} @ 4+1 -1: stmia r0!, {r3, r4, ip, lr} @ 4 - ldmia r1!, {r3, r4, ip, lr} @ 4+1 - stmia r0!, {r3, r4, ip, lr} @ 4 - ldmia r1!, {r3, r4, ip, lr} @ 4+1 - stmia r0!, {r3, r4, ip, lr} @ 4 - ldmia r1!, {r3, r4, ip, lr} @ 4 - subs r2, r2, #1 @ 1 - stmia r0!, {r3, r4, ip, lr} @ 4 - ldmneia r1!, {r3, r4, ip, lr} @ 4 - bne 1b @ 1 - LOADREGS(fd, sp!, {r4, pc}) @ 3 - - .align 5 -/* - * ARMv3 optimised clear_user_page - * - * FIXME: do we need to handle cache stuff... - */ -ENTRY(v3_clear_user_page) - str lr, [sp, #-4]! - mov r1, #PAGE_SZ/64 @ 1 - mov r2, #0 @ 1 - mov r3, #0 @ 1 - mov ip, #0 @ 1 - mov lr, #0 @ 1 -1: stmia r0!, {r2, r3, ip, lr} @ 4 - stmia r0!, {r2, r3, ip, lr} @ 4 - stmia r0!, {r2, r3, ip, lr} @ 4 - stmia r0!, {r2, r3, ip, lr} @ 4 - subs r1, r1, #1 @ 1 - bne 1b @ 1 - ldr pc, [sp], #4 - - __INITDATA - - .type v3_user_fns, #object -ENTRY(v3_user_fns) - .long v3_clear_user_page - .long v3_copy_user_page - .size v3_user_fns, . - v3_user_fns diff --git a/arch/arm/mm/copypage-v4mc.c b/arch/arm/mm/copypage-v4mc.c index fc69dccdace..1267e64133b 100644 --- a/arch/arm/mm/copypage-v4mc.c +++ b/arch/arm/mm/copypage-v4mc.c @@ -15,24 +15,21 @@ */ #include <linux/init.h> #include <linux/mm.h> +#include <linux/highmem.h> -#include <asm/page.h> #include <asm/pgtable.h> #include <asm/tlbflush.h> +#include <asm/cacheflush.h> -/* - * 0xffff8000 to 0xffffffff is reserved for any ARM architecture - * specific hacks for copying pages efficiently. - */ -#define minicache_pgprot __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | \ - L_PTE_CACHEABLE) +#include "mm.h" -#define TOP_PTE(x) pte_offset_kernel(top_pmd, x) +#define minicache_pgprot __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | \ + L_PTE_MT_MINICACHE) -static DEFINE_SPINLOCK(minicache_lock); +static DEFINE_RAW_SPINLOCK(minicache_lock); /* - * ARMv4 mini-dcache optimised copy_user_page + * ARMv4 mini-dcache optimised copy_user_highpage * * We flush the destination cache lines just before we write the data into the * corresponding address. 
Since the Dcache is read-allocate, this removes the @@ -41,9 +38,9 @@ static DEFINE_SPINLOCK(minicache_lock); * * Note: We rely on all ARMv4 processors implementing the "invalidate D line" * instruction. If your processor does not supply this, you have to write your - * own copy_user_page that does the right thing. + * own copy_user_highpage that does the right thing. */ -static void __attribute__((naked)) +static void __naked mc_copy_user_page(void *from, void *to) { asm volatile( @@ -67,45 +64,52 @@ mc_copy_user_page(void *from, void *to) : "r" (from), "r" (to), "I" (PAGE_SIZE / 64)); } -void v4_mc_copy_user_page(void *kto, const void *kfrom, unsigned long vaddr) +void v4_mc_copy_user_highpage(struct page *to, struct page *from, + unsigned long vaddr, struct vm_area_struct *vma) { - spin_lock(&minicache_lock); + void *kto = kmap_atomic(to); + + if (!test_and_set_bit(PG_dcache_clean, &from->flags)) + __flush_dcache_page(page_mapping(from), from); - set_pte(TOP_PTE(0xffff8000), pfn_pte(__pa(kfrom) >> PAGE_SHIFT, minicache_pgprot)); - flush_tlb_kernel_page(0xffff8000); + raw_spin_lock(&minicache_lock); - mc_copy_user_page((void *)0xffff8000, kto); + set_top_pte(COPYPAGE_MINICACHE, mk_pte(from, minicache_pgprot)); - spin_unlock(&minicache_lock); + mc_copy_user_page((void *)COPYPAGE_MINICACHE, kto); + + raw_spin_unlock(&minicache_lock); + + kunmap_atomic(kto); } /* * ARMv4 optimised clear_user_page */ -void __attribute__((naked)) -v4_mc_clear_user_page(void *kaddr, unsigned long vaddr) +void v4_mc_clear_user_highpage(struct page *page, unsigned long vaddr) { - asm volatile( - "str lr, [sp, #-4]!\n\ - mov r1, %0 @ 1\n\ + void *ptr, *kaddr = kmap_atomic(page); + asm volatile("\ + mov r1, %2 @ 1\n\ mov r2, #0 @ 1\n\ mov r3, #0 @ 1\n\ mov ip, #0 @ 1\n\ mov lr, #0 @ 1\n\ -1: mcr p15, 0, r0, c7, c6, 1 @ 1 invalidate D line\n\ - stmia r0!, {r2, r3, ip, lr} @ 4\n\ - stmia r0!, {r2, r3, ip, lr} @ 4\n\ - mcr p15, 0, r0, c7, c6, 1 @ 1 invalidate D line\n\ - stmia r0!, {r2, r3, ip, lr} @ 4\n\ - stmia r0!, {r2, r3, ip, lr} @ 4\n\ +1: mcr p15, 0, %0, c7, c6, 1 @ 1 invalidate D line\n\ + stmia %0!, {r2, r3, ip, lr} @ 4\n\ + stmia %0!, {r2, r3, ip, lr} @ 4\n\ + mcr p15, 0, %0, c7, c6, 1 @ 1 invalidate D line\n\ + stmia %0!, {r2, r3, ip, lr} @ 4\n\ + stmia %0!, {r2, r3, ip, lr} @ 4\n\ subs r1, r1, #1 @ 1\n\ - bne 1b @ 1\n\ - ldr pc, [sp], #4" - : - : "I" (PAGE_SIZE / 64)); + bne 1b @ 1" + : "=r" (ptr) + : "0" (kaddr), "I" (PAGE_SIZE / 64) + : "r1", "r2", "r3", "ip", "lr"); + kunmap_atomic(kaddr); } struct cpu_user_fns v4_mc_user_fns __initdata = { - .cpu_clear_user_page = v4_mc_clear_user_page, - .cpu_copy_user_page = v4_mc_copy_user_page, + .cpu_clear_user_highpage = v4_mc_clear_user_highpage, + .cpu_copy_user_highpage = v4_mc_copy_user_highpage, }; diff --git a/arch/arm/mm/copypage-v4wb.S b/arch/arm/mm/copypage-v4wb.S deleted file mode 100644 index 83117354b1c..00000000000 --- a/arch/arm/mm/copypage-v4wb.S +++ /dev/null @@ -1,79 +0,0 @@ -/* - * linux/arch/arm/lib/copypage.S - * - * Copyright (C) 1995-1999 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * ASM optimised string functions - */ -#include <linux/linkage.h> -#include <linux/init.h> -#include <asm/asm-offsets.h> - - .text - .align 5 -/* - * ARMv4 optimised copy_user_page - * - * We flush the destination cache lines just before we write the data into the - * corresponding address. 
Since the Dcache is read-allocate, this removes the - * Dcache aliasing issue. The writes will be forwarded to the write buffer, - * and merged as appropriate. - * - * Note: We rely on all ARMv4 processors implementing the "invalidate D line" - * instruction. If your processor does not supply this, you have to write your - * own copy_user_page that does the right thing. - */ -ENTRY(v4wb_copy_user_page) - stmfd sp!, {r4, lr} @ 2 - mov r2, #PAGE_SZ/64 @ 1 - ldmia r1!, {r3, r4, ip, lr} @ 4 -1: mcr p15, 0, r0, c7, c6, 1 @ 1 invalidate D line - stmia r0!, {r3, r4, ip, lr} @ 4 - ldmia r1!, {r3, r4, ip, lr} @ 4+1 - stmia r0!, {r3, r4, ip, lr} @ 4 - ldmia r1!, {r3, r4, ip, lr} @ 4 - mcr p15, 0, r0, c7, c6, 1 @ 1 invalidate D line - stmia r0!, {r3, r4, ip, lr} @ 4 - ldmia r1!, {r3, r4, ip, lr} @ 4 - subs r2, r2, #1 @ 1 - stmia r0!, {r3, r4, ip, lr} @ 4 - ldmneia r1!, {r3, r4, ip, lr} @ 4 - bne 1b @ 1 - mcr p15, 0, r1, c7, c10, 4 @ 1 drain WB - ldmfd sp!, {r4, pc} @ 3 - - .align 5 -/* - * ARMv4 optimised clear_user_page - * - * Same story as above. - */ -ENTRY(v4wb_clear_user_page) - str lr, [sp, #-4]! - mov r1, #PAGE_SZ/64 @ 1 - mov r2, #0 @ 1 - mov r3, #0 @ 1 - mov ip, #0 @ 1 - mov lr, #0 @ 1 -1: mcr p15, 0, r0, c7, c6, 1 @ 1 invalidate D line - stmia r0!, {r2, r3, ip, lr} @ 4 - stmia r0!, {r2, r3, ip, lr} @ 4 - mcr p15, 0, r0, c7, c6, 1 @ 1 invalidate D line - stmia r0!, {r2, r3, ip, lr} @ 4 - stmia r0!, {r2, r3, ip, lr} @ 4 - subs r1, r1, #1 @ 1 - bne 1b @ 1 - mcr p15, 0, r1, c7, c10, 4 @ 1 drain WB - ldr pc, [sp], #4 - - __INITDATA - - .type v4wb_user_fns, #object -ENTRY(v4wb_user_fns) - .long v4wb_clear_user_page - .long v4wb_copy_user_page - .size v4wb_user_fns, . - v4wb_user_fns diff --git a/arch/arm/mm/copypage-v4wb.c b/arch/arm/mm/copypage-v4wb.c new file mode 100644 index 00000000000..067d0fdd630 --- /dev/null +++ b/arch/arm/mm/copypage-v4wb.c @@ -0,0 +1,95 @@ +/* + * linux/arch/arm/mm/copypage-v4wb.c + * + * Copyright (C) 1995-1999 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/init.h> +#include <linux/highmem.h> + +/* + * ARMv4 optimised copy_user_highpage + * + * We flush the destination cache lines just before we write the data into the + * corresponding address. Since the Dcache is read-allocate, this removes the + * Dcache aliasing issue. The writes will be forwarded to the write buffer, + * and merged as appropriate. + * + * Note: We rely on all ARMv4 processors implementing the "invalidate D line" + * instruction. If your processor does not supply this, you have to write your + * own copy_user_highpage that does the right thing. 
+ */ +static void __naked +v4wb_copy_user_page(void *kto, const void *kfrom) +{ + asm("\ + stmfd sp!, {r4, lr} @ 2\n\ + mov r2, %2 @ 1\n\ + ldmia r1!, {r3, r4, ip, lr} @ 4\n\ +1: mcr p15, 0, r0, c7, c6, 1 @ 1 invalidate D line\n\ + stmia r0!, {r3, r4, ip, lr} @ 4\n\ + ldmia r1!, {r3, r4, ip, lr} @ 4+1\n\ + stmia r0!, {r3, r4, ip, lr} @ 4\n\ + ldmia r1!, {r3, r4, ip, lr} @ 4\n\ + mcr p15, 0, r0, c7, c6, 1 @ 1 invalidate D line\n\ + stmia r0!, {r3, r4, ip, lr} @ 4\n\ + ldmia r1!, {r3, r4, ip, lr} @ 4\n\ + subs r2, r2, #1 @ 1\n\ + stmia r0!, {r3, r4, ip, lr} @ 4\n\ + ldmneia r1!, {r3, r4, ip, lr} @ 4\n\ + bne 1b @ 1\n\ + mcr p15, 0, r1, c7, c10, 4 @ 1 drain WB\n\ + ldmfd sp!, {r4, pc} @ 3" + : + : "r" (kto), "r" (kfrom), "I" (PAGE_SIZE / 64)); +} + +void v4wb_copy_user_highpage(struct page *to, struct page *from, + unsigned long vaddr, struct vm_area_struct *vma) +{ + void *kto, *kfrom; + + kto = kmap_atomic(to); + kfrom = kmap_atomic(from); + flush_cache_page(vma, vaddr, page_to_pfn(from)); + v4wb_copy_user_page(kto, kfrom); + kunmap_atomic(kfrom); + kunmap_atomic(kto); +} + +/* + * ARMv4 optimised clear_user_page + * + * Same story as above. + */ +void v4wb_clear_user_highpage(struct page *page, unsigned long vaddr) +{ + void *ptr, *kaddr = kmap_atomic(page); + asm volatile("\ + mov r1, %2 @ 1\n\ + mov r2, #0 @ 1\n\ + mov r3, #0 @ 1\n\ + mov ip, #0 @ 1\n\ + mov lr, #0 @ 1\n\ +1: mcr p15, 0, %0, c7, c6, 1 @ 1 invalidate D line\n\ + stmia %0!, {r2, r3, ip, lr} @ 4\n\ + stmia %0!, {r2, r3, ip, lr} @ 4\n\ + mcr p15, 0, %0, c7, c6, 1 @ 1 invalidate D line\n\ + stmia %0!, {r2, r3, ip, lr} @ 4\n\ + stmia %0!, {r2, r3, ip, lr} @ 4\n\ + subs r1, r1, #1 @ 1\n\ + bne 1b @ 1\n\ + mcr p15, 0, r1, c7, c10, 4 @ 1 drain WB" + : "=r" (ptr) + : "0" (kaddr), "I" (PAGE_SIZE / 64) + : "r1", "r2", "r3", "ip", "lr"); + kunmap_atomic(kaddr); +} + +struct cpu_user_fns v4wb_user_fns __initdata = { + .cpu_clear_user_highpage = v4wb_clear_user_highpage, + .cpu_copy_user_highpage = v4wb_copy_user_highpage, +}; diff --git a/arch/arm/mm/copypage-v4wt.S b/arch/arm/mm/copypage-v4wt.S deleted file mode 100644 index e1f2af28d54..00000000000 --- a/arch/arm/mm/copypage-v4wt.S +++ /dev/null @@ -1,73 +0,0 @@ -/* - * linux/arch/arm/lib/copypage-v4.S - * - * Copyright (C) 1995-1999 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * ASM optimised string functions - * - * This is for CPUs with a writethrough cache and 'flush ID cache' is - * the only supported cache operation. - */ -#include <linux/linkage.h> -#include <linux/init.h> -#include <asm/asm-offsets.h> - - .text - .align 5 -/* - * ARMv4 optimised copy_user_page - * - * Since we have writethrough caches, we don't have to worry about - * dirty data in the cache. However, we do have to ensure that - * subsequent reads are up to date. - */ -ENTRY(v4wt_copy_user_page) - stmfd sp!, {r4, lr} @ 2 - mov r2, #PAGE_SZ/64 @ 1 - ldmia r1!, {r3, r4, ip, lr} @ 4 -1: stmia r0!, {r3, r4, ip, lr} @ 4 - ldmia r1!, {r3, r4, ip, lr} @ 4+1 - stmia r0!, {r3, r4, ip, lr} @ 4 - ldmia r1!, {r3, r4, ip, lr} @ 4 - stmia r0!, {r3, r4, ip, lr} @ 4 - ldmia r1!, {r3, r4, ip, lr} @ 4 - subs r2, r2, #1 @ 1 - stmia r0!, {r3, r4, ip, lr} @ 4 - ldmneia r1!, {r3, r4, ip, lr} @ 4 - bne 1b @ 1 - mcr p15, 0, r2, c7, c7, 0 @ flush ID cache - ldmfd sp!, {r4, pc} @ 3 - - .align 5 -/* - * ARMv4 optimised clear_user_page - * - * Same story as above. 
- */ -ENTRY(v4wt_clear_user_page) - str lr, [sp, #-4]! - mov r1, #PAGE_SZ/64 @ 1 - mov r2, #0 @ 1 - mov r3, #0 @ 1 - mov ip, #0 @ 1 - mov lr, #0 @ 1 -1: stmia r0!, {r2, r3, ip, lr} @ 4 - stmia r0!, {r2, r3, ip, lr} @ 4 - stmia r0!, {r2, r3, ip, lr} @ 4 - stmia r0!, {r2, r3, ip, lr} @ 4 - subs r1, r1, #1 @ 1 - bne 1b @ 1 - mcr p15, 0, r2, c7, c7, 0 @ flush ID cache - ldr pc, [sp], #4 - - __INITDATA - - .type v4wt_user_fns, #object -ENTRY(v4wt_user_fns) - .long v4wt_clear_user_page - .long v4wt_copy_user_page - .size v4wt_user_fns, . - v4wt_user_fns diff --git a/arch/arm/mm/copypage-v4wt.c b/arch/arm/mm/copypage-v4wt.c new file mode 100644 index 00000000000..b85c5da2e51 --- /dev/null +++ b/arch/arm/mm/copypage-v4wt.c @@ -0,0 +1,88 @@ +/* + * linux/arch/arm/mm/copypage-v4wt.S + * + * Copyright (C) 1995-1999 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This is for CPUs with a writethrough cache and 'flush ID cache' is + * the only supported cache operation. + */ +#include <linux/init.h> +#include <linux/highmem.h> + +/* + * ARMv4 optimised copy_user_highpage + * + * Since we have writethrough caches, we don't have to worry about + * dirty data in the cache. However, we do have to ensure that + * subsequent reads are up to date. + */ +static void __naked +v4wt_copy_user_page(void *kto, const void *kfrom) +{ + asm("\ + stmfd sp!, {r4, lr} @ 2\n\ + mov r2, %2 @ 1\n\ + ldmia r1!, {r3, r4, ip, lr} @ 4\n\ +1: stmia r0!, {r3, r4, ip, lr} @ 4\n\ + ldmia r1!, {r3, r4, ip, lr} @ 4+1\n\ + stmia r0!, {r3, r4, ip, lr} @ 4\n\ + ldmia r1!, {r3, r4, ip, lr} @ 4\n\ + stmia r0!, {r3, r4, ip, lr} @ 4\n\ + ldmia r1!, {r3, r4, ip, lr} @ 4\n\ + subs r2, r2, #1 @ 1\n\ + stmia r0!, {r3, r4, ip, lr} @ 4\n\ + ldmneia r1!, {r3, r4, ip, lr} @ 4\n\ + bne 1b @ 1\n\ + mcr p15, 0, r2, c7, c7, 0 @ flush ID cache\n\ + ldmfd sp!, {r4, pc} @ 3" + : + : "r" (kto), "r" (kfrom), "I" (PAGE_SIZE / 64)); +} + +void v4wt_copy_user_highpage(struct page *to, struct page *from, + unsigned long vaddr, struct vm_area_struct *vma) +{ + void *kto, *kfrom; + + kto = kmap_atomic(to); + kfrom = kmap_atomic(from); + v4wt_copy_user_page(kto, kfrom); + kunmap_atomic(kfrom); + kunmap_atomic(kto); +} + +/* + * ARMv4 optimised clear_user_page + * + * Same story as above. 
+ */ +void v4wt_clear_user_highpage(struct page *page, unsigned long vaddr) +{ + void *ptr, *kaddr = kmap_atomic(page); + asm volatile("\ + mov r1, %2 @ 1\n\ + mov r2, #0 @ 1\n\ + mov r3, #0 @ 1\n\ + mov ip, #0 @ 1\n\ + mov lr, #0 @ 1\n\ +1: stmia %0!, {r2, r3, ip, lr} @ 4\n\ + stmia %0!, {r2, r3, ip, lr} @ 4\n\ + stmia %0!, {r2, r3, ip, lr} @ 4\n\ + stmia %0!, {r2, r3, ip, lr} @ 4\n\ + subs r1, r1, #1 @ 1\n\ + bne 1b @ 1\n\ + mcr p15, 0, r2, c7, c7, 0 @ flush ID cache" + : "=r" (ptr) + : "0" (kaddr), "I" (PAGE_SIZE / 64) + : "r1", "r2", "r3", "ip", "lr"); + kunmap_atomic(kaddr); +} + +struct cpu_user_fns v4wt_user_fns __initdata = { + .cpu_clear_user_highpage = v4wt_clear_user_highpage, + .cpu_copy_user_highpage = v4wt_copy_user_highpage, +}; diff --git a/arch/arm/mm/copypage-v6.c b/arch/arm/mm/copypage-v6.c index 269ce6913ee..b9bcc9d7917 100644 --- a/arch/arm/mm/copypage-v6.c +++ b/arch/arm/mm/copypage-v6.c @@ -10,78 +10,92 @@ #include <linux/init.h> #include <linux/spinlock.h> #include <linux/mm.h> +#include <linux/highmem.h> -#include <asm/page.h> #include <asm/pgtable.h> #include <asm/shmparam.h> #include <asm/tlbflush.h> #include <asm/cacheflush.h> +#include <asm/cachetype.h> + +#include "mm.h" #if SHMLBA > 16384 #error FIX ME #endif -#define from_address (0xffff8000) -#define to_address (0xffffc000) - -#define TOP_PTE(x) pte_offset_kernel(top_pmd, x) - -static DEFINE_SPINLOCK(v6_lock); +static DEFINE_RAW_SPINLOCK(v6_lock); /* * Copy the user page. No aliasing to deal with so we can just * attack the kernel's existing mapping of these pages. */ -static void v6_copy_user_page_nonaliasing(void *kto, const void *kfrom, unsigned long vaddr) +static void v6_copy_user_highpage_nonaliasing(struct page *to, + struct page *from, unsigned long vaddr, struct vm_area_struct *vma) { + void *kto, *kfrom; + + kfrom = kmap_atomic(from); + kto = kmap_atomic(to); copy_page(kto, kfrom); + kunmap_atomic(kto); + kunmap_atomic(kfrom); } /* * Clear the user page. No aliasing to deal with so we can just * attack the kernel's existing mapping of this page. */ -static void v6_clear_user_page_nonaliasing(void *kaddr, unsigned long vaddr) +static void v6_clear_user_highpage_nonaliasing(struct page *page, unsigned long vaddr) { + void *kaddr = kmap_atomic(page); clear_page(kaddr); + kunmap_atomic(kaddr); } /* - * Copy the page, taking account of the cache colour. + * Discard data in the kernel mapping for the new page. + * FIXME: needs this MCRR to be supported. */ -static void v6_copy_user_page_aliasing(void *kto, const void *kfrom, unsigned long vaddr) +static void discard_old_kernel_data(void *kto) { - unsigned int offset = CACHE_COLOUR(vaddr); - unsigned long from, to; - - /* - * Discard data in the kernel mapping for the new page. - * FIXME: needs this MCRR to be supported. - */ __asm__("mcrr p15, 0, %1, %0, c6 @ 0xec401f06" : : "r" (kto), "r" ((unsigned long)kto + PAGE_SIZE - L1_CACHE_BYTES) : "cc"); +} + +/* + * Copy the page, taking account of the cache colour. + */ +static void v6_copy_user_highpage_aliasing(struct page *to, + struct page *from, unsigned long vaddr, struct vm_area_struct *vma) +{ + unsigned int offset = CACHE_COLOUR(vaddr); + unsigned long kfrom, kto; + + if (!test_and_set_bit(PG_dcache_clean, &from->flags)) + __flush_dcache_page(page_mapping(from), from); + + /* FIXME: not highmem safe */ + discard_old_kernel_data(page_address(to)); /* * Now copy the page using the same cache colour as the * pages ultimate destination. 
*/ - spin_lock(&v6_lock); - - set_pte(TOP_PTE(from_address) + offset, pfn_pte(__pa(kfrom) >> PAGE_SHIFT, PAGE_KERNEL)); - set_pte(TOP_PTE(to_address) + offset, pfn_pte(__pa(kto) >> PAGE_SHIFT, PAGE_KERNEL)); + raw_spin_lock(&v6_lock); - from = from_address + (offset << PAGE_SHIFT); - to = to_address + (offset << PAGE_SHIFT); + kfrom = COPYPAGE_V6_FROM + (offset << PAGE_SHIFT); + kto = COPYPAGE_V6_TO + (offset << PAGE_SHIFT); - flush_tlb_kernel_page(from); - flush_tlb_kernel_page(to); + set_top_pte(kfrom, mk_pte(from, PAGE_KERNEL)); + set_top_pte(kto, mk_pte(to, PAGE_KERNEL)); - copy_page((void *)to, (void *)from); + copy_page((void *)kto, (void *)kfrom); - spin_unlock(&v6_lock); + raw_spin_unlock(&v6_lock); } /* @@ -89,44 +103,35 @@ static void v6_copy_user_page_aliasing(void *kto, const void *kfrom, unsigned lo * so remap the kernel page into the same cache colour as the user * page. */ -static void v6_clear_user_page_aliasing(void *kaddr, unsigned long vaddr) +static void v6_clear_user_highpage_aliasing(struct page *page, unsigned long vaddr) { - unsigned int offset = CACHE_COLOUR(vaddr); - unsigned long to = to_address + (offset << PAGE_SHIFT); + unsigned long to = COPYPAGE_V6_TO + (CACHE_COLOUR(vaddr) << PAGE_SHIFT); - /* - * Discard data in the kernel mapping for the new page - * FIXME: needs this MCRR to be supported. - */ - __asm__("mcrr p15, 0, %1, %0, c6 @ 0xec401f06" - : - : "r" (kaddr), - "r" ((unsigned long)kaddr + PAGE_SIZE - L1_CACHE_BYTES) - : "cc"); + /* FIXME: not highmem safe */ + discard_old_kernel_data(page_address(page)); /* * Now clear the page using the same cache colour as * the pages ultimate destination. */ - spin_lock(&v6_lock); + raw_spin_lock(&v6_lock); - set_pte(TOP_PTE(to_address) + offset, pfn_pte(__pa(kaddr) >> PAGE_SHIFT, PAGE_KERNEL)); - flush_tlb_kernel_page(to); + set_top_pte(to, mk_pte(page, PAGE_KERNEL)); clear_page((void *)to); - spin_unlock(&v6_lock); + raw_spin_unlock(&v6_lock); } struct cpu_user_fns v6_user_fns __initdata = { - .cpu_clear_user_page = v6_clear_user_page_nonaliasing, - .cpu_copy_user_page = v6_copy_user_page_nonaliasing, + .cpu_clear_user_highpage = v6_clear_user_highpage_nonaliasing, + .cpu_copy_user_highpage = v6_copy_user_highpage_nonaliasing, }; static int __init v6_userpage_init(void) { if (cache_is_vipt_aliasing()) { - cpu_user.cpu_clear_user_page = v6_clear_user_page_aliasing; - cpu_user.cpu_copy_user_page = v6_copy_user_page_aliasing; + cpu_user.cpu_clear_user_highpage = v6_clear_user_highpage_aliasing; + cpu_user.cpu_copy_user_highpage = v6_copy_user_highpage_aliasing; } return 0; diff --git a/arch/arm/mm/copypage-xsc3.c b/arch/arm/mm/copypage-xsc3.c new file mode 100644 index 00000000000..03a2042aced --- /dev/null +++ b/arch/arm/mm/copypage-xsc3.c @@ -0,0 +1,114 @@ +/* + * linux/arch/arm/mm/copypage-xsc3.S + * + * Copyright (C) 2004 Intel Corp. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Adapted for 3rd gen XScale core, no more mini-dcache + * Author: Matt Gilbert (matthew.m.gilbert@intel.com) + */ +#include <linux/init.h> +#include <linux/highmem.h> + +/* + * General note: + * We don't really want write-allocate cache behaviour for these functions + * since that will just eat through 8K of the cache. 
+ */ + +/* + * XSC3 optimised copy_user_highpage + * r0 = destination + * r1 = source + * + * The source page may have some clean entries in the cache already, but we + * can safely ignore them - break_cow() will flush them out of the cache + * if we eventually end up using our copied page. + * + */ +static void __naked +xsc3_mc_copy_user_page(void *kto, const void *kfrom) +{ + asm("\ + stmfd sp!, {r4, r5, lr} \n\ + mov lr, %2 \n\ + \n\ + pld [r1, #0] \n\ + pld [r1, #32] \n\ +1: pld [r1, #64] \n\ + pld [r1, #96] \n\ + \n\ +2: ldrd r2, [r1], #8 \n\ + mov ip, r0 \n\ + ldrd r4, [r1], #8 \n\ + mcr p15, 0, ip, c7, c6, 1 @ invalidate\n\ + strd r2, [r0], #8 \n\ + ldrd r2, [r1], #8 \n\ + strd r4, [r0], #8 \n\ + ldrd r4, [r1], #8 \n\ + strd r2, [r0], #8 \n\ + strd r4, [r0], #8 \n\ + ldrd r2, [r1], #8 \n\ + mov ip, r0 \n\ + ldrd r4, [r1], #8 \n\ + mcr p15, 0, ip, c7, c6, 1 @ invalidate\n\ + strd r2, [r0], #8 \n\ + ldrd r2, [r1], #8 \n\ + subs lr, lr, #1 \n\ + strd r4, [r0], #8 \n\ + ldrd r4, [r1], #8 \n\ + strd r2, [r0], #8 \n\ + strd r4, [r0], #8 \n\ + bgt 1b \n\ + beq 2b \n\ + \n\ + ldmfd sp!, {r4, r5, pc}" + : + : "r" (kto), "r" (kfrom), "I" (PAGE_SIZE / 64 - 1)); +} + +void xsc3_mc_copy_user_highpage(struct page *to, struct page *from, + unsigned long vaddr, struct vm_area_struct *vma) +{ + void *kto, *kfrom; + + kto = kmap_atomic(to); + kfrom = kmap_atomic(from); + flush_cache_page(vma, vaddr, page_to_pfn(from)); + xsc3_mc_copy_user_page(kto, kfrom); + kunmap_atomic(kfrom); + kunmap_atomic(kto); +} + +/* + * XScale optimised clear_user_page + * r0 = destination + * r1 = virtual user address of ultimate destination page + */ +void xsc3_mc_clear_user_highpage(struct page *page, unsigned long vaddr) +{ + void *ptr, *kaddr = kmap_atomic(page); + asm volatile ("\ + mov r1, %2 \n\ + mov r2, #0 \n\ + mov r3, #0 \n\ +1: mcr p15, 0, %0, c7, c6, 1 @ invalidate line\n\ + strd r2, [%0], #8 \n\ + strd r2, [%0], #8 \n\ + strd r2, [%0], #8 \n\ + strd r2, [%0], #8 \n\ + subs r1, r1, #1 \n\ + bne 1b" + : "=r" (ptr) + : "0" (kaddr), "I" (PAGE_SIZE / 32) + : "r1", "r2", "r3"); + kunmap_atomic(kaddr); +} + +struct cpu_user_fns xsc3_mc_user_fns __initdata = { + .cpu_clear_user_highpage = xsc3_mc_clear_user_highpage, + .cpu_copy_user_highpage = xsc3_mc_copy_user_highpage, +}; diff --git a/arch/arm/mm/copypage-xscale.c b/arch/arm/mm/copypage-xscale.c index 42a6ee255ce..0fb85025344 100644 --- a/arch/arm/mm/copypage-xscale.c +++ b/arch/arm/mm/copypage-xscale.c @@ -15,33 +15,28 @@ */ #include <linux/init.h> #include <linux/mm.h> +#include <linux/highmem.h> -#include <asm/page.h> #include <asm/pgtable.h> #include <asm/tlbflush.h> +#include <asm/cacheflush.h> -/* - * 0xffff8000 to 0xffffffff is reserved for any ARM architecture - * specific hacks for copying pages efficiently. - */ -#define COPYPAGE_MINICACHE 0xffff8000 +#include "mm.h" #define minicache_pgprot __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | \ - L_PTE_CACHEABLE) + L_PTE_MT_MINICACHE) -#define TOP_PTE(x) pte_offset_kernel(top_pmd, x) - -static DEFINE_SPINLOCK(minicache_lock); +static DEFINE_RAW_SPINLOCK(minicache_lock); /* - * XScale mini-dcache optimised copy_user_page + * XScale mini-dcache optimised copy_user_highpage * * We flush the destination cache lines just before we write the data into the * corresponding address. Since the Dcache is read-allocate, this removes the * Dcache aliasing issue. The writes will be forwarded to the write buffer, * and merged as appropriate. 
*/ -static void __attribute__((naked)) +static void __naked mc_copy_user_page(void *from, void *to) { /* @@ -89,43 +84,52 @@ mc_copy_user_page(void *from, void *to) : "r" (from), "r" (to), "I" (PAGE_SIZE / 64 - 1)); } -void xscale_mc_copy_user_page(void *kto, const void *kfrom, unsigned long vaddr) +void xscale_mc_copy_user_highpage(struct page *to, struct page *from, + unsigned long vaddr, struct vm_area_struct *vma) { - spin_lock(&minicache_lock); + void *kto = kmap_atomic(to); + + if (!test_and_set_bit(PG_dcache_clean, &from->flags)) + __flush_dcache_page(page_mapping(from), from); - set_pte(TOP_PTE(COPYPAGE_MINICACHE), pfn_pte(__pa(kfrom) >> PAGE_SHIFT, minicache_pgprot)); - flush_tlb_kernel_page(COPYPAGE_MINICACHE); + raw_spin_lock(&minicache_lock); + + set_top_pte(COPYPAGE_MINICACHE, mk_pte(from, minicache_pgprot)); mc_copy_user_page((void *)COPYPAGE_MINICACHE, kto); - spin_unlock(&minicache_lock); + raw_spin_unlock(&minicache_lock); + + kunmap_atomic(kto); } /* * XScale optimised clear_user_page */ -void __attribute__((naked)) -xscale_mc_clear_user_page(void *kaddr, unsigned long vaddr) +void +xscale_mc_clear_user_highpage(struct page *page, unsigned long vaddr) { + void *ptr, *kaddr = kmap_atomic(page); asm volatile( - "mov r1, %0 \n\ + "mov r1, %2 \n\ mov r2, #0 \n\ mov r3, #0 \n\ -1: mov ip, r0 \n\ - strd r2, [r0], #8 \n\ - strd r2, [r0], #8 \n\ - strd r2, [r0], #8 \n\ - strd r2, [r0], #8 \n\ +1: mov ip, %0 \n\ + strd r2, [%0], #8 \n\ + strd r2, [%0], #8 \n\ + strd r2, [%0], #8 \n\ + strd r2, [%0], #8 \n\ mcr p15, 0, ip, c7, c10, 1 @ clean D line\n\ subs r1, r1, #1 \n\ mcr p15, 0, ip, c7, c6, 1 @ invalidate D line\n\ - bne 1b \n\ - mov pc, lr" - : - : "I" (PAGE_SIZE / 32)); + bne 1b" + : "=r" (ptr) + : "0" (kaddr), "I" (PAGE_SIZE / 32) + : "r1", "r2", "r3", "ip"); + kunmap_atomic(kaddr); } struct cpu_user_fns xscale_mc_user_fns __initdata = { - .cpu_clear_user_page = xscale_mc_clear_user_page, - .cpu_copy_user_page = xscale_mc_copy_user_page, + .cpu_clear_user_highpage = xscale_mc_clear_user_highpage, + .cpu_copy_user_highpage = xscale_mc_copy_user_highpage, }; diff --git a/arch/arm/mm/discontig.c b/arch/arm/mm/discontig.c deleted file mode 100644 index 0d097bb1bc4..00000000000 --- a/arch/arm/mm/discontig.c +++ /dev/null @@ -1,49 +0,0 @@ -/* - * linux/arch/arm/mm/discontig.c - * - * Discontiguous memory support. - * - * Initial code: Copyright (C) 1999-2000 Nicolas Pitre - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/module.h> -#include <linux/mm.h> -#include <linux/init.h> -#include <linux/bootmem.h> - -#if MAX_NUMNODES != 4 && MAX_NUMNODES != 16 -# error Fix Me Please -#endif - -/* - * Our node_data structure for discontiguous memory. 
- */ - -static bootmem_data_t node_bootmem_data[MAX_NUMNODES]; - -pg_data_t discontig_node_data[MAX_NUMNODES] = { - { .bdata = &node_bootmem_data[0] }, - { .bdata = &node_bootmem_data[1] }, - { .bdata = &node_bootmem_data[2] }, - { .bdata = &node_bootmem_data[3] }, -#if MAX_NUMNODES == 16 - { .bdata = &node_bootmem_data[4] }, - { .bdata = &node_bootmem_data[5] }, - { .bdata = &node_bootmem_data[6] }, - { .bdata = &node_bootmem_data[7] }, - { .bdata = &node_bootmem_data[8] }, - { .bdata = &node_bootmem_data[9] }, - { .bdata = &node_bootmem_data[10] }, - { .bdata = &node_bootmem_data[11] }, - { .bdata = &node_bootmem_data[12] }, - { .bdata = &node_bootmem_data[13] }, - { .bdata = &node_bootmem_data[14] }, - { .bdata = &node_bootmem_data[15] }, -#endif -}; - -EXPORT_SYMBOL(discontig_node_data); diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c new file mode 100644 index 00000000000..1f88db06b13 --- /dev/null +++ b/arch/arm/mm/dma-mapping.c @@ -0,0 +1,2092 @@ +/* + * linux/arch/arm/mm/dma-mapping.c + * + * Copyright (C) 2000-2004 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * DMA uncached mapping support. + */ +#include <linux/bootmem.h> +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/gfp.h> +#include <linux/errno.h> +#include <linux/list.h> +#include <linux/init.h> +#include <linux/device.h> +#include <linux/dma-mapping.h> +#include <linux/dma-contiguous.h> +#include <linux/highmem.h> +#include <linux/memblock.h> +#include <linux/slab.h> +#include <linux/iommu.h> +#include <linux/io.h> +#include <linux/vmalloc.h> +#include <linux/sizes.h> + +#include <asm/memory.h> +#include <asm/highmem.h> +#include <asm/cacheflush.h> +#include <asm/tlbflush.h> +#include <asm/mach/arch.h> +#include <asm/dma-iommu.h> +#include <asm/mach/map.h> +#include <asm/system_info.h> +#include <asm/dma-contiguous.h> + +#include "mm.h" + +/* + * The DMA API is built upon the notion of "buffer ownership". A buffer + * is either exclusively owned by the CPU (and therefore may be accessed + * by it) or exclusively owned by the DMA device. These helper functions + * represent the transitions between these two ownership states. + * + * Note, however, that on later ARMs, this notion does not work due to + * speculative prefetches. We model our approach on the assumption that + * the CPU does do speculative prefetches, which means we clean caches + * before transfers and delay cache invalidation until transfer completion. + * + */ +static void __dma_page_cpu_to_dev(struct page *, unsigned long, + size_t, enum dma_data_direction); +static void __dma_page_dev_to_cpu(struct page *, unsigned long, + size_t, enum dma_data_direction); + +/** + * arm_dma_map_page - map a portion of a page for streaming DMA + * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices + * @page: page that buffer resides in + * @offset: offset into page for start of buffer + * @size: size of buffer to map + * @dir: DMA transfer direction + * + * Ensure that any data held in the cache is appropriately discarded + * or written back. + * + * The device owns this memory once this call has completed. The CPU + * can regain ownership by calling dma_unmap_page(). 
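The ownership model described in this comment is what a driver acts out around every streaming transfer: map the page to hand it to the device, let the hardware run, then unmap to give it back to the CPU. A minimal sketch of that pattern from the driver side (the helper name, device and offsets are illustrative, not part of this patch):

#include <linux/device.h>
#include <linux/dma-mapping.h>
#include <linux/errno.h>
#include <linux/mm_types.h>

/* Illustrative only: stream one page of data to a device. */
static int example_send_page(struct device *dev, struct page *page,
                             unsigned long offset, size_t len)
{
        dma_addr_t handle;

        /* CPU gives up ownership; the ARM dma_map_ops clean or
         * invalidate the caches as required for the direction. */
        handle = dma_map_page(dev, page, offset, len, DMA_TO_DEVICE);
        if (dma_mapping_error(dev, handle))
                return -ENOMEM;

        /* ... program the device with 'handle' and wait for completion ... */

        /* Device is finished; the CPU regains ownership of the buffer. */
        dma_unmap_page(dev, handle, len, DMA_TO_DEVICE);
        return 0;
}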
+ */ +static dma_addr_t arm_dma_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) + __dma_page_cpu_to_dev(page, offset, size, dir); + return pfn_to_dma(dev, page_to_pfn(page)) + offset; +} + +static dma_addr_t arm_coherent_dma_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + return pfn_to_dma(dev, page_to_pfn(page)) + offset; +} + +/** + * arm_dma_unmap_page - unmap a buffer previously mapped through dma_map_page() + * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices + * @handle: DMA address of buffer + * @size: size of buffer (same as passed to dma_map_page) + * @dir: DMA transfer direction (same as passed to dma_map_page) + * + * Unmap a page streaming mode DMA translation. The handle and size + * must match what was provided in the previous dma_map_page() call. + * All other usages are undefined. + * + * After this call, reads by the CPU to the buffer are guaranteed to see + * whatever the device wrote there. + */ +static void arm_dma_unmap_page(struct device *dev, dma_addr_t handle, + size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) + __dma_page_dev_to_cpu(pfn_to_page(dma_to_pfn(dev, handle)), + handle & ~PAGE_MASK, size, dir); +} + +static void arm_dma_sync_single_for_cpu(struct device *dev, + dma_addr_t handle, size_t size, enum dma_data_direction dir) +{ + unsigned int offset = handle & (PAGE_SIZE - 1); + struct page *page = pfn_to_page(dma_to_pfn(dev, handle-offset)); + __dma_page_dev_to_cpu(page, offset, size, dir); +} + +static void arm_dma_sync_single_for_device(struct device *dev, + dma_addr_t handle, size_t size, enum dma_data_direction dir) +{ + unsigned int offset = handle & (PAGE_SIZE - 1); + struct page *page = pfn_to_page(dma_to_pfn(dev, handle-offset)); + __dma_page_cpu_to_dev(page, offset, size, dir); +} + +struct dma_map_ops arm_dma_ops = { + .alloc = arm_dma_alloc, + .free = arm_dma_free, + .mmap = arm_dma_mmap, + .get_sgtable = arm_dma_get_sgtable, + .map_page = arm_dma_map_page, + .unmap_page = arm_dma_unmap_page, + .map_sg = arm_dma_map_sg, + .unmap_sg = arm_dma_unmap_sg, + .sync_single_for_cpu = arm_dma_sync_single_for_cpu, + .sync_single_for_device = arm_dma_sync_single_for_device, + .sync_sg_for_cpu = arm_dma_sync_sg_for_cpu, + .sync_sg_for_device = arm_dma_sync_sg_for_device, + .set_dma_mask = arm_dma_set_mask, +}; +EXPORT_SYMBOL(arm_dma_ops); + +static void *arm_coherent_dma_alloc(struct device *dev, size_t size, + dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs); +static void arm_coherent_dma_free(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t handle, struct dma_attrs *attrs); + +struct dma_map_ops arm_coherent_dma_ops = { + .alloc = arm_coherent_dma_alloc, + .free = arm_coherent_dma_free, + .mmap = arm_dma_mmap, + .get_sgtable = arm_dma_get_sgtable, + .map_page = arm_coherent_dma_map_page, + .map_sg = arm_dma_map_sg, + .set_dma_mask = arm_dma_set_mask, +}; +EXPORT_SYMBOL(arm_coherent_dma_ops); + +static int __dma_supported(struct device *dev, u64 mask, bool warn) +{ + unsigned long max_dma_pfn; + + /* + * If the mask allows for more memory than we can address, + * and we actually have that much memory, then we must + * indicate that DMA to this device is not supported. 
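From the driver side, the check above is triggered by the mask the driver installs at probe time; a device whose mask cannot be satisfied is simply refused coherent allocations. A plausible probe-time sketch (error handling trimmed; dma_set_mask_and_coherent() is the usual modern helper, older code sets the streaming and coherent masks separately):

#include <linux/device.h>
#include <linux/dma-mapping.h>

/* Illustrative only: declare that the device can address 32 bits. */
static int example_setup_dma(struct device *dev)
{
        int ret;

        ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
        if (ret) {
                dev_warn(dev, "no usable DMA configuration\n");
                return ret;
        }

        /* dma_alloc_coherent() and the streaming API may now be used. */
        return 0;
}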
+ */ + if (sizeof(mask) != sizeof(dma_addr_t) && + mask > (dma_addr_t)~0 && + dma_to_pfn(dev, ~0) < max_pfn) { + if (warn) { + dev_warn(dev, "Coherent DMA mask %#llx is larger than dma_addr_t allows\n", + mask); + dev_warn(dev, "Driver did not use or check the return value from dma_set_coherent_mask()?\n"); + } + return 0; + } + + max_dma_pfn = min(max_pfn, arm_dma_pfn_limit); + + /* + * Translate the device's DMA mask to a PFN limit. This + * PFN number includes the page which we can DMA to. + */ + if (dma_to_pfn(dev, mask) < max_dma_pfn) { + if (warn) + dev_warn(dev, "Coherent DMA mask %#llx (pfn %#lx-%#lx) covers a smaller range of system memory than the DMA zone pfn 0x0-%#lx\n", + mask, + dma_to_pfn(dev, 0), dma_to_pfn(dev, mask) + 1, + max_dma_pfn + 1); + return 0; + } + + return 1; +} + +static u64 get_coherent_dma_mask(struct device *dev) +{ + u64 mask = (u64)DMA_BIT_MASK(32); + + if (dev) { + mask = dev->coherent_dma_mask; + + /* + * Sanity check the DMA mask - it must be non-zero, and + * must be able to be satisfied by a DMA allocation. + */ + if (mask == 0) { + dev_warn(dev, "coherent DMA mask is unset\n"); + return 0; + } + + if (!__dma_supported(dev, mask, true)) + return 0; + } + + return mask; +} + +static void __dma_clear_buffer(struct page *page, size_t size) +{ + /* + * Ensure that the allocated pages are zeroed, and that any data + * lurking in the kernel direct-mapped region is invalidated. + */ + if (PageHighMem(page)) { + phys_addr_t base = __pfn_to_phys(page_to_pfn(page)); + phys_addr_t end = base + size; + while (size > 0) { + void *ptr = kmap_atomic(page); + memset(ptr, 0, PAGE_SIZE); + dmac_flush_range(ptr, ptr + PAGE_SIZE); + kunmap_atomic(ptr); + page++; + size -= PAGE_SIZE; + } + outer_flush_range(base, end); + } else { + void *ptr = page_address(page); + memset(ptr, 0, size); + dmac_flush_range(ptr, ptr + size); + outer_flush_range(__pa(ptr), __pa(ptr) + size); + } +} + +/* + * Allocate a DMA buffer for 'dev' of size 'size' using the + * specified gfp mask. Note that 'size' must be page aligned. + */ +static struct page *__dma_alloc_buffer(struct device *dev, size_t size, gfp_t gfp) +{ + unsigned long order = get_order(size); + struct page *page, *p, *e; + + page = alloc_pages(gfp, order); + if (!page) + return NULL; + + /* + * Now split the huge page and free the excess pages + */ + split_page(page, order); + for (p = page + (size >> PAGE_SHIFT), e = page + (1 << order); p < e; p++) + __free_page(p); + + __dma_clear_buffer(page, size); + + return page; +} + +/* + * Free a DMA buffer. 'size' must be page aligned. + */ +static void __dma_free_buffer(struct page *page, size_t size) +{ + struct page *e = page + (size >> PAGE_SHIFT); + + while (page < e) { + __free_page(page); + page++; + } +} + +#ifdef CONFIG_MMU + +static void *__alloc_from_contiguous(struct device *dev, size_t size, + pgprot_t prot, struct page **ret_page, + const void *caller); + +static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp, + pgprot_t prot, struct page **ret_page, + const void *caller); + +static void * +__dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot, + const void *caller) +{ + struct vm_struct *area; + unsigned long addr; + + /* + * DMA allocation can be mapped to user space, so lets + * set VM_USERMAP flags too. 
+ */ + area = get_vm_area_caller(size, VM_ARM_DMA_CONSISTENT | VM_USERMAP, + caller); + if (!area) + return NULL; + addr = (unsigned long)area->addr; + area->phys_addr = __pfn_to_phys(page_to_pfn(page)); + + if (ioremap_page_range(addr, addr + size, area->phys_addr, prot)) { + vunmap((void *)addr); + return NULL; + } + return (void *)addr; +} + +static void __dma_free_remap(void *cpu_addr, size_t size) +{ + unsigned int flags = VM_ARM_DMA_CONSISTENT | VM_USERMAP; + struct vm_struct *area = find_vm_area(cpu_addr); + if (!area || (area->flags & flags) != flags) { + WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr); + return; + } + unmap_kernel_range((unsigned long)cpu_addr, size); + vunmap(cpu_addr); +} + +#define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K + +struct dma_pool { + size_t size; + spinlock_t lock; + unsigned long *bitmap; + unsigned long nr_pages; + void *vaddr; + struct page **pages; +}; + +static struct dma_pool atomic_pool = { + .size = DEFAULT_DMA_COHERENT_POOL_SIZE, +}; + +static int __init early_coherent_pool(char *p) +{ + atomic_pool.size = memparse(p, &p); + return 0; +} +early_param("coherent_pool", early_coherent_pool); + +void __init init_dma_coherent_pool_size(unsigned long size) +{ + /* + * Catch any attempt to set the pool size too late. + */ + BUG_ON(atomic_pool.vaddr); + + /* + * Set architecture specific coherent pool size only if + * it has not been changed by kernel command line parameter. + */ + if (atomic_pool.size == DEFAULT_DMA_COHERENT_POOL_SIZE) + atomic_pool.size = size; +} + +/* + * Initialise the coherent pool for atomic allocations. + */ +static int __init atomic_pool_init(void) +{ + struct dma_pool *pool = &atomic_pool; + pgprot_t prot = pgprot_dmacoherent(PAGE_KERNEL); + gfp_t gfp = GFP_KERNEL | GFP_DMA; + unsigned long nr_pages = pool->size >> PAGE_SHIFT; + unsigned long *bitmap; + struct page *page; + struct page **pages; + void *ptr; + int bitmap_size = BITS_TO_LONGS(nr_pages) * sizeof(long); + + bitmap = kzalloc(bitmap_size, GFP_KERNEL); + if (!bitmap) + goto no_bitmap; + + pages = kzalloc(nr_pages * sizeof(struct page *), GFP_KERNEL); + if (!pages) + goto no_pages; + + if (dev_get_cma_area(NULL)) + ptr = __alloc_from_contiguous(NULL, pool->size, prot, &page, + atomic_pool_init); + else + ptr = __alloc_remap_buffer(NULL, pool->size, gfp, prot, &page, + atomic_pool_init); + if (ptr) { + int i; + + for (i = 0; i < nr_pages; i++) + pages[i] = page + i; + + spin_lock_init(&pool->lock); + pool->vaddr = ptr; + pool->pages = pages; + pool->bitmap = bitmap; + pool->nr_pages = nr_pages; + pr_info("DMA: preallocated %u KiB pool for atomic coherent allocations\n", + (unsigned)pool->size / 1024); + return 0; + } + + kfree(pages); +no_pages: + kfree(bitmap); +no_bitmap: + pr_err("DMA: failed to allocate %u KiB pool for atomic coherent allocation\n", + (unsigned)pool->size / 1024); + return -ENOMEM; +} +/* + * CMA is activated by core_initcall, so we must be called after it. 
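The pool exists because a coherent allocation made from atomic context (no __GFP_WAIT) cannot set up new page-table mappings, so it has to be carved out of this preallocated, already-remapped area; its size can be raised with the coherent_pool= kernel parameter (for example coherent_pool=1M). A sketch of the kind of caller that ends up here (purely illustrative):

#include <linux/dma-mapping.h>
#include <linux/gfp.h>

/* Illustrative only: coherent allocation from IRQ or spinlocked context.
 * GFP_ATOMIC lacks __GFP_WAIT, so __dma_alloc() falls back to
 * __alloc_from_pool() above. */
static void *example_alloc_atomic(struct device *dev, size_t size,
                                  dma_addr_t *handle)
{
        return dma_alloc_coherent(dev, size, handle, GFP_ATOMIC);
}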
+ */ +postcore_initcall(atomic_pool_init); + +struct dma_contig_early_reserve { + phys_addr_t base; + unsigned long size; +}; + +static struct dma_contig_early_reserve dma_mmu_remap[MAX_CMA_AREAS] __initdata; + +static int dma_mmu_remap_num __initdata; + +void __init dma_contiguous_early_fixup(phys_addr_t base, unsigned long size) +{ + dma_mmu_remap[dma_mmu_remap_num].base = base; + dma_mmu_remap[dma_mmu_remap_num].size = size; + dma_mmu_remap_num++; +} + +void __init dma_contiguous_remap(void) +{ + int i; + for (i = 0; i < dma_mmu_remap_num; i++) { + phys_addr_t start = dma_mmu_remap[i].base; + phys_addr_t end = start + dma_mmu_remap[i].size; + struct map_desc map; + unsigned long addr; + + if (end > arm_lowmem_limit) + end = arm_lowmem_limit; + if (start >= end) + continue; + + map.pfn = __phys_to_pfn(start); + map.virtual = __phys_to_virt(start); + map.length = end - start; + map.type = MT_MEMORY_DMA_READY; + + /* + * Clear previous low-memory mapping to ensure that the + * TLB does not see any conflicting entries, then flush + * the TLB of the old entries before creating new mappings. + * + * This ensures that any speculatively loaded TLB entries + * (even though they may be rare) can not cause any problems, + * and ensures that this code is architecturally compliant. + */ + for (addr = __phys_to_virt(start); addr < __phys_to_virt(end); + addr += PMD_SIZE) + pmd_clear(pmd_off_k(addr)); + + flush_tlb_kernel_range(__phys_to_virt(start), + __phys_to_virt(end)); + + iotable_init(&map, 1); + } +} + +static int __dma_update_pte(pte_t *pte, pgtable_t token, unsigned long addr, + void *data) +{ + struct page *page = virt_to_page(addr); + pgprot_t prot = *(pgprot_t *)data; + + set_pte_ext(pte, mk_pte(page, prot), 0); + return 0; +} + +static void __dma_remap(struct page *page, size_t size, pgprot_t prot) +{ + unsigned long start = (unsigned long) page_address(page); + unsigned end = start + size; + + apply_to_page_range(&init_mm, start, size, __dma_update_pte, &prot); + flush_tlb_kernel_range(start, end); +} + +static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp, + pgprot_t prot, struct page **ret_page, + const void *caller) +{ + struct page *page; + void *ptr; + page = __dma_alloc_buffer(dev, size, gfp); + if (!page) + return NULL; + + ptr = __dma_alloc_remap(page, size, gfp, prot, caller); + if (!ptr) { + __dma_free_buffer(page, size); + return NULL; + } + + *ret_page = page; + return ptr; +} + +static void *__alloc_from_pool(size_t size, struct page **ret_page) +{ + struct dma_pool *pool = &atomic_pool; + unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; + unsigned int pageno; + unsigned long flags; + void *ptr = NULL; + unsigned long align_mask; + + if (!pool->vaddr) { + WARN(1, "coherent pool not initialised!\n"); + return NULL; + } + + /* + * Align the region allocation - allocations from pool are rather + * small, so align them to their order in pages, minimum is a page + * size. This helps reduce fragmentation of the DMA space. 
+ */ + align_mask = (1 << get_order(size)) - 1; + + spin_lock_irqsave(&pool->lock, flags); + pageno = bitmap_find_next_zero_area(pool->bitmap, pool->nr_pages, + 0, count, align_mask); + if (pageno < pool->nr_pages) { + bitmap_set(pool->bitmap, pageno, count); + ptr = pool->vaddr + PAGE_SIZE * pageno; + *ret_page = pool->pages[pageno]; + } else { + pr_err_once("ERROR: %u KiB atomic DMA coherent pool is too small!\n" + "Please increase it with coherent_pool= kernel parameter!\n", + (unsigned)pool->size / 1024); + } + spin_unlock_irqrestore(&pool->lock, flags); + + return ptr; +} + +static bool __in_atomic_pool(void *start, size_t size) +{ + struct dma_pool *pool = &atomic_pool; + void *end = start + size; + void *pool_start = pool->vaddr; + void *pool_end = pool->vaddr + pool->size; + + if (start < pool_start || start >= pool_end) + return false; + + if (end <= pool_end) + return true; + + WARN(1, "Wrong coherent size(%p-%p) from atomic pool(%p-%p)\n", + start, end - 1, pool_start, pool_end - 1); + + return false; +} + +static int __free_from_pool(void *start, size_t size) +{ + struct dma_pool *pool = &atomic_pool; + unsigned long pageno, count; + unsigned long flags; + + if (!__in_atomic_pool(start, size)) + return 0; + + pageno = (start - pool->vaddr) >> PAGE_SHIFT; + count = size >> PAGE_SHIFT; + + spin_lock_irqsave(&pool->lock, flags); + bitmap_clear(pool->bitmap, pageno, count); + spin_unlock_irqrestore(&pool->lock, flags); + + return 1; +} + +static void *__alloc_from_contiguous(struct device *dev, size_t size, + pgprot_t prot, struct page **ret_page, + const void *caller) +{ + unsigned long order = get_order(size); + size_t count = size >> PAGE_SHIFT; + struct page *page; + void *ptr; + + page = dma_alloc_from_contiguous(dev, count, order); + if (!page) + return NULL; + + __dma_clear_buffer(page, size); + + if (PageHighMem(page)) { + ptr = __dma_alloc_remap(page, size, GFP_KERNEL, prot, caller); + if (!ptr) { + dma_release_from_contiguous(dev, page, count); + return NULL; + } + } else { + __dma_remap(page, size, prot); + ptr = page_address(page); + } + *ret_page = page; + return ptr; +} + +static void __free_from_contiguous(struct device *dev, struct page *page, + void *cpu_addr, size_t size) +{ + if (PageHighMem(page)) + __dma_free_remap(cpu_addr, size); + else + __dma_remap(page, size, PAGE_KERNEL); + dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT); +} + +static inline pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot) +{ + prot = dma_get_attr(DMA_ATTR_WRITE_COMBINE, attrs) ? 
+ pgprot_writecombine(prot) : + pgprot_dmacoherent(prot); + return prot; +} + +#define nommu() 0 + +#else /* !CONFIG_MMU */ + +#define nommu() 1 + +#define __get_dma_pgprot(attrs, prot) __pgprot(0) +#define __alloc_remap_buffer(dev, size, gfp, prot, ret, c) NULL +#define __alloc_from_pool(size, ret_page) NULL +#define __alloc_from_contiguous(dev, size, prot, ret, c) NULL +#define __free_from_pool(cpu_addr, size) 0 +#define __free_from_contiguous(dev, page, cpu_addr, size) do { } while (0) +#define __dma_free_remap(cpu_addr, size) do { } while (0) + +#endif /* CONFIG_MMU */ + +static void *__alloc_simple_buffer(struct device *dev, size_t size, gfp_t gfp, + struct page **ret_page) +{ + struct page *page; + page = __dma_alloc_buffer(dev, size, gfp); + if (!page) + return NULL; + + *ret_page = page; + return page_address(page); +} + + + +static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, + gfp_t gfp, pgprot_t prot, bool is_coherent, const void *caller) +{ + u64 mask = get_coherent_dma_mask(dev); + struct page *page = NULL; + void *addr; + +#ifdef CONFIG_DMA_API_DEBUG + u64 limit = (mask + 1) & ~mask; + if (limit && size >= limit) { + dev_warn(dev, "coherent allocation too big (requested %#x mask %#llx)\n", + size, mask); + return NULL; + } +#endif + + if (!mask) + return NULL; + + if (mask < 0xffffffffULL) + gfp |= GFP_DMA; + + /* + * Following is a work-around (a.k.a. hack) to prevent pages + * with __GFP_COMP being passed to split_page() which cannot + * handle them. The real problem is that this flag probably + * should be 0 on ARM as it is not supported on this + * platform; see CONFIG_HUGETLBFS. + */ + gfp &= ~(__GFP_COMP); + + *handle = DMA_ERROR_CODE; + size = PAGE_ALIGN(size); + + if (is_coherent || nommu()) + addr = __alloc_simple_buffer(dev, size, gfp, &page); + else if (!(gfp & __GFP_WAIT)) + addr = __alloc_from_pool(size, &page); + else if (!dev_get_cma_area(dev)) + addr = __alloc_remap_buffer(dev, size, gfp, prot, &page, caller); + else + addr = __alloc_from_contiguous(dev, size, prot, &page, caller); + + if (addr) + *handle = pfn_to_dma(dev, page_to_pfn(page)); + + return addr; +} + +/* + * Allocate DMA-coherent memory space and return both the kernel remapped + * virtual and bus address for that space. + */ +void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, + gfp_t gfp, struct dma_attrs *attrs) +{ + pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL); + void *memory; + + if (dma_alloc_from_coherent(dev, size, handle, &memory)) + return memory; + + return __dma_alloc(dev, size, handle, gfp, prot, false, + __builtin_return_address(0)); +} + +static void *arm_coherent_dma_alloc(struct device *dev, size_t size, + dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs) +{ + pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL); + void *memory; + + if (dma_alloc_from_coherent(dev, size, handle, &memory)) + return memory; + + return __dma_alloc(dev, size, handle, gfp, prot, true, + __builtin_return_address(0)); +} + +/* + * Create userspace mapping for the DMA-coherent memory. 
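A driver does not call arm_dma_mmap() directly; it reaches this operation through dma_mmap_coherent() from its own mmap file operation. A hedged sketch of that path (struct example_dev and its fields are invented for the example):

#include <linux/dma-mapping.h>
#include <linux/fs.h>
#include <linux/mm.h>

struct example_dev {
        struct device *dev;
        void *cpu_addr;         /* from dma_alloc_coherent() */
        dma_addr_t handle;
        size_t size;
};

/* Illustrative only: export the coherent buffer to user space. */
static int example_mmap(struct file *file, struct vm_area_struct *vma)
{
        struct example_dev *ed = file->private_data;

        return dma_mmap_coherent(ed->dev, vma, ed->cpu_addr,
                                 ed->handle, ed->size);
}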
+ */ +int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + struct dma_attrs *attrs) +{ + int ret = -ENXIO; +#ifdef CONFIG_MMU + unsigned long nr_vma_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; + unsigned long pfn = dma_to_pfn(dev, dma_addr); + unsigned long off = vma->vm_pgoff; + + vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot); + + if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret)) + return ret; + + if (off < nr_pages && nr_vma_pages <= (nr_pages - off)) { + ret = remap_pfn_range(vma, vma->vm_start, + pfn + off, + vma->vm_end - vma->vm_start, + vma->vm_page_prot); + } +#endif /* CONFIG_MMU */ + + return ret; +} + +/* + * Free a buffer as defined by the above mapping. + */ +static void __arm_dma_free(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t handle, struct dma_attrs *attrs, + bool is_coherent) +{ + struct page *page = pfn_to_page(dma_to_pfn(dev, handle)); + + if (dma_release_from_coherent(dev, get_order(size), cpu_addr)) + return; + + size = PAGE_ALIGN(size); + + if (is_coherent || nommu()) { + __dma_free_buffer(page, size); + } else if (__free_from_pool(cpu_addr, size)) { + return; + } else if (!dev_get_cma_area(dev)) { + __dma_free_remap(cpu_addr, size); + __dma_free_buffer(page, size); + } else { + /* + * Non-atomic allocations cannot be freed with IRQs disabled + */ + WARN_ON(irqs_disabled()); + __free_from_contiguous(dev, page, cpu_addr, size); + } +} + +void arm_dma_free(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t handle, struct dma_attrs *attrs) +{ + __arm_dma_free(dev, size, cpu_addr, handle, attrs, false); +} + +static void arm_coherent_dma_free(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t handle, struct dma_attrs *attrs) +{ + __arm_dma_free(dev, size, cpu_addr, handle, attrs, true); +} + +int arm_dma_get_sgtable(struct device *dev, struct sg_table *sgt, + void *cpu_addr, dma_addr_t handle, size_t size, + struct dma_attrs *attrs) +{ + struct page *page = pfn_to_page(dma_to_pfn(dev, handle)); + int ret; + + ret = sg_alloc_table(sgt, 1, GFP_KERNEL); + if (unlikely(ret)) + return ret; + + sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0); + return 0; +} + +static void dma_cache_maint_page(struct page *page, unsigned long offset, + size_t size, enum dma_data_direction dir, + void (*op)(const void *, size_t, int)) +{ + unsigned long pfn; + size_t left = size; + + pfn = page_to_pfn(page) + offset / PAGE_SIZE; + offset %= PAGE_SIZE; + + /* + * A single sg entry may refer to multiple physically contiguous + * pages. But we still need to process highmem pages individually. + * If highmem is not configured then the bulk of this loop gets + * optimized out. + */ + do { + size_t len = left; + void *vaddr; + + page = pfn_to_page(pfn); + + if (PageHighMem(page)) { + if (len + offset > PAGE_SIZE) + len = PAGE_SIZE - offset; + + if (cache_is_vipt_nonaliasing()) { + vaddr = kmap_atomic(page); + op(vaddr + offset, len, dir); + kunmap_atomic(vaddr); + } else { + vaddr = kmap_high_get(page); + if (vaddr) { + op(vaddr + offset, len, dir); + kunmap_high(page); + } + } + } else { + vaddr = page_address(page) + offset; + op(vaddr, len, dir); + } + offset = 0; + pfn++; + left -= len; + } while (left); +} + +/* + * Make an area consistent for devices. + * Note: Drivers should NOT use this function directly, as it will break + * platforms with CONFIG_DMABOUNCE. 
+ * Use the driver DMA support - see dma-mapping.h (dma_sync_*) + */ +static void __dma_page_cpu_to_dev(struct page *page, unsigned long off, + size_t size, enum dma_data_direction dir) +{ + phys_addr_t paddr; + + dma_cache_maint_page(page, off, size, dir, dmac_map_area); + + paddr = page_to_phys(page) + off; + if (dir == DMA_FROM_DEVICE) { + outer_inv_range(paddr, paddr + size); + } else { + outer_clean_range(paddr, paddr + size); + } + /* FIXME: non-speculating: flush on bidirectional mappings? */ +} + +static void __dma_page_dev_to_cpu(struct page *page, unsigned long off, + size_t size, enum dma_data_direction dir) +{ + phys_addr_t paddr = page_to_phys(page) + off; + + /* FIXME: non-speculating: not required */ + /* in any case, don't bother invalidating if DMA to device */ + if (dir != DMA_TO_DEVICE) { + outer_inv_range(paddr, paddr + size); + + dma_cache_maint_page(page, off, size, dir, dmac_unmap_area); + } + + /* + * Mark the D-cache clean for these pages to avoid extra flushing. + */ + if (dir != DMA_TO_DEVICE && size >= PAGE_SIZE) { + unsigned long pfn; + size_t left = size; + + pfn = page_to_pfn(page) + off / PAGE_SIZE; + off %= PAGE_SIZE; + if (off) { + pfn++; + left -= PAGE_SIZE - off; + } + while (left >= PAGE_SIZE) { + page = pfn_to_page(pfn++); + set_bit(PG_dcache_clean, &page->flags); + left -= PAGE_SIZE; + } + } +} + +/** + * arm_dma_map_sg - map a set of SG buffers for streaming mode DMA + * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices + * @sg: list of buffers + * @nents: number of buffers to map + * @dir: DMA transfer direction + * + * Map a set of buffers described by scatterlist in streaming mode for DMA. + * This is the scatter-gather version of the dma_map_single interface. + * Here the scatter gather list elements are each tagged with the + * appropriate dma address and length. They are obtained via + * sg_dma_{address,length}. + * + * Device ownership issues as mentioned for dma_map_single are the same + * here. + */ +int arm_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, struct dma_attrs *attrs) +{ + struct dma_map_ops *ops = get_dma_ops(dev); + struct scatterlist *s; + int i, j; + + for_each_sg(sg, s, nents, i) { +#ifdef CONFIG_NEED_SG_DMA_LENGTH + s->dma_length = s->length; +#endif + s->dma_address = ops->map_page(dev, sg_page(s), s->offset, + s->length, dir, attrs); + if (dma_mapping_error(dev, s->dma_address)) + goto bad_mapping; + } + return nents; + + bad_mapping: + for_each_sg(sg, s, i, j) + ops->unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir, attrs); + return 0; +} + +/** + * arm_dma_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg + * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices + * @sg: list of buffers + * @nents: number of buffers to unmap (same as was passed to dma_map_sg) + * @dir: DMA transfer direction (same as was passed to dma_map_sg) + * + * Unmap a set of streaming mode DMA translations. Again, CPU access + * rules concerning calls here are the same as for dma_unmap_single(). 
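When a driver keeps a streaming mapping alive across several transfers, the dma_sync_* interface referred to above is how ownership bounces between CPU and device without remapping. A small sketch (the buffer and handle are assumed to come from an earlier dma_map_single() or dma_map_page() call):

#include <linux/dma-mapping.h>

/* Illustrative only: peek at received data, then re-arm the buffer. */
static void example_check_rx(struct device *dev, dma_addr_t handle,
                             const void *cpu_addr, size_t len)
{
        dma_sync_single_for_cpu(dev, handle, len, DMA_FROM_DEVICE);

        /* ... the CPU may now read cpu_addr[0..len-1] coherently ... */

        dma_sync_single_for_device(dev, handle, len, DMA_FROM_DEVICE);
}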
+ */ +void arm_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, struct dma_attrs *attrs) +{ + struct dma_map_ops *ops = get_dma_ops(dev); + struct scatterlist *s; + + int i; + + for_each_sg(sg, s, nents, i) + ops->unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir, attrs); +} + +/** + * arm_dma_sync_sg_for_cpu + * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices + * @sg: list of buffers + * @nents: number of buffers to map (returned from dma_map_sg) + * @dir: DMA transfer direction (same as was passed to dma_map_sg) + */ +void arm_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir) +{ + struct dma_map_ops *ops = get_dma_ops(dev); + struct scatterlist *s; + int i; + + for_each_sg(sg, s, nents, i) + ops->sync_single_for_cpu(dev, sg_dma_address(s), s->length, + dir); +} + +/** + * arm_dma_sync_sg_for_device + * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices + * @sg: list of buffers + * @nents: number of buffers to map (returned from dma_map_sg) + * @dir: DMA transfer direction (same as was passed to dma_map_sg) + */ +void arm_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir) +{ + struct dma_map_ops *ops = get_dma_ops(dev); + struct scatterlist *s; + int i; + + for_each_sg(sg, s, nents, i) + ops->sync_single_for_device(dev, sg_dma_address(s), s->length, + dir); +} + +/* + * Return whether the given device DMA address mask can be supported + * properly. For example, if your device can only drive the low 24-bits + * during bus mastering, then you would pass 0x00ffffff as the mask + * to this function. + */ +int dma_supported(struct device *dev, u64 mask) +{ + return __dma_supported(dev, mask, false); +} +EXPORT_SYMBOL(dma_supported); + +int arm_dma_set_mask(struct device *dev, u64 dma_mask) +{ + if (!dev->dma_mask || !dma_supported(dev, dma_mask)) + return -EIO; + + *dev->dma_mask = dma_mask; + + return 0; +} + +#define PREALLOC_DMA_DEBUG_ENTRIES 4096 + +static int __init dma_debug_do_init(void) +{ + dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); + return 0; +} +fs_initcall(dma_debug_do_init); + +#ifdef CONFIG_ARM_DMA_USE_IOMMU + +/* IOMMU */ + +static int extend_iommu_mapping(struct dma_iommu_mapping *mapping); + +static inline dma_addr_t __alloc_iova(struct dma_iommu_mapping *mapping, + size_t size) +{ + unsigned int order = get_order(size); + unsigned int align = 0; + unsigned int count, start; + size_t mapping_size = mapping->bits << PAGE_SHIFT; + unsigned long flags; + dma_addr_t iova; + int i; + + if (order > CONFIG_ARM_DMA_IOMMU_ALIGNMENT) + order = CONFIG_ARM_DMA_IOMMU_ALIGNMENT; + + count = PAGE_ALIGN(size) >> PAGE_SHIFT; + align = (1 << order) - 1; + + spin_lock_irqsave(&mapping->lock, flags); + for (i = 0; i < mapping->nr_bitmaps; i++) { + start = bitmap_find_next_zero_area(mapping->bitmaps[i], + mapping->bits, 0, count, align); + + if (start > mapping->bits) + continue; + + bitmap_set(mapping->bitmaps[i], start, count); + break; + } + + /* + * No unused range found. Try to extend the existing mapping + * and perform a second attempt to reserve an IO virtual + * address range of size bytes. 
+ */ + if (i == mapping->nr_bitmaps) { + if (extend_iommu_mapping(mapping)) { + spin_unlock_irqrestore(&mapping->lock, flags); + return DMA_ERROR_CODE; + } + + start = bitmap_find_next_zero_area(mapping->bitmaps[i], + mapping->bits, 0, count, align); + + if (start > mapping->bits) { + spin_unlock_irqrestore(&mapping->lock, flags); + return DMA_ERROR_CODE; + } + + bitmap_set(mapping->bitmaps[i], start, count); + } + spin_unlock_irqrestore(&mapping->lock, flags); + + iova = mapping->base + (mapping_size * i); + iova += start << PAGE_SHIFT; + + return iova; +} + +static inline void __free_iova(struct dma_iommu_mapping *mapping, + dma_addr_t addr, size_t size) +{ + unsigned int start, count; + size_t mapping_size = mapping->bits << PAGE_SHIFT; + unsigned long flags; + dma_addr_t bitmap_base; + u32 bitmap_index; + + if (!size) + return; + + bitmap_index = (u32) (addr - mapping->base) / (u32) mapping_size; + BUG_ON(addr < mapping->base || bitmap_index > mapping->extensions); + + bitmap_base = mapping->base + mapping_size * bitmap_index; + + start = (addr - bitmap_base) >> PAGE_SHIFT; + + if (addr + size > bitmap_base + mapping_size) { + /* + * The address range to be freed reaches into the iova + * range of the next bitmap. This should not happen as + * we don't allow this in __alloc_iova (at the + * moment). + */ + BUG(); + } else + count = size >> PAGE_SHIFT; + + spin_lock_irqsave(&mapping->lock, flags); + bitmap_clear(mapping->bitmaps[bitmap_index], start, count); + spin_unlock_irqrestore(&mapping->lock, flags); +} + +static struct page **__iommu_alloc_buffer(struct device *dev, size_t size, + gfp_t gfp, struct dma_attrs *attrs) +{ + struct page **pages; + int count = size >> PAGE_SHIFT; + int array_size = count * sizeof(struct page *); + int i = 0; + + if (array_size <= PAGE_SIZE) + pages = kzalloc(array_size, gfp); + else + pages = vzalloc(array_size); + if (!pages) + return NULL; + + if (dma_get_attr(DMA_ATTR_FORCE_CONTIGUOUS, attrs)) + { + unsigned long order = get_order(size); + struct page *page; + + page = dma_alloc_from_contiguous(dev, count, order); + if (!page) + goto error; + + __dma_clear_buffer(page, size); + + for (i = 0; i < count; i++) + pages[i] = page + i; + + return pages; + } + + /* + * IOMMU can map any pages, so himem can also be used here + */ + gfp |= __GFP_NOWARN | __GFP_HIGHMEM; + + while (count) { + int j, order = __fls(count); + + pages[i] = alloc_pages(gfp, order); + while (!pages[i] && order) + pages[i] = alloc_pages(gfp, --order); + if (!pages[i]) + goto error; + + if (order) { + split_page(pages[i], order); + j = 1 << order; + while (--j) + pages[i + j] = pages[i] + j; + } + + __dma_clear_buffer(pages[i], PAGE_SIZE << order); + i += 1 << order; + count -= 1 << order; + } + + return pages; +error: + while (i--) + if (pages[i]) + __free_pages(pages[i], 0); + if (array_size <= PAGE_SIZE) + kfree(pages); + else + vfree(pages); + return NULL; +} + +static int __iommu_free_buffer(struct device *dev, struct page **pages, + size_t size, struct dma_attrs *attrs) +{ + int count = size >> PAGE_SHIFT; + int array_size = count * sizeof(struct page *); + int i; + + if (dma_get_attr(DMA_ATTR_FORCE_CONTIGUOUS, attrs)) { + dma_release_from_contiguous(dev, pages[0], count); + } else { + for (i = 0; i < count; i++) + if (pages[i]) + __free_pages(pages[i], 0); + } + + if (array_size <= PAGE_SIZE) + kfree(pages); + else + vfree(pages); + return 0; +} + +/* + * Create a CPU mapping for a specified pages + */ +static void * +__iommu_alloc_remap(struct page **pages, size_t size, gfp_t 
gfp, pgprot_t prot, + const void *caller) +{ + unsigned int i, nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; + struct vm_struct *area; + unsigned long p; + + area = get_vm_area_caller(size, VM_ARM_DMA_CONSISTENT | VM_USERMAP, + caller); + if (!area) + return NULL; + + area->pages = pages; + area->nr_pages = nr_pages; + p = (unsigned long)area->addr; + + for (i = 0; i < nr_pages; i++) { + phys_addr_t phys = __pfn_to_phys(page_to_pfn(pages[i])); + if (ioremap_page_range(p, p + PAGE_SIZE, phys, prot)) + goto err; + p += PAGE_SIZE; + } + return area->addr; +err: + unmap_kernel_range((unsigned long)area->addr, size); + vunmap(area->addr); + return NULL; +} + +/* + * Create a mapping in device IO address space for specified pages + */ +static dma_addr_t +__iommu_create_mapping(struct device *dev, struct page **pages, size_t size) +{ + struct dma_iommu_mapping *mapping = dev->archdata.mapping; + unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; + dma_addr_t dma_addr, iova; + int i, ret = DMA_ERROR_CODE; + + dma_addr = __alloc_iova(mapping, size); + if (dma_addr == DMA_ERROR_CODE) + return dma_addr; + + iova = dma_addr; + for (i = 0; i < count; ) { + unsigned int next_pfn = page_to_pfn(pages[i]) + 1; + phys_addr_t phys = page_to_phys(pages[i]); + unsigned int len, j; + + for (j = i + 1; j < count; j++, next_pfn++) + if (page_to_pfn(pages[j]) != next_pfn) + break; + + len = (j - i) << PAGE_SHIFT; + ret = iommu_map(mapping->domain, iova, phys, len, + IOMMU_READ|IOMMU_WRITE); + if (ret < 0) + goto fail; + iova += len; + i = j; + } + return dma_addr; +fail: + iommu_unmap(mapping->domain, dma_addr, iova-dma_addr); + __free_iova(mapping, dma_addr, size); + return DMA_ERROR_CODE; +} + +static int __iommu_remove_mapping(struct device *dev, dma_addr_t iova, size_t size) +{ + struct dma_iommu_mapping *mapping = dev->archdata.mapping; + + /* + * add optional in-page offset from iova to size and align + * result to page size + */ + size = PAGE_ALIGN((iova & ~PAGE_MASK) + size); + iova &= PAGE_MASK; + + iommu_unmap(mapping->domain, iova, size); + __free_iova(mapping, iova, size); + return 0; +} + +static struct page **__atomic_get_pages(void *addr) +{ + struct dma_pool *pool = &atomic_pool; + struct page **pages = pool->pages; + int offs = (addr - pool->vaddr) >> PAGE_SHIFT; + + return pages + offs; +} + +static struct page **__iommu_get_pages(void *cpu_addr, struct dma_attrs *attrs) +{ + struct vm_struct *area; + + if (__in_atomic_pool(cpu_addr, PAGE_SIZE)) + return __atomic_get_pages(cpu_addr); + + if (dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs)) + return cpu_addr; + + area = find_vm_area(cpu_addr); + if (area && (area->flags & VM_ARM_DMA_CONSISTENT)) + return area->pages; + return NULL; +} + +static void *__iommu_alloc_atomic(struct device *dev, size_t size, + dma_addr_t *handle) +{ + struct page *page; + void *addr; + + addr = __alloc_from_pool(size, &page); + if (!addr) + return NULL; + + *handle = __iommu_create_mapping(dev, &page, size); + if (*handle == DMA_ERROR_CODE) + goto err_mapping; + + return addr; + +err_mapping: + __free_from_pool(addr, size); + return NULL; +} + +static void __iommu_free_atomic(struct device *dev, void *cpu_addr, + dma_addr_t handle, size_t size) +{ + __iommu_remove_mapping(dev, handle, size); + __free_from_pool(cpu_addr, size); +} + +static void *arm_iommu_alloc_attrs(struct device *dev, size_t size, + dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs) +{ + pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL); + struct page **pages; + void *addr = NULL; + + *handle 
= DMA_ERROR_CODE; + size = PAGE_ALIGN(size); + + if (!(gfp & __GFP_WAIT)) + return __iommu_alloc_atomic(dev, size, handle); + + /* + * Following is a work-around (a.k.a. hack) to prevent pages + * with __GFP_COMP being passed to split_page() which cannot + * handle them. The real problem is that this flag probably + * should be 0 on ARM as it is not supported on this + * platform; see CONFIG_HUGETLBFS. + */ + gfp &= ~(__GFP_COMP); + + pages = __iommu_alloc_buffer(dev, size, gfp, attrs); + if (!pages) + return NULL; + + *handle = __iommu_create_mapping(dev, pages, size); + if (*handle == DMA_ERROR_CODE) + goto err_buffer; + + if (dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs)) + return pages; + + addr = __iommu_alloc_remap(pages, size, gfp, prot, + __builtin_return_address(0)); + if (!addr) + goto err_mapping; + + return addr; + +err_mapping: + __iommu_remove_mapping(dev, *handle, size); +err_buffer: + __iommu_free_buffer(dev, pages, size, attrs); + return NULL; +} + +static int arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + struct dma_attrs *attrs) +{ + unsigned long uaddr = vma->vm_start; + unsigned long usize = vma->vm_end - vma->vm_start; + struct page **pages = __iommu_get_pages(cpu_addr, attrs); + + vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot); + + if (!pages) + return -ENXIO; + + do { + int ret = vm_insert_page(vma, uaddr, *pages++); + if (ret) { + pr_err("Remapping memory failed: %d\n", ret); + return ret; + } + uaddr += PAGE_SIZE; + usize -= PAGE_SIZE; + } while (usize > 0); + + return 0; +} + +/* + * free a page as defined by the above mapping. + * Must not be called with IRQs disabled. + */ +void arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t handle, struct dma_attrs *attrs) +{ + struct page **pages; + size = PAGE_ALIGN(size); + + if (__in_atomic_pool(cpu_addr, size)) { + __iommu_free_atomic(dev, cpu_addr, handle, size); + return; + } + + pages = __iommu_get_pages(cpu_addr, attrs); + if (!pages) { + WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr); + return; + } + + if (!dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs)) { + unmap_kernel_range((unsigned long)cpu_addr, size); + vunmap(cpu_addr); + } + + __iommu_remove_mapping(dev, handle, size); + __iommu_free_buffer(dev, pages, size, attrs); +} + +static int arm_iommu_get_sgtable(struct device *dev, struct sg_table *sgt, + void *cpu_addr, dma_addr_t dma_addr, + size_t size, struct dma_attrs *attrs) +{ + unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; + struct page **pages = __iommu_get_pages(cpu_addr, attrs); + + if (!pages) + return -ENXIO; + + return sg_alloc_table_from_pages(sgt, pages, count, 0, size, + GFP_KERNEL); +} + +static int __dma_direction_to_prot(enum dma_data_direction dir) +{ + int prot; + + switch (dir) { + case DMA_BIDIRECTIONAL: + prot = IOMMU_READ | IOMMU_WRITE; + break; + case DMA_TO_DEVICE: + prot = IOMMU_READ; + break; + case DMA_FROM_DEVICE: + prot = IOMMU_WRITE; + break; + default: + prot = 0; + } + + return prot; +} + +/* + * Map a part of the scatter-gather list into contiguous io address space + */ +static int __map_sg_chunk(struct device *dev, struct scatterlist *sg, + size_t size, dma_addr_t *handle, + enum dma_data_direction dir, struct dma_attrs *attrs, + bool is_coherent) +{ + struct dma_iommu_mapping *mapping = dev->archdata.mapping; + dma_addr_t iova, iova_base; + int ret = 0; + unsigned int count; + struct scatterlist *s; + int prot; + + size = 
PAGE_ALIGN(size); + *handle = DMA_ERROR_CODE; + + iova_base = iova = __alloc_iova(mapping, size); + if (iova == DMA_ERROR_CODE) + return -ENOMEM; + + for (count = 0, s = sg; count < (size >> PAGE_SHIFT); s = sg_next(s)) { + phys_addr_t phys = page_to_phys(sg_page(s)); + unsigned int len = PAGE_ALIGN(s->offset + s->length); + + if (!is_coherent && + !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) + __dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir); + + prot = __dma_direction_to_prot(dir); + + ret = iommu_map(mapping->domain, iova, phys, len, prot); + if (ret < 0) + goto fail; + count += len >> PAGE_SHIFT; + iova += len; + } + *handle = iova_base; + + return 0; +fail: + iommu_unmap(mapping->domain, iova_base, count * PAGE_SIZE); + __free_iova(mapping, iova_base, size); + return ret; +} + +static int __iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, struct dma_attrs *attrs, + bool is_coherent) +{ + struct scatterlist *s = sg, *dma = sg, *start = sg; + int i, count = 0; + unsigned int offset = s->offset; + unsigned int size = s->offset + s->length; + unsigned int max = dma_get_max_seg_size(dev); + + for (i = 1; i < nents; i++) { + s = sg_next(s); + + s->dma_address = DMA_ERROR_CODE; + s->dma_length = 0; + + if (s->offset || (size & ~PAGE_MASK) || size + s->length > max) { + if (__map_sg_chunk(dev, start, size, &dma->dma_address, + dir, attrs, is_coherent) < 0) + goto bad_mapping; + + dma->dma_address += offset; + dma->dma_length = size - offset; + + size = offset = s->offset; + start = s; + dma = sg_next(dma); + count += 1; + } + size += s->length; + } + if (__map_sg_chunk(dev, start, size, &dma->dma_address, dir, attrs, + is_coherent) < 0) + goto bad_mapping; + + dma->dma_address += offset; + dma->dma_length = size - offset; + + return count+1; + +bad_mapping: + for_each_sg(sg, s, count, i) + __iommu_remove_mapping(dev, sg_dma_address(s), sg_dma_len(s)); + return 0; +} + +/** + * arm_coherent_iommu_map_sg - map a set of SG buffers for streaming mode DMA + * @dev: valid struct device pointer + * @sg: list of buffers + * @nents: number of buffers to map + * @dir: DMA transfer direction + * + * Map a set of i/o coherent buffers described by scatterlist in streaming + * mode for DMA. The scatter gather list elements are merged together (if + * possible) and tagged with the appropriate dma address and length. They are + * obtained via sg_dma_{address,length}. + */ +int arm_coherent_iommu_map_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, struct dma_attrs *attrs) +{ + return __iommu_map_sg(dev, sg, nents, dir, attrs, true); +} + +/** + * arm_iommu_map_sg - map a set of SG buffers for streaming mode DMA + * @dev: valid struct device pointer + * @sg: list of buffers + * @nents: number of buffers to map + * @dir: DMA transfer direction + * + * Map a set of buffers described by scatterlist in streaming mode for DMA. + * The scatter gather list elements are merged together (if possible) and + * tagged with the appropriate dma address and length. They are obtained via + * sg_dma_{address,length}. 
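From the caller's point of view the plain and IOMMU-backed scatter-gather paths look identical: the driver maps the list, then programs the device using only the returned count and sg_dma_address()/sg_dma_len(). A sketch of that usage (descriptor-ring details omitted; names are illustrative):

#include <linux/dma-mapping.h>
#include <linux/errno.h>
#include <linux/scatterlist.h>

/* Illustrative only: map a scatterlist and hand it to the hardware. */
static int example_map_and_program(struct device *dev,
                                   struct scatterlist *sgl, int nents)
{
        struct scatterlist *sg;
        int i, count;

        count = dma_map_sg(dev, sgl, nents, DMA_TO_DEVICE);
        if (!count)
                return -ENOMEM;

        for_each_sg(sgl, sg, count, i) {
                /* ... write sg_dma_address(sg) and sg_dma_len(sg) into
                 * the device's descriptor ring ... */
        }

        /* ... after completion, unmap with the original nents ... */
        dma_unmap_sg(dev, sgl, nents, DMA_TO_DEVICE);
        return 0;
}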
+ */ +int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, struct dma_attrs *attrs) +{ + return __iommu_map_sg(dev, sg, nents, dir, attrs, false); +} + +static void __iommu_unmap_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, struct dma_attrs *attrs, + bool is_coherent) +{ + struct scatterlist *s; + int i; + + for_each_sg(sg, s, nents, i) { + if (sg_dma_len(s)) + __iommu_remove_mapping(dev, sg_dma_address(s), + sg_dma_len(s)); + if (!is_coherent && + !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) + __dma_page_dev_to_cpu(sg_page(s), s->offset, + s->length, dir); + } +} + +/** + * arm_coherent_iommu_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg + * @dev: valid struct device pointer + * @sg: list of buffers + * @nents: number of buffers to unmap (same as was passed to dma_map_sg) + * @dir: DMA transfer direction (same as was passed to dma_map_sg) + * + * Unmap a set of streaming mode DMA translations. Again, CPU access + * rules concerning calls here are the same as for dma_unmap_single(). + */ +void arm_coherent_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, struct dma_attrs *attrs) +{ + __iommu_unmap_sg(dev, sg, nents, dir, attrs, true); +} + +/** + * arm_iommu_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg + * @dev: valid struct device pointer + * @sg: list of buffers + * @nents: number of buffers to unmap (same as was passed to dma_map_sg) + * @dir: DMA transfer direction (same as was passed to dma_map_sg) + * + * Unmap a set of streaming mode DMA translations. Again, CPU access + * rules concerning calls here are the same as for dma_unmap_single(). + */ +void arm_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, struct dma_attrs *attrs) +{ + __iommu_unmap_sg(dev, sg, nents, dir, attrs, false); +} + +/** + * arm_iommu_sync_sg_for_cpu + * @dev: valid struct device pointer + * @sg: list of buffers + * @nents: number of buffers to map (returned from dma_map_sg) + * @dir: DMA transfer direction (same as was passed to dma_map_sg) + */ +void arm_iommu_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir) +{ + struct scatterlist *s; + int i; + + for_each_sg(sg, s, nents, i) + __dma_page_dev_to_cpu(sg_page(s), s->offset, s->length, dir); + +} + +/** + * arm_iommu_sync_sg_for_device + * @dev: valid struct device pointer + * @sg: list of buffers + * @nents: number of buffers to map (returned from dma_map_sg) + * @dir: DMA transfer direction (same as was passed to dma_map_sg) + */ +void arm_iommu_sync_sg_for_device(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir) +{ + struct scatterlist *s; + int i; + + for_each_sg(sg, s, nents, i) + __dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir); +} + + +/** + * arm_coherent_iommu_map_page + * @dev: valid struct device pointer + * @page: page that buffer resides in + * @offset: offset into page for start of buffer + * @size: size of buffer to map + * @dir: DMA transfer direction + * + * Coherent IOMMU aware version of arm_dma_map_page() + */ +static dma_addr_t arm_coherent_iommu_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + struct dma_iommu_mapping *mapping = dev->archdata.mapping; + dma_addr_t dma_addr; + int ret, prot, len = PAGE_ALIGN(size + 
offset); + + dma_addr = __alloc_iova(mapping, len); + if (dma_addr == DMA_ERROR_CODE) + return dma_addr; + + prot = __dma_direction_to_prot(dir); + + ret = iommu_map(mapping->domain, dma_addr, page_to_phys(page), len, prot); + if (ret < 0) + goto fail; + + return dma_addr + offset; +fail: + __free_iova(mapping, dma_addr, len); + return DMA_ERROR_CODE; +} + +/** + * arm_iommu_map_page + * @dev: valid struct device pointer + * @page: page that buffer resides in + * @offset: offset into page for start of buffer + * @size: size of buffer to map + * @dir: DMA transfer direction + * + * IOMMU aware version of arm_dma_map_page() + */ +static dma_addr_t arm_iommu_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) + __dma_page_cpu_to_dev(page, offset, size, dir); + + return arm_coherent_iommu_map_page(dev, page, offset, size, dir, attrs); +} + +/** + * arm_coherent_iommu_unmap_page + * @dev: valid struct device pointer + * @handle: DMA address of buffer + * @size: size of buffer (same as passed to dma_map_page) + * @dir: DMA transfer direction (same as passed to dma_map_page) + * + * Coherent IOMMU aware version of arm_dma_unmap_page() + */ +static void arm_coherent_iommu_unmap_page(struct device *dev, dma_addr_t handle, + size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + struct dma_iommu_mapping *mapping = dev->archdata.mapping; + dma_addr_t iova = handle & PAGE_MASK; + int offset = handle & ~PAGE_MASK; + int len = PAGE_ALIGN(size + offset); + + if (!iova) + return; + + iommu_unmap(mapping->domain, iova, len); + __free_iova(mapping, iova, len); +} + +/** + * arm_iommu_unmap_page + * @dev: valid struct device pointer + * @handle: DMA address of buffer + * @size: size of buffer (same as passed to dma_map_page) + * @dir: DMA transfer direction (same as passed to dma_map_page) + * + * IOMMU aware version of arm_dma_unmap_page() + */ +static void arm_iommu_unmap_page(struct device *dev, dma_addr_t handle, + size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + struct dma_iommu_mapping *mapping = dev->archdata.mapping; + dma_addr_t iova = handle & PAGE_MASK; + struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova)); + int offset = handle & ~PAGE_MASK; + int len = PAGE_ALIGN(size + offset); + + if (!iova) + return; + + if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) + __dma_page_dev_to_cpu(page, offset, size, dir); + + iommu_unmap(mapping->domain, iova, len); + __free_iova(mapping, iova, len); +} + +static void arm_iommu_sync_single_for_cpu(struct device *dev, + dma_addr_t handle, size_t size, enum dma_data_direction dir) +{ + struct dma_iommu_mapping *mapping = dev->archdata.mapping; + dma_addr_t iova = handle & PAGE_MASK; + struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova)); + unsigned int offset = handle & ~PAGE_MASK; + + if (!iova) + return; + + __dma_page_dev_to_cpu(page, offset, size, dir); +} + +static void arm_iommu_sync_single_for_device(struct device *dev, + dma_addr_t handle, size_t size, enum dma_data_direction dir) +{ + struct dma_iommu_mapping *mapping = dev->archdata.mapping; + dma_addr_t iova = handle & PAGE_MASK; + struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova)); + unsigned int offset = handle & ~PAGE_MASK; + + if (!iova) + return; + + __dma_page_cpu_to_dev(page, offset, size, dir); +} + +struct dma_map_ops iommu_ops = { + 
.alloc = arm_iommu_alloc_attrs, + .free = arm_iommu_free_attrs, + .mmap = arm_iommu_mmap_attrs, + .get_sgtable = arm_iommu_get_sgtable, + + .map_page = arm_iommu_map_page, + .unmap_page = arm_iommu_unmap_page, + .sync_single_for_cpu = arm_iommu_sync_single_for_cpu, + .sync_single_for_device = arm_iommu_sync_single_for_device, + + .map_sg = arm_iommu_map_sg, + .unmap_sg = arm_iommu_unmap_sg, + .sync_sg_for_cpu = arm_iommu_sync_sg_for_cpu, + .sync_sg_for_device = arm_iommu_sync_sg_for_device, + + .set_dma_mask = arm_dma_set_mask, +}; + +struct dma_map_ops iommu_coherent_ops = { + .alloc = arm_iommu_alloc_attrs, + .free = arm_iommu_free_attrs, + .mmap = arm_iommu_mmap_attrs, + .get_sgtable = arm_iommu_get_sgtable, + + .map_page = arm_coherent_iommu_map_page, + .unmap_page = arm_coherent_iommu_unmap_page, + + .map_sg = arm_coherent_iommu_map_sg, + .unmap_sg = arm_coherent_iommu_unmap_sg, + + .set_dma_mask = arm_dma_set_mask, +}; + +/** + * arm_iommu_create_mapping + * @bus: pointer to the bus holding the client device (for IOMMU calls) + * @base: start address of the valid IO address space + * @size: maximum size of the valid IO address space + * + * Creates a mapping structure which holds information about used/unused + * IO address ranges, which is required to perform memory allocation and + * mapping with IOMMU aware functions. + * + * The client device need to be attached to the mapping with + * arm_iommu_attach_device function. + */ +struct dma_iommu_mapping * +arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t size) +{ + unsigned int bits = size >> PAGE_SHIFT; + unsigned int bitmap_size = BITS_TO_LONGS(bits) * sizeof(long); + struct dma_iommu_mapping *mapping; + int extensions = 1; + int err = -ENOMEM; + + if (!bitmap_size) + return ERR_PTR(-EINVAL); + + if (bitmap_size > PAGE_SIZE) { + extensions = bitmap_size / PAGE_SIZE; + bitmap_size = PAGE_SIZE; + } + + mapping = kzalloc(sizeof(struct dma_iommu_mapping), GFP_KERNEL); + if (!mapping) + goto err; + + mapping->bitmap_size = bitmap_size; + mapping->bitmaps = kzalloc(extensions * sizeof(unsigned long *), + GFP_KERNEL); + if (!mapping->bitmaps) + goto err2; + + mapping->bitmaps[0] = kzalloc(bitmap_size, GFP_KERNEL); + if (!mapping->bitmaps[0]) + goto err3; + + mapping->nr_bitmaps = 1; + mapping->extensions = extensions; + mapping->base = base; + mapping->bits = BITS_PER_BYTE * bitmap_size; + + spin_lock_init(&mapping->lock); + + mapping->domain = iommu_domain_alloc(bus); + if (!mapping->domain) + goto err4; + + kref_init(&mapping->kref); + return mapping; +err4: + kfree(mapping->bitmaps[0]); +err3: + kfree(mapping->bitmaps); +err2: + kfree(mapping); +err: + return ERR_PTR(err); +} +EXPORT_SYMBOL_GPL(arm_iommu_create_mapping); + +static void release_iommu_mapping(struct kref *kref) +{ + int i; + struct dma_iommu_mapping *mapping = + container_of(kref, struct dma_iommu_mapping, kref); + + iommu_domain_free(mapping->domain); + for (i = 0; i < mapping->nr_bitmaps; i++) + kfree(mapping->bitmaps[i]); + kfree(mapping->bitmaps); + kfree(mapping); +} + +static int extend_iommu_mapping(struct dma_iommu_mapping *mapping) +{ + int next_bitmap; + + if (mapping->nr_bitmaps > mapping->extensions) + return -EINVAL; + + next_bitmap = mapping->nr_bitmaps; + mapping->bitmaps[next_bitmap] = kzalloc(mapping->bitmap_size, + GFP_ATOMIC); + if (!mapping->bitmaps[next_bitmap]) + return -ENOMEM; + + mapping->nr_bitmaps++; + + return 0; +} + +void arm_iommu_release_mapping(struct dma_iommu_mapping *mapping) +{ + if (mapping) + 
kref_put(&mapping->kref, release_iommu_mapping); +} +EXPORT_SYMBOL_GPL(arm_iommu_release_mapping); + +/** + * arm_iommu_attach_device + * @dev: valid struct device pointer + * @mapping: io address space mapping structure (returned from + * arm_iommu_create_mapping) + * + * Attaches specified io address space mapping to the provided device, + * this replaces the dma operations (dma_map_ops pointer) with the + * IOMMU aware version. More than one client might be attached to + * the same io address space mapping. + */ +int arm_iommu_attach_device(struct device *dev, + struct dma_iommu_mapping *mapping) +{ + int err; + + err = iommu_attach_device(mapping->domain, dev); + if (err) + return err; + + kref_get(&mapping->kref); + dev->archdata.mapping = mapping; + set_dma_ops(dev, &iommu_ops); + + pr_debug("Attached IOMMU controller to %s device.\n", dev_name(dev)); + return 0; +} +EXPORT_SYMBOL_GPL(arm_iommu_attach_device); + +/** + * arm_iommu_detach_device + * @dev: valid struct device pointer + * + * Detaches the provided device from a previously attached map. + * This voids the dma operations (dma_map_ops pointer) + */ +void arm_iommu_detach_device(struct device *dev) +{ + struct dma_iommu_mapping *mapping; + + mapping = to_dma_iommu_mapping(dev); + if (!mapping) { + dev_warn(dev, "Not attached\n"); + return; + } + + iommu_detach_device(mapping->domain, dev); + kref_put(&mapping->kref, release_iommu_mapping); + dev->archdata.mapping = NULL; + set_dma_ops(dev, NULL); + + pr_debug("Detached IOMMU controller from %s device.\n", dev_name(dev)); +} +EXPORT_SYMBOL_GPL(arm_iommu_detach_device); + +#endif diff --git a/arch/arm/mm/dump.c b/arch/arm/mm/dump.c new file mode 100644 index 00000000000..c508f41a43b --- /dev/null +++ b/arch/arm/mm/dump.c @@ -0,0 +1,365 @@ +/* + * Debug helper to dump the current kernel pagetables of the system + * so that we can see what the various memory ranges are set to. + * + * Derived from x86 implementation: + * (C) Copyright 2008 Intel Corporation + * + * Author: Arjan van de Ven <arjan@linux.intel.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. 
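Editor's note (not part of the patch above): the exported helpers arm_iommu_create_mapping(), arm_iommu_attach_device() and arm_iommu_release_mapping() are typically driven from a bus or SoC driver. The sketch below is a hedged illustration of that call sequence; the example_attach_iommu() name, the platform bus type and the 0x80000000 / 128MB IOVA window are assumptions chosen for the example, not values taken from this diff.

    /*
     * Editorial sketch only -- NOT part of this patch.  Shows one way a
     * platform driver might consume the ARM IOMMU mapping API above.
     */
    #include <linux/err.h>
    #include <linux/platform_device.h>
    #include <linux/sizes.h>
    #include <asm/dma-iommu.h>

    static int example_attach_iommu(struct device *dev)
    {
            struct dma_iommu_mapping *mapping;
            int err;

            /* Reserve a per-device IOVA window managed by the bitmap allocator. */
            mapping = arm_iommu_create_mapping(&platform_bus_type,
                                               0x80000000, SZ_128M);
            if (IS_ERR(mapping))
                    return PTR_ERR(mapping);

            /* Replace dev's dma_map_ops with the IOMMU-aware iommu_ops. */
            err = arm_iommu_attach_device(dev, mapping);
            if (err) {
                    arm_iommu_release_mapping(mapping);
                    return err;
            }

            /*
             * From here on, dma_map_page()/dma_map_sg() on this device
             * allocate IOVAs from the window and program the IOMMU.
             */
            return 0;
    }

A matching teardown path would call arm_iommu_detach_device(dev) and then arm_iommu_release_mapping(mapping), mirroring the kref handling shown in the patch.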
+ */ +#include <linux/debugfs.h> +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/seq_file.h> + +#include <asm/fixmap.h> +#include <asm/pgtable.h> + +struct addr_marker { + unsigned long start_address; + const char *name; +}; + +static struct addr_marker address_markers[] = { + { MODULES_VADDR, "Modules" }, + { PAGE_OFFSET, "Kernel Mapping" }, + { 0, "vmalloc() Area" }, + { VMALLOC_END, "vmalloc() End" }, + { FIXADDR_START, "Fixmap Area" }, + { CONFIG_VECTORS_BASE, "Vectors" }, + { CONFIG_VECTORS_BASE + PAGE_SIZE * 2, "Vectors End" }, + { -1, NULL }, +}; + +struct pg_state { + struct seq_file *seq; + const struct addr_marker *marker; + unsigned long start_address; + unsigned level; + u64 current_prot; +}; + +struct prot_bits { + u64 mask; + u64 val; + const char *set; + const char *clear; +}; + +static const struct prot_bits pte_bits[] = { + { + .mask = L_PTE_USER, + .val = L_PTE_USER, + .set = "USR", + .clear = " ", + }, { + .mask = L_PTE_RDONLY, + .val = L_PTE_RDONLY, + .set = "ro", + .clear = "RW", + }, { + .mask = L_PTE_XN, + .val = L_PTE_XN, + .set = "NX", + .clear = "x ", + }, { + .mask = L_PTE_SHARED, + .val = L_PTE_SHARED, + .set = "SHD", + .clear = " ", + }, { + .mask = L_PTE_MT_MASK, + .val = L_PTE_MT_UNCACHED, + .set = "SO/UNCACHED", + }, { + .mask = L_PTE_MT_MASK, + .val = L_PTE_MT_BUFFERABLE, + .set = "MEM/BUFFERABLE/WC", + }, { + .mask = L_PTE_MT_MASK, + .val = L_PTE_MT_WRITETHROUGH, + .set = "MEM/CACHED/WT", + }, { + .mask = L_PTE_MT_MASK, + .val = L_PTE_MT_WRITEBACK, + .set = "MEM/CACHED/WBRA", +#ifndef CONFIG_ARM_LPAE + }, { + .mask = L_PTE_MT_MASK, + .val = L_PTE_MT_MINICACHE, + .set = "MEM/MINICACHE", +#endif + }, { + .mask = L_PTE_MT_MASK, + .val = L_PTE_MT_WRITEALLOC, + .set = "MEM/CACHED/WBWA", + }, { + .mask = L_PTE_MT_MASK, + .val = L_PTE_MT_DEV_SHARED, + .set = "DEV/SHARED", +#ifndef CONFIG_ARM_LPAE + }, { + .mask = L_PTE_MT_MASK, + .val = L_PTE_MT_DEV_NONSHARED, + .set = "DEV/NONSHARED", +#endif + }, { + .mask = L_PTE_MT_MASK, + .val = L_PTE_MT_DEV_WC, + .set = "DEV/WC", + }, { + .mask = L_PTE_MT_MASK, + .val = L_PTE_MT_DEV_CACHED, + .set = "DEV/CACHED", + }, +}; + +static const struct prot_bits section_bits[] = { +#ifdef CONFIG_ARM_LPAE + { + .mask = PMD_SECT_USER, + .val = PMD_SECT_USER, + .set = "USR", + }, { + .mask = PMD_SECT_RDONLY, + .val = PMD_SECT_RDONLY, + .set = "ro", + .clear = "RW", +#elif __LINUX_ARM_ARCH__ >= 6 + { + .mask = PMD_SECT_APX | PMD_SECT_AP_READ | PMD_SECT_AP_WRITE, + .val = PMD_SECT_APX | PMD_SECT_AP_WRITE, + .set = " ro", + }, { + .mask = PMD_SECT_APX | PMD_SECT_AP_READ | PMD_SECT_AP_WRITE, + .val = PMD_SECT_AP_WRITE, + .set = " RW", + }, { + .mask = PMD_SECT_APX | PMD_SECT_AP_READ | PMD_SECT_AP_WRITE, + .val = PMD_SECT_AP_READ, + .set = "USR ro", + }, { + .mask = PMD_SECT_APX | PMD_SECT_AP_READ | PMD_SECT_AP_WRITE, + .val = PMD_SECT_AP_READ | PMD_SECT_AP_WRITE, + .set = "USR RW", +#else /* ARMv4/ARMv5 */ + /* These are approximate */ + { + .mask = PMD_SECT_AP_READ | PMD_SECT_AP_WRITE, + .val = 0, + .set = " ro", + }, { + .mask = PMD_SECT_AP_READ | PMD_SECT_AP_WRITE, + .val = PMD_SECT_AP_WRITE, + .set = " RW", + }, { + .mask = PMD_SECT_AP_READ | PMD_SECT_AP_WRITE, + .val = PMD_SECT_AP_READ, + .set = "USR ro", + }, { + .mask = PMD_SECT_AP_READ | PMD_SECT_AP_WRITE, + .val = PMD_SECT_AP_READ | PMD_SECT_AP_WRITE, + .set = "USR RW", +#endif + }, { + .mask = PMD_SECT_XN, + .val = PMD_SECT_XN, + .set = "NX", + .clear = "x ", + }, { + .mask = PMD_SECT_S, + .val = PMD_SECT_S, + .set = "SHD", + .clear = " ", + }, +}; + +struct 
pg_level { + const struct prot_bits *bits; + size_t num; + u64 mask; +}; + +static struct pg_level pg_level[] = { + { + }, { /* pgd */ + }, { /* pud */ + }, { /* pmd */ + .bits = section_bits, + .num = ARRAY_SIZE(section_bits), + }, { /* pte */ + .bits = pte_bits, + .num = ARRAY_SIZE(pte_bits), + }, +}; + +static void dump_prot(struct pg_state *st, const struct prot_bits *bits, size_t num) +{ + unsigned i; + + for (i = 0; i < num; i++, bits++) { + const char *s; + + if ((st->current_prot & bits->mask) == bits->val) + s = bits->set; + else + s = bits->clear; + + if (s) + seq_printf(st->seq, " %s", s); + } +} + +static void note_page(struct pg_state *st, unsigned long addr, unsigned level, u64 val) +{ + static const char units[] = "KMGTPE"; + u64 prot = val & pg_level[level].mask; + + if (addr < USER_PGTABLES_CEILING) + return; + + if (!st->level) { + st->level = level; + st->current_prot = prot; + seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); + } else if (prot != st->current_prot || level != st->level || + addr >= st->marker[1].start_address) { + const char *unit = units; + unsigned long delta; + + if (st->current_prot) { + seq_printf(st->seq, "0x%08lx-0x%08lx ", + st->start_address, addr); + + delta = (addr - st->start_address) >> 10; + while (!(delta & 1023) && unit[1]) { + delta >>= 10; + unit++; + } + seq_printf(st->seq, "%9lu%c", delta, *unit); + if (pg_level[st->level].bits) + dump_prot(st, pg_level[st->level].bits, pg_level[st->level].num); + seq_printf(st->seq, "\n"); + } + + if (addr >= st->marker[1].start_address) { + st->marker++; + seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); + } + st->start_address = addr; + st->current_prot = prot; + st->level = level; + } +} + +static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start) +{ + pte_t *pte = pte_offset_kernel(pmd, 0); + unsigned long addr; + unsigned i; + + for (i = 0; i < PTRS_PER_PTE; i++, pte++) { + addr = start + i * PAGE_SIZE; + note_page(st, addr, 4, pte_val(*pte)); + } +} + +static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start) +{ + pmd_t *pmd = pmd_offset(pud, 0); + unsigned long addr; + unsigned i; + + for (i = 0; i < PTRS_PER_PMD; i++, pmd++) { + addr = start + i * PMD_SIZE; + if (pmd_none(*pmd) || pmd_large(*pmd) || !pmd_present(*pmd)) + note_page(st, addr, 3, pmd_val(*pmd)); + else + walk_pte(st, pmd, addr); + + if (SECTION_SIZE < PMD_SIZE && pmd_large(pmd[1])) + note_page(st, addr + SECTION_SIZE, 3, pmd_val(pmd[1])); + } +} + +static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start) +{ + pud_t *pud = pud_offset(pgd, 0); + unsigned long addr; + unsigned i; + + for (i = 0; i < PTRS_PER_PUD; i++, pud++) { + addr = start + i * PUD_SIZE; + if (!pud_none(*pud)) { + walk_pmd(st, pud, addr); + } else { + note_page(st, addr, 2, pud_val(*pud)); + } + } +} + +static void walk_pgd(struct seq_file *m) +{ + pgd_t *pgd = swapper_pg_dir; + struct pg_state st; + unsigned long addr; + unsigned i, pgdoff = USER_PGTABLES_CEILING / PGDIR_SIZE; + + memset(&st, 0, sizeof(st)); + st.seq = m; + st.marker = address_markers; + + pgd += pgdoff; + + for (i = pgdoff; i < PTRS_PER_PGD; i++, pgd++) { + addr = i * PGDIR_SIZE; + if (!pgd_none(*pgd)) { + walk_pud(&st, pgd, addr); + } else { + note_page(&st, addr, 1, pgd_val(*pgd)); + } + } + + note_page(&st, 0, 0, 0); +} + +static int ptdump_show(struct seq_file *m, void *v) +{ + walk_pgd(m); + return 0; +} + +static int ptdump_open(struct inode *inode, struct file *file) +{ + return single_open(file, ptdump_show, NULL); +} + 
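Editor's note (not part of the patch): once ptdump_init() below registers the "kernel_page_tables" debugfs file, the page-table walker above can be exercised from userspace with a plain read. A minimal sketch follows; it assumes debugfs is mounted at the conventional /sys/kernel/debug and that the reader has the required privileges (the file is created with mode 0400).

    /* Editorial sketch only -- a userspace reader for the dump, not kernel code. */
    #include <stdio.h>

    int main(void)
    {
            /* Path assumes debugfs is mounted at /sys/kernel/debug. */
            FILE *f = fopen("/sys/kernel/debug/kernel_page_tables", "r");
            char line[256];

            if (!f) {
                    perror("kernel_page_tables");
                    return 1;
            }
            /*
             * Each range line emitted by note_page() describes one contiguous
             * region with identical protection bits: the address span, its
             * size, and the decoded attribute strings (e.g. "ro", "NX", "SHD"),
             * with "---[ ... ]---" markers separating the address-space areas.
             */
            while (fgets(line, sizeof(line), f))
                    fputs(line, stdout);
            fclose(f);
            return 0;
    }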
+static const struct file_operations ptdump_fops = { + .open = ptdump_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int ptdump_init(void) +{ + struct dentry *pe; + unsigned i, j; + + for (i = 0; i < ARRAY_SIZE(pg_level); i++) + if (pg_level[i].bits) + for (j = 0; j < pg_level[i].num; j++) + pg_level[i].mask |= pg_level[i].bits[j].mask; + + address_markers[2].start_address = VMALLOC_START; + + pe = debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, + &ptdump_fops); + return pe ? 0 : -ENOMEM; +} +__initcall(ptdump_init); diff --git a/arch/arm/mm/extable.c b/arch/arm/mm/extable.c index 9592c3ee4cb..312e15e6d00 100644 --- a/arch/arm/mm/extable.c +++ b/arch/arm/mm/extable.c @@ -2,15 +2,20 @@ * linux/arch/arm/mm/extable.c */ #include <linux/module.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> int fixup_exception(struct pt_regs *regs) { const struct exception_table_entry *fixup; fixup = search_exception_tables(instruction_pointer(regs)); - if (fixup) + if (fixup) { regs->ARM_pc = fixup->fixup; +#ifdef CONFIG_THUMB2_KERNEL + /* Clear the IT state to avoid nasty surprises in the fixup */ + regs->ARM_cpsr &= ~PSR_IT_MASK; +#endif + } return fixup != NULL; } diff --git a/arch/arm/mm/fault-armv.c b/arch/arm/mm/fault-armv.c index 7fc1b35a674..ff379ac115d 100644 --- a/arch/arm/mm/fault-armv.c +++ b/arch/arm/mm/fault-armv.c @@ -8,7 +8,6 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ -#include <linux/module.h> #include <linux/sched.h> #include <linux/kernel.h> #include <linux/mm.h> @@ -16,13 +15,19 @@ #include <linux/vmalloc.h> #include <linux/init.h> #include <linux/pagemap.h> +#include <linux/gfp.h> +#include <asm/bugs.h> #include <asm/cacheflush.h> +#include <asm/cachetype.h> #include <asm/pgtable.h> #include <asm/tlbflush.h> -static unsigned long shared_pte_mask = L_PTE_CACHEABLE; +#include "mm.h" +static pteval_t shared_pte_mask = L_PTE_MT_BUFFERABLE; + +#if __LINUX_ARM_ARCH__ < 6 /* * We take the easy way out of this problem - we make the * PTE uncacheable. However, we leave the write buffer on. @@ -32,61 +37,103 @@ static unsigned long shared_pte_mask = L_PTE_CACHEABLE; * Therefore those configurations which might call adjust_pte (those * without CONFIG_CPU_CACHE_VIPT) cannot support split page_table_lock. */ -static int adjust_pte(struct vm_area_struct *vma, unsigned long address) +static int do_adjust_pte(struct vm_area_struct *vma, unsigned long address, + unsigned long pfn, pte_t *ptep) +{ + pte_t entry = *ptep; + int ret; + + /* + * If this page is present, it's actually being shared. + */ + ret = pte_present(entry); + + /* + * If this page isn't present, or is already setup to + * fault (ie, is old), we can safely ignore any issues. + */ + if (ret && (pte_val(entry) & L_PTE_MT_MASK) != shared_pte_mask) { + flush_cache_page(vma, address, pfn); + outer_flush_range((pfn << PAGE_SHIFT), + (pfn << PAGE_SHIFT) + PAGE_SIZE); + pte_val(entry) &= ~L_PTE_MT_MASK; + pte_val(entry) |= shared_pte_mask; + set_pte_at(vma->vm_mm, address, ptep, entry); + flush_tlb_page(vma, address); + } + + return ret; +} + +#if USE_SPLIT_PTE_PTLOCKS +/* + * If we are using split PTE locks, then we need to take the page + * lock here. Otherwise we are using shared mm->page_table_lock + * which is already locked, thus cannot take it. + */ +static inline void do_pte_lock(spinlock_t *ptl) { + /* + * Use nested version here to indicate that we are already + * holding one similar spinlock. 
+ */ + spin_lock_nested(ptl, SINGLE_DEPTH_NESTING); +} + +static inline void do_pte_unlock(spinlock_t *ptl) +{ + spin_unlock(ptl); +} +#else /* !USE_SPLIT_PTE_PTLOCKS */ +static inline void do_pte_lock(spinlock_t *ptl) {} +static inline void do_pte_unlock(spinlock_t *ptl) {} +#endif /* USE_SPLIT_PTE_PTLOCKS */ + +static int adjust_pte(struct vm_area_struct *vma, unsigned long address, + unsigned long pfn) +{ + spinlock_t *ptl; pgd_t *pgd; + pud_t *pud; pmd_t *pmd; - pte_t *pte, entry; - int ret = 0; + pte_t *pte; + int ret; pgd = pgd_offset(vma->vm_mm, address); - if (pgd_none(*pgd)) - goto no_pgd; - if (pgd_bad(*pgd)) - goto bad_pgd; + if (pgd_none_or_clear_bad(pgd)) + return 0; - pmd = pmd_offset(pgd, address); - if (pmd_none(*pmd)) - goto no_pmd; - if (pmd_bad(*pmd)) - goto bad_pmd; + pud = pud_offset(pgd, address); + if (pud_none_or_clear_bad(pud)) + return 0; - pte = pte_offset_map(pmd, address); - entry = *pte; + pmd = pmd_offset(pud, address); + if (pmd_none_or_clear_bad(pmd)) + return 0; /* - * If this page isn't present, or is already setup to - * fault (ie, is old), we can safely ignore any issues. + * This is called while another page table is mapped, so we + * must use the nested version. This also means we need to + * open-code the spin-locking. */ - if (pte_present(entry) && pte_val(entry) & shared_pte_mask) { - flush_cache_page(vma, address, pte_pfn(entry)); - pte_val(entry) &= ~shared_pte_mask; - set_pte(pte, entry); - flush_tlb_page(vma, address); - ret = 1; - } + ptl = pte_lockptr(vma->vm_mm, pmd); + pte = pte_offset_map(pmd, address); + do_pte_lock(ptl); + + ret = do_adjust_pte(vma, address, pfn, pte); + + do_pte_unlock(ptl); pte_unmap(pte); - return ret; -bad_pgd: - pgd_ERROR(*pgd); - pgd_clear(pgd); -no_pgd: - return 0; - -bad_pmd: - pmd_ERROR(*pmd); - pmd_clear(pmd); -no_pmd: - return 0; + return ret; } static void -make_coherent(struct address_space *mapping, struct vm_area_struct *vma, unsigned long addr, unsigned long pfn) +make_coherent(struct address_space *mapping, struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, unsigned long pfn) { struct mm_struct *mm = vma->vm_mm; struct vm_area_struct *mpnt; - struct prio_tree_iter iter; unsigned long offset; pgoff_t pgoff; int aliases = 0; @@ -99,7 +146,7 @@ make_coherent(struct address_space *mapping, struct vm_area_struct *vma, unsigne * cache coherency. */ flush_dcache_mmap_lock(mapping); - vma_prio_tree_foreach(mpnt, &iter, &mapping->i_mmap, pgoff, pgoff) { + vma_interval_tree_foreach(mpnt, &mapping->i_mmap, pgoff, pgoff) { /* * If this VMA is not in our MM, we can ignore it. * Note that we intentionally mask out the VMA @@ -110,23 +157,19 @@ make_coherent(struct address_space *mapping, struct vm_area_struct *vma, unsigne if (!(mpnt->vm_flags & VM_MAYSHARE)) continue; offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT; - aliases += adjust_pte(mpnt, mpnt->vm_start + offset); + aliases += adjust_pte(mpnt, mpnt->vm_start + offset, pfn); } flush_dcache_mmap_unlock(mapping); if (aliases) - adjust_pte(vma, addr); - else - flush_cache_page(vma, addr, pfn); + do_adjust_pte(vma, addr, pfn, ptep); } -void __flush_dcache_page(struct address_space *mapping, struct page *page); - /* * Take care of architecture specific things when placing a new PTE into * a page table, or changing an existing PTE. Basically, there are two * things that we need to take care of: * - * 1. If PG_dcache_dirty is set for the page, we need to ensure + * 1. 
If PG_dcache_clean is not set for the page, we need to ensure * that any cache entries for the kernels virtual memory * range are written back to the page. * 2. If we have multiple shared mappings of the same space in @@ -134,27 +177,35 @@ void __flush_dcache_page(struct address_space *mapping, struct page *page); * * Note that the pte lock will be held. */ -void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr, pte_t pte) +void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr, + pte_t *ptep) { - unsigned long pfn = pte_pfn(pte); + unsigned long pfn = pte_pfn(*ptep); struct address_space *mapping; struct page *page; if (!pfn_valid(pfn)) return; + /* + * The zero page is never written to, so never has any dirty + * cache lines, and therefore never needs to be flushed. + */ page = pfn_to_page(pfn); + if (page == ZERO_PAGE(0)) + return; + mapping = page_mapping(page); + if (!test_and_set_bit(PG_dcache_clean, &page->flags)) + __flush_dcache_page(mapping, page); if (mapping) { - int dirty = test_and_clear_bit(PG_dcache_dirty, &page->flags); - - if (dirty) - __flush_dcache_page(mapping, page); - if (cache_is_vivt()) - make_coherent(mapping, vma, addr, pfn); + make_coherent(mapping, vma, addr, ptep, pfn); + else if (vma->vm_flags & VM_EXEC) + __flush_icache_all(); } } +#endif /* __LINUX_ARM_ARCH__ < 6 */ /* * Check whether the write buffer has physical address aliasing @@ -189,9 +240,8 @@ void __init check_writebuffer_bugs(void) page = alloc_page(GFP_KERNEL); if (page) { unsigned long *p1, *p2; - pgprot_t prot = __pgprot(L_PTE_PRESENT|L_PTE_YOUNG| - L_PTE_DIRTY|L_PTE_WRITE| - L_PTE_BUFFERABLE); + pgprot_t prot = __pgprot_modify(PAGE_KERNEL, + L_PTE_MT_MASK, L_PTE_MT_BUFFERABLE); p1 = vmap(&page, 1, VM_IOREMAP, prot); p2 = vmap(&page, 1, VM_IOREMAP, prot); @@ -212,7 +262,7 @@ void __init check_writebuffer_bugs(void) if (v) { printk("failed, %s\n", reason); - shared_pte_mask |= L_PTE_BUFFERABLE; + shared_pte_mask = L_PTE_MT_UNCACHED; } else { printk("ok\n"); } diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index 4a884baf3b9..eb8830a4c5e 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -8,20 +8,50 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/signal.h> -#include <linux/ptrace.h> #include <linux/mm.h> +#include <linux/hardirq.h> #include <linux/init.h> - -#include <asm/system.h> +#include <linux/kprobes.h> +#include <linux/uaccess.h> +#include <linux/page-flags.h> +#include <linux/sched.h> +#include <linux/highmem.h> +#include <linux/perf_event.h> + +#include <asm/exception.h> #include <asm/pgtable.h> +#include <asm/system_misc.h> +#include <asm/system_info.h> #include <asm/tlbflush.h> -#include <asm/uaccess.h> #include "fault.h" +#ifdef CONFIG_MMU + +#ifdef CONFIG_KPROBES +static inline int notify_page_fault(struct pt_regs *regs, unsigned int fsr) +{ + int ret = 0; + + if (!user_mode(regs)) { + /* kprobe_running() needs smp_processor_id() */ + preempt_disable(); + if (kprobe_running() && kprobe_fault_handler(regs, fsr)) + ret = 1; + preempt_enable(); + } + + return ret; +} +#else +static inline int notify_page_fault(struct pt_regs *regs, unsigned int fsr) +{ + return 0; +} +#endif + /* * This is useful to dump out the page tables associated with * 'addr' in mm 'mm'. 
@@ -35,9 +65,11 @@ void show_pte(struct mm_struct *mm, unsigned long addr) printk(KERN_ALERT "pgd = %p\n", mm->pgd); pgd = pgd_offset(mm, addr); - printk(KERN_ALERT "[%08lx] *pgd=%08lx", addr, pgd_val(*pgd)); + printk(KERN_ALERT "[%08lx] *pgd=%08llx", + addr, (long long)pgd_val(*pgd)); do { + pud_t *pud; pmd_t *pmd; pte_t *pte; @@ -49,10 +81,21 @@ void show_pte(struct mm_struct *mm, unsigned long addr) break; } - pmd = pmd_offset(pgd, addr); -#if PTRS_PER_PMD != 1 - printk(", *pmd=%08lx", pmd_val(*pmd)); -#endif + pud = pud_offset(pgd, addr); + if (PTRS_PER_PUD != 1) + printk(", *pud=%08llx", (long long)pud_val(*pud)); + + if (pud_none(*pud)) + break; + + if (pud_bad(*pud)) { + printk("(bad)"); + break; + } + + pmd = pmd_offset(pud, addr); + if (PTRS_PER_PMD != 1) + printk(", *pmd=%08llx", (long long)pmd_val(*pmd)); if (pmd_none(*pmd)) break; @@ -62,17 +105,25 @@ void show_pte(struct mm_struct *mm, unsigned long addr) break; } -#ifndef CONFIG_HIGHMEM /* We must not map this if we have highmem enabled */ + if (PageHighMem(pfn_to_page(pmd_val(*pmd) >> PAGE_SHIFT))) + break; + pte = pte_offset_map(pmd, addr); - printk(", *pte=%08lx", pte_val(*pte)); - printk(", *ppte=%08lx", pte_val(pte[-PTRS_PER_PTE])); - pte_unmap(pte); + printk(", *pte=%08llx", (long long)pte_val(*pte)); +#ifndef CONFIG_ARM_LPAE + printk(", *ppte=%08llx", + (long long)pte_val(pte[PTE_HWTABLE_PTRS])); #endif + pte_unmap(pte); } while(0); printk("\n"); } +#else /* CONFIG_MMU */ +void show_pte(struct mm_struct *mm, unsigned long addr) +{ } +#endif /* CONFIG_MMU */ /* * Oops. The kernel tried to access some page that wasn't present. @@ -114,7 +165,8 @@ __do_user_fault(struct task_struct *tsk, unsigned long addr, struct siginfo si; #ifdef CONFIG_DEBUG_USER - if (user_debug & UDBG_SEGV) { + if (((user_debug & UDBG_SEGV) && (sig == SIGSEGV)) || + ((user_debug & UDBG_BUS) && (sig == SIGBUS))) { printk(KERN_DEBUG "%s: unhandled page fault (%d) at 0x%08lx, code 0x%03x\n", tsk->comm, sig, addr, fsr); show_pte(tsk->mm, addr); @@ -132,10 +184,11 @@ __do_user_fault(struct task_struct *tsk, unsigned long addr, force_sig_info(sig, &si, tsk); } -void -do_bad_area(struct task_struct *tsk, struct mm_struct *mm, unsigned long addr, - unsigned int fsr, struct pt_regs *regs) +void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { + struct task_struct *tsk = current; + struct mm_struct *mm = tsk->active_mm; + /* * If we are in kernel mode at this point, we * have no context to handle this fault with. @@ -146,21 +199,39 @@ do_bad_area(struct task_struct *tsk, struct mm_struct *mm, unsigned long addr, __do_kernel_fault(mm, addr, fsr, regs); } -#define VM_FAULT_BADMAP (-20) -#define VM_FAULT_BADACCESS (-21) +#ifdef CONFIG_MMU +#define VM_FAULT_BADMAP 0x010000 +#define VM_FAULT_BADACCESS 0x020000 -static int +/* + * Check that the permissions on the VMA allow for the fault which occurred. + * If we encountered a write fault, we must have write permission, otherwise + * we allow any permission. + */ +static inline bool access_error(unsigned int fsr, struct vm_area_struct *vma) +{ + unsigned int mask = VM_READ | VM_WRITE | VM_EXEC; + + if (fsr & FSR_WRITE) + mask = VM_WRITE; + if (fsr & FSR_LNX_PF) + mask = VM_EXEC; + + return vma->vm_flags & mask ? 
false : true; +} + +static int __kprobes __do_page_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr, - struct task_struct *tsk) + unsigned int flags, struct task_struct *tsk) { struct vm_area_struct *vma; - int fault, mask; + int fault; vma = find_vma(mm, addr); fault = VM_FAULT_BADMAP; - if (!vma) + if (unlikely(!vma)) goto out; - if (vma->vm_start > addr) + if (unlikely(vma->vm_start > addr)) goto check_stack; /* @@ -168,71 +239,52 @@ __do_page_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr, * memory access, so we can handle it. */ good_area: - if (fsr & (1 << 11)) /* write? */ - mask = VM_WRITE; - else - mask = VM_READ|VM_EXEC; - - fault = VM_FAULT_BADACCESS; - if (!(vma->vm_flags & mask)) + if (access_error(fsr, vma)) { + fault = VM_FAULT_BADACCESS; goto out; - - /* - * If for any reason at all we couldn't handle - * the fault, make sure we exit gracefully rather - * than endlessly redo the fault. - */ -survive: - fault = handle_mm_fault(mm, vma, addr & PAGE_MASK, fsr & (1 << 11)); - - /* - * Handle the "normal" cases first - successful and sigbus - */ - switch (fault) { - case VM_FAULT_MAJOR: - tsk->maj_flt++; - return fault; - case VM_FAULT_MINOR: - tsk->min_flt++; - case VM_FAULT_SIGBUS: - return fault; } - if (tsk->pid != 1) - goto out; - - /* - * If we are out of memory for pid1, sleep for a while and retry - */ - up_read(&mm->mmap_sem); - yield(); - down_read(&mm->mmap_sem); - goto survive; + return handle_mm_fault(mm, vma, addr & PAGE_MASK, flags); check_stack: - if (vma->vm_flags & VM_GROWSDOWN && !expand_stack(vma, addr)) + /* Don't allow expansion below FIRST_USER_ADDRESS */ + if (vma->vm_flags & VM_GROWSDOWN && + addr >= FIRST_USER_ADDRESS && !expand_stack(vma, addr)) goto good_area; out: return fault; } -static int +static int __kprobes do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { struct task_struct *tsk; struct mm_struct *mm; int fault, sig, code; + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; + + if (notify_page_fault(regs, fsr)) + return 0; tsk = current; mm = tsk->mm; + /* Enable interrupts if they were enabled in the parent context. */ + if (interrupts_enabled(regs)) + local_irq_enable(); + /* * If we're in an interrupt or have no user * context, we must not take the fault.. */ - if (in_interrupt() || !mm) + if (in_atomic() || !mm) goto no_context; + if (user_mode(regs)) + flags |= FAULT_FLAG_USER; + if (fsr & FSR_WRITE) + flags |= FAULT_FLAG_WRITE; + /* * As per x86, we may deadlock here. However, since the kernel only * validly references user space from well defined areas of the code, @@ -241,16 +293,63 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) if (!down_read_trylock(&mm->mmap_sem)) { if (!user_mode(regs) && !search_exception_tables(regs->ARM_pc)) goto no_context; +retry: down_read(&mm->mmap_sem); + } else { + /* + * The above down_read_trylock() might have succeeded in + * which case, we'll have missed the might_sleep() from + * down_read() + */ + might_sleep(); +#ifdef CONFIG_DEBUG_VM + if (!user_mode(regs) && + !search_exception_tables(regs->ARM_pc)) + goto no_context; +#endif + } + + fault = __do_page_fault(mm, addr, fsr, flags, tsk); + + /* If we need to retry but a fatal signal is pending, handle the + * signal first. We do not need to release the mmap_sem because + * it would already be released in __lock_page_or_retry in + * mm/filemap.c. 
*/ + if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) + return 0; + + /* + * Major/minor page fault accounting is only done on the + * initial attempt. If we go through a retry, it is extremely + * likely that the page will be found in page cache at that point. + */ + + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr); + if (!(fault & VM_FAULT_ERROR) && flags & FAULT_FLAG_ALLOW_RETRY) { + if (fault & VM_FAULT_MAJOR) { + tsk->maj_flt++; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, + regs, addr); + } else { + tsk->min_flt++; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, + regs, addr); + } + if (fault & VM_FAULT_RETRY) { + /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk + * of starvation. */ + flags &= ~FAULT_FLAG_ALLOW_RETRY; + flags |= FAULT_FLAG_TRIED; + goto retry; + } } - fault = __do_page_fault(mm, addr, fsr, tsk); up_read(&mm->mmap_sem); /* * Handle the "normal" case first - VM_FAULT_MAJOR / VM_FAULT_MINOR */ - if (fault >= VM_FAULT_MINOR) + if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP | VM_FAULT_BADACCESS)))) return 0; /* @@ -260,27 +359,24 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) if (!user_mode(regs)) goto no_context; - switch (fault) { - case VM_FAULT_OOM: + if (fault & VM_FAULT_OOM) { /* - * We ran out of memory, or some other thing - * happened to us that made us unable to handle - * the page fault gracefully. + * We ran out of memory, call the OOM killer, and return to + * userspace (which will retry the fault, or kill us if we + * got oom-killed) */ - printk("VM: killing process %s\n", tsk->comm); - do_exit(SIGKILL); + pagefault_out_of_memory(); return 0; + } - case VM_FAULT_SIGBUS: + if (fault & VM_FAULT_SIGBUS) { /* * We had some memory, but were unable to * successfully fix up this page fault. */ sig = SIGBUS; code = BUS_ADRERR; - break; - - default: + } else { /* * Something tried to access memory that * isn't in our memory map.. @@ -288,7 +384,6 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) sig = SIGSEGV; code = fault == VM_FAULT_BADACCESS ? SEGV_ACCERR : SEGV_MAPERR; - break; } __do_user_fault(tsk, addr, fsr, sig, code, regs); @@ -298,6 +393,13 @@ no_context: __do_kernel_fault(mm, addr, fsr, regs); return 0; } +#else /* CONFIG_MMU */ +static int +do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) +{ + return 0; +} +#endif /* CONFIG_MMU */ /* * First Level Translation Fault Handler @@ -316,59 +418,90 @@ no_context: * interrupt or a critical region, and should only copy the information * from the master page table, nothing more. */ -static int +#ifdef CONFIG_MMU +static int __kprobes do_translation_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { - struct task_struct *tsk; unsigned int index; pgd_t *pgd, *pgd_k; + pud_t *pud, *pud_k; pmd_t *pmd, *pmd_k; if (addr < TASK_SIZE) return do_page_fault(addr, fsr, regs); + if (user_mode(regs)) + goto bad_area; + index = pgd_index(addr); - /* - * FIXME: CP15 C1 is write only on ARMv3 architectures. 
- */ pgd = cpu_get_pgd() + index; pgd_k = init_mm.pgd + index; if (pgd_none(*pgd_k)) goto bad_area; - if (!pgd_present(*pgd)) set_pgd(pgd, *pgd_k); - pmd_k = pmd_offset(pgd_k, addr); - pmd = pmd_offset(pgd, addr); + pud = pud_offset(pgd, addr); + pud_k = pud_offset(pgd_k, addr); - if (pmd_none(*pmd_k)) + if (pud_none(*pud_k)) + goto bad_area; + if (!pud_present(*pud)) + set_pud(pud, *pud_k); + + pmd = pmd_offset(pud, addr); + pmd_k = pmd_offset(pud_k, addr); + +#ifdef CONFIG_ARM_LPAE + /* + * Only one hardware entry per PMD with LPAE. + */ + index = 0; +#else + /* + * On ARM one Linux PGD entry contains two hardware entries (see page + * tables layout in pgtable.h). We normally guarantee that we always + * fill both L1 entries. But create_mapping() doesn't follow the rule. + * It can create inidividual L1 entries, so here we have to call + * pmd_none() check for the entry really corresponded to address, not + * for the first of pair. + */ + index = (addr >> SECTION_SHIFT) & 1; +#endif + if (pmd_none(pmd_k[index])) goto bad_area; copy_pmd(pmd, pmd_k); return 0; bad_area: - tsk = current; - - do_bad_area(tsk, tsk->active_mm, addr, fsr, regs); + do_bad_area(addr, fsr, regs); + return 0; +} +#else /* CONFIG_MMU */ +static int +do_translation_fault(unsigned long addr, unsigned int fsr, + struct pt_regs *regs) +{ return 0; } +#endif /* CONFIG_MMU */ /* * Some section permission faults need to be handled gracefully. * They can happen due to a __{get,put}_user during an oops. */ +#ifndef CONFIG_ARM_LPAE static int do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { - struct task_struct *tsk = current; - do_bad_area(tsk, tsk->active_mm, addr, fsr, regs); + do_bad_area(addr, fsr, regs); return 0; } +#endif /* CONFIG_ARM_LPAE */ /* * This abort handler always returns "fault". @@ -379,76 +512,43 @@ do_bad(unsigned long addr, unsigned int fsr, struct pt_regs *regs) return 1; } -static struct fsr_info { +struct fsr_info { int (*fn)(unsigned long addr, unsigned int fsr, struct pt_regs *regs); int sig; int code; const char *name; -} fsr_info[] = { - /* - * The following are the standard ARMv3 and ARMv4 aborts. ARMv5 - * defines these to be "precise" aborts. - */ - { do_bad, SIGSEGV, 0, "vector exception" }, - { do_bad, SIGILL, BUS_ADRALN, "alignment exception" }, - { do_bad, SIGKILL, 0, "terminal exception" }, - { do_bad, SIGILL, BUS_ADRALN, "alignment exception" }, - { do_bad, SIGBUS, 0, "external abort on linefetch" }, - { do_translation_fault, SIGSEGV, SEGV_MAPERR, "section translation fault" }, - { do_bad, SIGBUS, 0, "external abort on linefetch" }, - { do_page_fault, SIGSEGV, SEGV_MAPERR, "page translation fault" }, - { do_bad, SIGBUS, 0, "external abort on non-linefetch" }, - { do_bad, SIGSEGV, SEGV_ACCERR, "section domain fault" }, - { do_bad, SIGBUS, 0, "external abort on non-linefetch" }, - { do_bad, SIGSEGV, SEGV_ACCERR, "page domain fault" }, - { do_bad, SIGBUS, 0, "external abort on translation" }, - { do_sect_fault, SIGSEGV, SEGV_ACCERR, "section permission fault" }, - { do_bad, SIGBUS, 0, "external abort on translation" }, - { do_page_fault, SIGSEGV, SEGV_ACCERR, "page permission fault" }, - /* - * The following are "imprecise" aborts, which are signalled by bit - * 10 of the FSR, and may not be recoverable. These are only - * supported if the CPU abort handler supports bit 10. 
- */ - { do_bad, SIGBUS, 0, "unknown 16" }, - { do_bad, SIGBUS, 0, "unknown 17" }, - { do_bad, SIGBUS, 0, "unknown 18" }, - { do_bad, SIGBUS, 0, "unknown 19" }, - { do_bad, SIGBUS, 0, "lock abort" }, /* xscale */ - { do_bad, SIGBUS, 0, "unknown 21" }, - { do_bad, SIGBUS, BUS_OBJERR, "imprecise external abort" }, /* xscale */ - { do_bad, SIGBUS, 0, "unknown 23" }, - { do_bad, SIGBUS, 0, "dcache parity error" }, /* xscale */ - { do_bad, SIGBUS, 0, "unknown 25" }, - { do_bad, SIGBUS, 0, "unknown 26" }, - { do_bad, SIGBUS, 0, "unknown 27" }, - { do_bad, SIGBUS, 0, "unknown 28" }, - { do_bad, SIGBUS, 0, "unknown 29" }, - { do_bad, SIGBUS, 0, "unknown 30" }, - { do_bad, SIGBUS, 0, "unknown 31" } }; +/* FSR definition */ +#ifdef CONFIG_ARM_LPAE +#include "fsr-3level.c" +#else +#include "fsr-2level.c" +#endif + void __init hook_fault_code(int nr, int (*fn)(unsigned long, unsigned int, struct pt_regs *), - int sig, const char *name) + int sig, int code, const char *name) { - if (nr >= 0 && nr < ARRAY_SIZE(fsr_info)) { - fsr_info[nr].fn = fn; - fsr_info[nr].sig = sig; - fsr_info[nr].name = name; - } + if (nr < 0 || nr >= ARRAY_SIZE(fsr_info)) + BUG(); + + fsr_info[nr].fn = fn; + fsr_info[nr].sig = sig; + fsr_info[nr].code = code; + fsr_info[nr].name = name; } /* * Dispatch a data abort to the relevant handler. */ -asmlinkage void +asmlinkage void __exception do_DataAbort(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { - const struct fsr_info *inf = fsr_info + (fsr & 15) + ((fsr & (1 << 10)) >> 6); + const struct fsr_info *inf = fsr_info + fsr_fs(fsr); struct siginfo info; - if (!inf->fn(addr, fsr, regs)) + if (!inf->fn(addr, fsr & ~FSR_LNX_PF, regs)) return; printk(KERN_ALERT "Unhandled fault: %s (0x%03x) at 0x%08lx\n", @@ -458,12 +558,62 @@ do_DataAbort(unsigned long addr, unsigned int fsr, struct pt_regs *regs) info.si_errno = 0; info.si_code = inf->code; info.si_addr = (void __user *)addr; - notify_die("", regs, &info, fsr, 0); + arm_notify_die("", regs, &info, fsr, 0); } -asmlinkage void -do_PrefetchAbort(unsigned long addr, struct pt_regs *regs) +void __init +hook_ifault_code(int nr, int (*fn)(unsigned long, unsigned int, struct pt_regs *), + int sig, int code, const char *name) { - do_translation_fault(addr, 0, regs); + if (nr < 0 || nr >= ARRAY_SIZE(ifsr_info)) + BUG(); + + ifsr_info[nr].fn = fn; + ifsr_info[nr].sig = sig; + ifsr_info[nr].code = code; + ifsr_info[nr].name = name; } +asmlinkage void __exception +do_PrefetchAbort(unsigned long addr, unsigned int ifsr, struct pt_regs *regs) +{ + const struct fsr_info *inf = ifsr_info + fsr_fs(ifsr); + struct siginfo info; + + if (!inf->fn(addr, ifsr | FSR_LNX_PF, regs)) + return; + + printk(KERN_ALERT "Unhandled prefetch abort: %s (0x%03x) at 0x%08lx\n", + inf->name, ifsr, addr); + + info.si_signo = inf->sig; + info.si_errno = 0; + info.si_code = inf->code; + info.si_addr = (void __user *)addr; + arm_notify_die("", regs, &info, ifsr, 0); +} + +#ifndef CONFIG_ARM_LPAE +static int __init exceptions_init(void) +{ + if (cpu_architecture() >= CPU_ARCH_ARMv6) { + hook_fault_code(4, do_translation_fault, SIGSEGV, SEGV_MAPERR, + "I-cache maintenance fault"); + } + + if (cpu_architecture() >= CPU_ARCH_ARMv7) { + /* + * TODO: Access flag faults introduced in ARMv6K. 
+ * Runtime check for 'K' extension is needed + */ + hook_fault_code(3, do_bad, SIGSEGV, SEGV_MAPERR, + "section access flag fault"); + hook_fault_code(6, do_bad, SIGSEGV, SEGV_MAPERR, + "section access flag fault"); + } + + return 0; +} + +arch_initcall(exceptions_init); +#endif diff --git a/arch/arm/mm/fault.h b/arch/arm/mm/fault.h index 73b59e83227..cf08bdfbe0d 100644 --- a/arch/arm/mm/fault.h +++ b/arch/arm/mm/fault.h @@ -1,6 +1,28 @@ -void do_bad_area(struct task_struct *tsk, struct mm_struct *mm, - unsigned long addr, unsigned int fsr, struct pt_regs *regs); +#ifndef __ARCH_ARM_FAULT_H +#define __ARCH_ARM_FAULT_H -void show_pte(struct mm_struct *mm, unsigned long addr); +/* + * Fault status register encodings. We steal bit 31 for our own purposes. + */ +#define FSR_LNX_PF (1 << 31) +#define FSR_WRITE (1 << 11) +#define FSR_FS4 (1 << 10) +#define FSR_FS3_0 (15) +#define FSR_FS5_0 (0x3f) +#ifdef CONFIG_ARM_LPAE +static inline int fsr_fs(unsigned int fsr) +{ + return fsr & FSR_FS5_0; +} +#else +static inline int fsr_fs(unsigned int fsr) +{ + return (fsr & FSR_FS3_0) | (fsr & FSR_FS4) >> 6; +} +#endif + +void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs); unsigned long search_exception_table(unsigned long addr); + +#endif /* __ARCH_ARM_FAULT_H */ diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c index c9a03981b78..43d54f5b26b 100644 --- a/arch/arm/mm/flush.c +++ b/arch/arm/mm/flush.c @@ -10,42 +10,53 @@ #include <linux/module.h> #include <linux/mm.h> #include <linux/pagemap.h> +#include <linux/highmem.h> #include <asm/cacheflush.h> -#include <asm/system.h> +#include <asm/cachetype.h> +#include <asm/highmem.h> +#include <asm/smp_plat.h> #include <asm/tlbflush.h> +#include <linux/hugetlb.h> -#ifdef CONFIG_CPU_CACHE_VIPT - -#define ALIAS_FLUSH_START 0xffff4000 +#include "mm.h" -#define TOP_PTE(x) pte_offset_kernel(top_pmd, x) +#ifdef CONFIG_CPU_CACHE_VIPT static void flush_pfn_alias(unsigned long pfn, unsigned long vaddr) { - unsigned long to = ALIAS_FLUSH_START + (CACHE_COLOUR(vaddr) << PAGE_SHIFT); + unsigned long to = FLUSH_ALIAS_START + (CACHE_COLOUR(vaddr) << PAGE_SHIFT); + const int zero = 0; - set_pte(TOP_PTE(to), pfn_pte(pfn, PAGE_KERNEL)); - flush_tlb_kernel_page(to); + set_top_pte(to, pfn_pte(pfn, PAGE_KERNEL)); asm( "mcrr p15, 0, %1, %0, c14\n" - " mcrr p15, 0, %1, %0, c5\n" + " mcr p15, 0, %2, c7, c10, 4" : - : "r" (to), "r" (to + PAGE_SIZE - L1_CACHE_BYTES) + : "r" (to), "r" (to + PAGE_SIZE - L1_CACHE_BYTES), "r" (zero) : "cc"); } +static void flush_icache_alias(unsigned long pfn, unsigned long vaddr, unsigned long len) +{ + unsigned long va = FLUSH_ALIAS_START + (CACHE_COLOUR(vaddr) << PAGE_SHIFT); + unsigned long offset = vaddr & (PAGE_SIZE - 1); + unsigned long to; + + set_top_pte(va, pfn_pte(pfn, PAGE_KERNEL)); + to = va + offset; + flush_icache_range(to, to + len); +} + void flush_cache_mm(struct mm_struct *mm) { if (cache_is_vivt()) { - if (cpu_isset(smp_processor_id(), mm->cpu_vm_mask)) - __cpuc_flush_user_all(); + vivt_flush_cache_mm(mm); return; } if (cache_is_vipt_aliasing()) { asm( "mcr p15, 0, %0, c7, c14, 0\n" - " mcr p15, 0, %0, c7, c5, 0\n" " mcr p15, 0, %0, c7, c10, 4" : : "r" (0) @@ -56,38 +67,122 @@ void flush_cache_mm(struct mm_struct *mm) void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { if (cache_is_vivt()) { - if (cpu_isset(smp_processor_id(), vma->vm_mm->cpu_vm_mask)) - __cpuc_flush_user_range(start & PAGE_MASK, PAGE_ALIGN(end), - vma->vm_flags); + vivt_flush_cache_range(vma, start, 
end); return; } if (cache_is_vipt_aliasing()) { asm( "mcr p15, 0, %0, c7, c14, 0\n" - " mcr p15, 0, %0, c7, c5, 0\n" " mcr p15, 0, %0, c7, c10, 4" : : "r" (0) : "cc"); } + + if (vma->vm_flags & VM_EXEC) + __flush_icache_all(); } void flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr, unsigned long pfn) { if (cache_is_vivt()) { - if (cpu_isset(smp_processor_id(), vma->vm_mm->cpu_vm_mask)) { - unsigned long addr = user_addr & PAGE_MASK; - __cpuc_flush_user_range(addr, addr + PAGE_SIZE, vma->vm_flags); - } + vivt_flush_cache_page(vma, user_addr, pfn); return; } - if (cache_is_vipt_aliasing()) + if (cache_is_vipt_aliasing()) { flush_pfn_alias(pfn, user_addr); + __flush_icache_all(); + } + + if (vma->vm_flags & VM_EXEC && icache_is_vivt_asid_tagged()) + __flush_icache_all(); } + #else -#define flush_pfn_alias(pfn,vaddr) do { } while (0) +#define flush_pfn_alias(pfn,vaddr) do { } while (0) +#define flush_icache_alias(pfn,vaddr,len) do { } while (0) +#endif + +#define FLAG_PA_IS_EXEC 1 +#define FLAG_PA_CORE_IN_MM 2 + +static void flush_ptrace_access_other(void *args) +{ + __flush_icache_all(); +} + +static inline +void __flush_ptrace_access(struct page *page, unsigned long uaddr, void *kaddr, + unsigned long len, unsigned int flags) +{ + if (cache_is_vivt()) { + if (flags & FLAG_PA_CORE_IN_MM) { + unsigned long addr = (unsigned long)kaddr; + __cpuc_coherent_kern_range(addr, addr + len); + } + return; + } + + if (cache_is_vipt_aliasing()) { + flush_pfn_alias(page_to_pfn(page), uaddr); + __flush_icache_all(); + return; + } + + /* VIPT non-aliasing D-cache */ + if (flags & FLAG_PA_IS_EXEC) { + unsigned long addr = (unsigned long)kaddr; + if (icache_is_vipt_aliasing()) + flush_icache_alias(page_to_pfn(page), uaddr, len); + else + __cpuc_coherent_kern_range(addr, addr + len); + if (cache_ops_need_broadcast()) + smp_call_function(flush_ptrace_access_other, + NULL, 1); + } +} + +static +void flush_ptrace_access(struct vm_area_struct *vma, struct page *page, + unsigned long uaddr, void *kaddr, unsigned long len) +{ + unsigned int flags = 0; + if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm))) + flags |= FLAG_PA_CORE_IN_MM; + if (vma->vm_flags & VM_EXEC) + flags |= FLAG_PA_IS_EXEC; + __flush_ptrace_access(page, uaddr, kaddr, len, flags); +} + +void flush_uprobe_xol_access(struct page *page, unsigned long uaddr, + void *kaddr, unsigned long len) +{ + unsigned int flags = FLAG_PA_CORE_IN_MM|FLAG_PA_IS_EXEC; + + __flush_ptrace_access(page, uaddr, kaddr, len, flags); +} + +/* + * Copy user data from/to a page which is mapped into a different + * processes address space. Really, we want to allow our "user + * space" model to handle this. + * + * Note that this code needs to run on the current CPU. + */ +void copy_to_user_page(struct vm_area_struct *vma, struct page *page, + unsigned long uaddr, void *dst, const void *src, + unsigned long len) +{ +#ifdef CONFIG_SMP + preempt_disable(); +#endif + memcpy(dst, src, len); + flush_ptrace_access(vma, page, uaddr, dst, len); +#ifdef CONFIG_SMP + preempt_enable(); #endif +} void __flush_dcache_page(struct address_space *mapping, struct page *page) { @@ -96,7 +191,27 @@ void __flush_dcache_page(struct address_space *mapping, struct page *page) * page. This ensures that data in the physical page is mutually * coherent with the kernels mapping. 
*/ - __cpuc_flush_dcache_page(page_address(page)); + if (!PageHighMem(page)) { + size_t page_size = PAGE_SIZE << compound_order(page); + __cpuc_flush_dcache_area(page_address(page), page_size); + } else { + unsigned long i; + if (cache_is_vipt_nonaliasing()) { + for (i = 0; i < (1 << compound_order(page)); i++) { + void *addr = kmap_atomic(page + i); + __cpuc_flush_dcache_area(addr, PAGE_SIZE); + kunmap_atomic(addr); + } + } else { + for (i = 0; i < (1 << compound_order(page)); i++) { + void *addr = kmap_high_get(page + i); + if (addr) { + __cpuc_flush_dcache_area(addr, PAGE_SIZE); + kunmap_high(page + i); + } + } + } + } /* * If this is a page cache page, and we have an aliasing VIPT cache, @@ -112,7 +227,6 @@ static void __flush_dcache_aliases(struct address_space *mapping, struct page *p { struct mm_struct *mm = current->active_mm; struct vm_area_struct *mpnt; - struct prio_tree_iter iter; pgoff_t pgoff; /* @@ -124,7 +238,7 @@ static void __flush_dcache_aliases(struct address_space *mapping, struct page *p pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); flush_dcache_mmap_lock(mapping); - vma_prio_tree_foreach(mpnt, &iter, &mapping->i_mmap, pgoff, pgoff) { + vma_interval_tree_foreach(mpnt, &mapping->i_mmap, pgoff, pgoff) { unsigned long offset; /* @@ -140,6 +254,34 @@ static void __flush_dcache_aliases(struct address_space *mapping, struct page *p flush_dcache_mmap_unlock(mapping); } +#if __LINUX_ARM_ARCH__ >= 6 +void __sync_icache_dcache(pte_t pteval) +{ + unsigned long pfn; + struct page *page; + struct address_space *mapping; + + if (cache_is_vipt_nonaliasing() && !pte_exec(pteval)) + /* only flush non-aliasing VIPT caches for exec mappings */ + return; + pfn = pte_pfn(pteval); + if (!pfn_valid(pfn)) + return; + + page = pfn_to_page(pfn); + if (cache_is_vipt_aliasing()) + mapping = page_mapping(page); + else + mapping = NULL; + + if (!test_and_set_bit(PG_dcache_clean, &page->flags)) + __flush_dcache_page(mapping, page); + + if (pte_exec(pteval)) + __flush_icache_all(); +} +#endif + /* * Ensure cache coherency between kernel mapping and userspace mapping * of this page. @@ -155,17 +297,106 @@ static void __flush_dcache_aliases(struct address_space *mapping, struct page *p * space mappings, we can be lazy and remember that we may have dirty * kernel cache lines for later. Otherwise, we assume we have * aliasing mappings. + * + * Note that we disable the lazy flush for SMP configurations where + * the cache maintenance operations are not automatically broadcasted. */ void flush_dcache_page(struct page *page) { - struct address_space *mapping = page_mapping(page); + struct address_space *mapping; + + /* + * The zero page is never written to, so never has any dirty + * cache lines, and therefore never needs to be flushed. + */ + if (page == ZERO_PAGE(0)) + return; + + mapping = page_mapping(page); - if (mapping && !mapping_mapped(mapping)) - set_bit(PG_dcache_dirty, &page->flags); + if (!cache_ops_need_broadcast() && + mapping && !page_mapped(page)) + clear_bit(PG_dcache_clean, &page->flags); else { __flush_dcache_page(mapping, page); if (mapping && cache_is_vivt()) __flush_dcache_aliases(mapping, page); + else if (mapping) + __flush_icache_all(); + set_bit(PG_dcache_clean, &page->flags); } } EXPORT_SYMBOL(flush_dcache_page); + +/* + * Ensure cache coherency for the kernel mapping of this page. We can + * assume that the page is pinned via kmap. 
+ * + * If the page only exists in the page cache and there are no user + * space mappings, this is a no-op since the page was already marked + * dirty at creation. Otherwise, we need to flush the dirty kernel + * cache lines directly. + */ +void flush_kernel_dcache_page(struct page *page) +{ + if (cache_is_vivt() || cache_is_vipt_aliasing()) { + struct address_space *mapping; + + mapping = page_mapping(page); + + if (!mapping || mapping_mapped(mapping)) { + void *addr; + + addr = page_address(page); + /* + * kmap_atomic() doesn't set the page virtual + * address for highmem pages, and + * kunmap_atomic() takes care of cache + * flushing already. + */ + if (!IS_ENABLED(CONFIG_HIGHMEM) || addr) + __cpuc_flush_dcache_area(addr, PAGE_SIZE); + } + } +} +EXPORT_SYMBOL(flush_kernel_dcache_page); + +/* + * Flush an anonymous page so that users of get_user_pages() + * can safely access the data. The expected sequence is: + * + * get_user_pages() + * -> flush_anon_page + * memcpy() to/from page + * if written to page, flush_dcache_page() + */ +void __flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr) +{ + unsigned long pfn; + + /* VIPT non-aliasing caches need do nothing */ + if (cache_is_vipt_nonaliasing()) + return; + + /* + * Write back and invalidate userspace mapping. + */ + pfn = page_to_pfn(page); + if (cache_is_vivt()) { + flush_cache_page(vma, vmaddr, pfn); + } else { + /* + * For aliasing VIPT, we can flush an alias of the + * userspace address only. + */ + flush_pfn_alias(pfn, vmaddr); + __flush_icache_all(); + } + + /* + * Invalidate kernel mapping. No data should be contained + * in this mapping of the page. FIXME: this is overkill + * since we actually ask for a write-back and invalidate. + */ + __cpuc_flush_dcache_area(page_address(page), PAGE_SIZE); +} diff --git a/arch/arm/mm/fsr-2level.c b/arch/arm/mm/fsr-2level.c new file mode 100644 index 00000000000..18ca74c0f34 --- /dev/null +++ b/arch/arm/mm/fsr-2level.c @@ -0,0 +1,78 @@ +static struct fsr_info fsr_info[] = { + /* + * The following are the standard ARMv3 and ARMv4 aborts. ARMv5 + * defines these to be "precise" aborts. + */ + { do_bad, SIGSEGV, 0, "vector exception" }, + { do_bad, SIGBUS, BUS_ADRALN, "alignment exception" }, + { do_bad, SIGKILL, 0, "terminal exception" }, + { do_bad, SIGBUS, BUS_ADRALN, "alignment exception" }, + { do_bad, SIGBUS, 0, "external abort on linefetch" }, + { do_translation_fault, SIGSEGV, SEGV_MAPERR, "section translation fault" }, + { do_bad, SIGBUS, 0, "external abort on linefetch" }, + { do_page_fault, SIGSEGV, SEGV_MAPERR, "page translation fault" }, + { do_bad, SIGBUS, 0, "external abort on non-linefetch" }, + { do_bad, SIGSEGV, SEGV_ACCERR, "section domain fault" }, + { do_bad, SIGBUS, 0, "external abort on non-linefetch" }, + { do_bad, SIGSEGV, SEGV_ACCERR, "page domain fault" }, + { do_bad, SIGBUS, 0, "external abort on translation" }, + { do_sect_fault, SIGSEGV, SEGV_ACCERR, "section permission fault" }, + { do_bad, SIGBUS, 0, "external abort on translation" }, + { do_page_fault, SIGSEGV, SEGV_ACCERR, "page permission fault" }, + /* + * The following are "imprecise" aborts, which are signalled by bit + * 10 of the FSR, and may not be recoverable. These are only + * supported if the CPU abort handler supports bit 10. 
+ */ + { do_bad, SIGBUS, 0, "unknown 16" }, + { do_bad, SIGBUS, 0, "unknown 17" }, + { do_bad, SIGBUS, 0, "unknown 18" }, + { do_bad, SIGBUS, 0, "unknown 19" }, + { do_bad, SIGBUS, 0, "lock abort" }, /* xscale */ + { do_bad, SIGBUS, 0, "unknown 21" }, + { do_bad, SIGBUS, BUS_OBJERR, "imprecise external abort" }, /* xscale */ + { do_bad, SIGBUS, 0, "unknown 23" }, + { do_bad, SIGBUS, 0, "dcache parity error" }, /* xscale */ + { do_bad, SIGBUS, 0, "unknown 25" }, + { do_bad, SIGBUS, 0, "unknown 26" }, + { do_bad, SIGBUS, 0, "unknown 27" }, + { do_bad, SIGBUS, 0, "unknown 28" }, + { do_bad, SIGBUS, 0, "unknown 29" }, + { do_bad, SIGBUS, 0, "unknown 30" }, + { do_bad, SIGBUS, 0, "unknown 31" }, +}; + +static struct fsr_info ifsr_info[] = { + { do_bad, SIGBUS, 0, "unknown 0" }, + { do_bad, SIGBUS, 0, "unknown 1" }, + { do_bad, SIGBUS, 0, "debug event" }, + { do_bad, SIGSEGV, SEGV_ACCERR, "section access flag fault" }, + { do_bad, SIGBUS, 0, "unknown 4" }, + { do_translation_fault, SIGSEGV, SEGV_MAPERR, "section translation fault" }, + { do_bad, SIGSEGV, SEGV_ACCERR, "page access flag fault" }, + { do_page_fault, SIGSEGV, SEGV_MAPERR, "page translation fault" }, + { do_bad, SIGBUS, 0, "external abort on non-linefetch" }, + { do_bad, SIGSEGV, SEGV_ACCERR, "section domain fault" }, + { do_bad, SIGBUS, 0, "unknown 10" }, + { do_bad, SIGSEGV, SEGV_ACCERR, "page domain fault" }, + { do_bad, SIGBUS, 0, "external abort on translation" }, + { do_sect_fault, SIGSEGV, SEGV_ACCERR, "section permission fault" }, + { do_bad, SIGBUS, 0, "external abort on translation" }, + { do_page_fault, SIGSEGV, SEGV_ACCERR, "page permission fault" }, + { do_bad, SIGBUS, 0, "unknown 16" }, + { do_bad, SIGBUS, 0, "unknown 17" }, + { do_bad, SIGBUS, 0, "unknown 18" }, + { do_bad, SIGBUS, 0, "unknown 19" }, + { do_bad, SIGBUS, 0, "unknown 20" }, + { do_bad, SIGBUS, 0, "unknown 21" }, + { do_bad, SIGBUS, 0, "unknown 22" }, + { do_bad, SIGBUS, 0, "unknown 23" }, + { do_bad, SIGBUS, 0, "unknown 24" }, + { do_bad, SIGBUS, 0, "unknown 25" }, + { do_bad, SIGBUS, 0, "unknown 26" }, + { do_bad, SIGBUS, 0, "unknown 27" }, + { do_bad, SIGBUS, 0, "unknown 28" }, + { do_bad, SIGBUS, 0, "unknown 29" }, + { do_bad, SIGBUS, 0, "unknown 30" }, + { do_bad, SIGBUS, 0, "unknown 31" }, +}; diff --git a/arch/arm/mm/fsr-3level.c b/arch/arm/mm/fsr-3level.c new file mode 100644 index 00000000000..ab4409a2307 --- /dev/null +++ b/arch/arm/mm/fsr-3level.c @@ -0,0 +1,68 @@ +static struct fsr_info fsr_info[] = { + { do_bad, SIGBUS, 0, "unknown 0" }, + { do_bad, SIGBUS, 0, "unknown 1" }, + { do_bad, SIGBUS, 0, "unknown 2" }, + { do_bad, SIGBUS, 0, "unknown 3" }, + { do_bad, SIGBUS, 0, "reserved translation fault" }, + { do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 1 translation fault" }, + { do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 2 translation fault" }, + { do_page_fault, SIGSEGV, SEGV_MAPERR, "level 3 translation fault" }, + { do_bad, SIGBUS, 0, "reserved access flag fault" }, + { do_bad, SIGSEGV, SEGV_ACCERR, "level 1 access flag fault" }, + { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 access flag fault" }, + { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 access flag fault" }, + { do_bad, SIGBUS, 0, "reserved permission fault" }, + { do_bad, SIGSEGV, SEGV_ACCERR, "level 1 permission fault" }, + { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 permission fault" }, + { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 permission fault" }, + { do_bad, SIGBUS, 0, "synchronous external abort" }, + { do_bad, SIGBUS, 0, "asynchronous external 
abort" }, + { do_bad, SIGBUS, 0, "unknown 18" }, + { do_bad, SIGBUS, 0, "unknown 19" }, + { do_bad, SIGBUS, 0, "synchronous abort (translation table walk)" }, + { do_bad, SIGBUS, 0, "synchronous abort (translation table walk)" }, + { do_bad, SIGBUS, 0, "synchronous abort (translation table walk)" }, + { do_bad, SIGBUS, 0, "synchronous abort (translation table walk)" }, + { do_bad, SIGBUS, 0, "synchronous parity error" }, + { do_bad, SIGBUS, 0, "asynchronous parity error" }, + { do_bad, SIGBUS, 0, "unknown 26" }, + { do_bad, SIGBUS, 0, "unknown 27" }, + { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk" }, + { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk" }, + { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk" }, + { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk" }, + { do_bad, SIGBUS, 0, "unknown 32" }, + { do_bad, SIGBUS, BUS_ADRALN, "alignment fault" }, + { do_bad, SIGBUS, 0, "debug event" }, + { do_bad, SIGBUS, 0, "unknown 35" }, + { do_bad, SIGBUS, 0, "unknown 36" }, + { do_bad, SIGBUS, 0, "unknown 37" }, + { do_bad, SIGBUS, 0, "unknown 38" }, + { do_bad, SIGBUS, 0, "unknown 39" }, + { do_bad, SIGBUS, 0, "unknown 40" }, + { do_bad, SIGBUS, 0, "unknown 41" }, + { do_bad, SIGBUS, 0, "unknown 42" }, + { do_bad, SIGBUS, 0, "unknown 43" }, + { do_bad, SIGBUS, 0, "unknown 44" }, + { do_bad, SIGBUS, 0, "unknown 45" }, + { do_bad, SIGBUS, 0, "unknown 46" }, + { do_bad, SIGBUS, 0, "unknown 47" }, + { do_bad, SIGBUS, 0, "unknown 48" }, + { do_bad, SIGBUS, 0, "unknown 49" }, + { do_bad, SIGBUS, 0, "unknown 50" }, + { do_bad, SIGBUS, 0, "unknown 51" }, + { do_bad, SIGBUS, 0, "implementation fault (lockdown abort)" }, + { do_bad, SIGBUS, 0, "unknown 53" }, + { do_bad, SIGBUS, 0, "unknown 54" }, + { do_bad, SIGBUS, 0, "unknown 55" }, + { do_bad, SIGBUS, 0, "unknown 56" }, + { do_bad, SIGBUS, 0, "unknown 57" }, + { do_bad, SIGBUS, 0, "implementation fault (coprocessor abort)" }, + { do_bad, SIGBUS, 0, "unknown 59" }, + { do_bad, SIGBUS, 0, "unknown 60" }, + { do_bad, SIGBUS, 0, "unknown 61" }, + { do_bad, SIGBUS, 0, "unknown 62" }, + { do_bad, SIGBUS, 0, "unknown 63" }, +}; + +#define ifsr_info fsr_info diff --git a/arch/arm/mm/highmem.c b/arch/arm/mm/highmem.c new file mode 100644 index 00000000000..45aeaaca905 --- /dev/null +++ b/arch/arm/mm/highmem.c @@ -0,0 +1,152 @@ +/* + * arch/arm/mm/highmem.c -- ARM highmem support + * + * Author: Nicolas Pitre + * Created: september 8, 2008 + * Copyright: Marvell Semiconductors Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
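
For illustration only (not part of this commit): entries in the fsr_info[] and ifsr_info[] tables above are only data; the data/prefetch abort entry code indexes such a table by the status bits of the fault status register and signals the task when the selected handler cannot fix things up. A minimal sketch, assuming the usual fsr_fs() helper and the fn/sig/code/name layout of struct fsr_info:

    /* Sketch: decode an FSR value against a table like the ones above. */
    static void demo_handle_abort(unsigned long addr, unsigned int fsr,
                                  struct pt_regs *regs)
    {
            const struct fsr_info *inf = fsr_info + fsr_fs(fsr);

            if (!inf->fn(addr, fsr, regs))
                    return;         /* handled, e.g. by do_page_fault() */

            pr_alert("Unhandled fault: %s (0x%03x) at 0x%08lx\n",
                     inf->name, fsr, addr);
            /* a real handler would now deliver inf->sig / inf->code to the task */
    }
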
+ */ + +#include <linux/module.h> +#include <linux/highmem.h> +#include <linux/interrupt.h> +#include <asm/fixmap.h> +#include <asm/cacheflush.h> +#include <asm/tlbflush.h> +#include "mm.h" + +pte_t *fixmap_page_table; + +static inline void set_fixmap_pte(int idx, pte_t pte) +{ + unsigned long vaddr = __fix_to_virt(idx); + set_pte_ext(fixmap_page_table + idx, pte, 0); + local_flush_tlb_kernel_page(vaddr); +} + +static inline pte_t get_fixmap_pte(unsigned long vaddr) +{ + unsigned long idx = __virt_to_fix(vaddr); + return *(fixmap_page_table + idx); +} + +void *kmap(struct page *page) +{ + might_sleep(); + if (!PageHighMem(page)) + return page_address(page); + return kmap_high(page); +} +EXPORT_SYMBOL(kmap); + +void kunmap(struct page *page) +{ + BUG_ON(in_interrupt()); + if (!PageHighMem(page)) + return; + kunmap_high(page); +} +EXPORT_SYMBOL(kunmap); + +void *kmap_atomic(struct page *page) +{ + unsigned int idx; + unsigned long vaddr; + void *kmap; + int type; + + pagefault_disable(); + if (!PageHighMem(page)) + return page_address(page); + +#ifdef CONFIG_DEBUG_HIGHMEM + /* + * There is no cache coherency issue when non VIVT, so force the + * dedicated kmap usage for better debugging purposes in that case. + */ + if (!cache_is_vivt()) + kmap = NULL; + else +#endif + kmap = kmap_high_get(page); + if (kmap) + return kmap; + + type = kmap_atomic_idx_push(); + + idx = type + KM_TYPE_NR * smp_processor_id(); + vaddr = __fix_to_virt(idx); +#ifdef CONFIG_DEBUG_HIGHMEM + /* + * With debugging enabled, kunmap_atomic forces that entry to 0. + * Make sure it was indeed properly unmapped. + */ + BUG_ON(!pte_none(*(fixmap_page_table + idx))); +#endif + /* + * When debugging is off, kunmap_atomic leaves the previous mapping + * in place, so the contained TLB flush ensures the TLB is updated + * with the new mapping. + */ + set_fixmap_pte(idx, mk_pte(page, kmap_prot)); + + return (void *)vaddr; +} +EXPORT_SYMBOL(kmap_atomic); + +void __kunmap_atomic(void *kvaddr) +{ + unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; + int idx, type; + + if (kvaddr >= (void *)FIXADDR_START) { + type = kmap_atomic_idx(); + idx = type + KM_TYPE_NR * smp_processor_id(); + + if (cache_is_vivt()) + __cpuc_flush_dcache_area((void *)vaddr, PAGE_SIZE); +#ifdef CONFIG_DEBUG_HIGHMEM + BUG_ON(vaddr != __fix_to_virt(idx)); + set_fixmap_pte(idx, __pte(0)); +#else + (void) idx; /* to kill a warning */ +#endif + kmap_atomic_idx_pop(); + } else if (vaddr >= PKMAP_ADDR(0) && vaddr < PKMAP_ADDR(LAST_PKMAP)) { + /* this address was obtained through kmap_high_get() */ + kunmap_high(pte_page(pkmap_page_table[PKMAP_NR(vaddr)])); + } + pagefault_enable(); +} +EXPORT_SYMBOL(__kunmap_atomic); + +void *kmap_atomic_pfn(unsigned long pfn) +{ + unsigned long vaddr; + int idx, type; + + pagefault_disable(); + + type = kmap_atomic_idx_push(); + idx = type + KM_TYPE_NR * smp_processor_id(); + vaddr = __fix_to_virt(idx); +#ifdef CONFIG_DEBUG_HIGHMEM + BUG_ON(!pte_none(*(fixmap_page_table + idx))); +#endif + set_fixmap_pte(idx, pfn_pte(pfn, kmap_prot)); + + return (void *)vaddr; +} + +struct page *kmap_atomic_to_page(const void *ptr) +{ + unsigned long vaddr = (unsigned long)ptr; + + if (vaddr < FIXADDR_START) + return virt_to_page(ptr); + + return pte_page(get_fixmap_pte(vaddr)); +} diff --git a/arch/arm/mm/hugetlbpage.c b/arch/arm/mm/hugetlbpage.c new file mode 100644 index 00000000000..66781bf3407 --- /dev/null +++ b/arch/arm/mm/hugetlbpage.c @@ -0,0 +1,58 @@ +/* + * arch/arm/mm/hugetlbpage.c + * + * Copyright (C) 2012 ARM Ltd. 
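
For illustration only (not part of this commit): the kmap_atomic()/__kunmap_atomic() pair added in highmem.c above is what the generic kmap_atomic()/kunmap_atomic() helpers resolve to on ARM. A minimal sketch of the usual caller pattern, using a hypothetical demo_copy_from_page() helper:

    /* Copy out of a (possibly highmem) page without sleeping. */
    static void demo_copy_from_page(struct page *page, void *dst, size_t len)
    {
            void *vaddr = kmap_atomic(page);   /* per-CPU fixmap slot on ARM */

            memcpy(dst, vaddr, min_t(size_t, len, PAGE_SIZE));
            kunmap_atomic(vaddr);              /* wraps __kunmap_atomic() above */
    }
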
+ * + * Based on arch/x86/include/asm/hugetlb.h and Bill Carson's patches + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/init.h> +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/hugetlb.h> +#include <linux/pagemap.h> +#include <linux/err.h> +#include <linux/sysctl.h> +#include <asm/mman.h> +#include <asm/tlb.h> +#include <asm/tlbflush.h> +#include <asm/pgalloc.h> + +/* + * On ARM, huge pages are backed by pmd's rather than pte's, so we do a lot + * of type casting from pmd_t * to pte_t *. + */ + +struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, + int write) +{ + return ERR_PTR(-EINVAL); +} + +int pud_huge(pud_t pud) +{ + return 0; +} + +int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) +{ + return 0; +} + +int pmd_huge(pmd_t pmd) +{ + return pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT); +} diff --git a/arch/arm/mm/idmap.c b/arch/arm/mm/idmap.c new file mode 100644 index 00000000000..c447ec70e86 --- /dev/null +++ b/arch/arm/mm/idmap.c @@ -0,0 +1,136 @@ +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/slab.h> + +#include <asm/cputype.h> +#include <asm/idmap.h> +#include <asm/pgalloc.h> +#include <asm/pgtable.h> +#include <asm/sections.h> +#include <asm/system_info.h> + +/* + * Note: accesses outside of the kernel image and the identity map area + * are not supported on any CPU using the idmap tables as its current + * page tables. + */ +pgd_t *idmap_pgd; +phys_addr_t (*arch_virt_to_idmap) (unsigned long x); + +#ifdef CONFIG_ARM_LPAE +static void idmap_add_pmd(pud_t *pud, unsigned long addr, unsigned long end, + unsigned long prot) +{ + pmd_t *pmd; + unsigned long next; + + if (pud_none_or_clear_bad(pud) || (pud_val(*pud) & L_PGD_SWAPPER)) { + pmd = pmd_alloc_one(&init_mm, addr); + if (!pmd) { + pr_warning("Failed to allocate identity pmd.\n"); + return; + } + /* + * Copy the original PMD to ensure that the PMD entries for + * the kernel image are preserved. 
+ */ + if (!pud_none(*pud)) + memcpy(pmd, pmd_offset(pud, 0), + PTRS_PER_PMD * sizeof(pmd_t)); + pud_populate(&init_mm, pud, pmd); + pmd += pmd_index(addr); + } else + pmd = pmd_offset(pud, addr); + + do { + next = pmd_addr_end(addr, end); + *pmd = __pmd((addr & PMD_MASK) | prot); + flush_pmd_entry(pmd); + } while (pmd++, addr = next, addr != end); +} +#else /* !CONFIG_ARM_LPAE */ +static void idmap_add_pmd(pud_t *pud, unsigned long addr, unsigned long end, + unsigned long prot) +{ + pmd_t *pmd = pmd_offset(pud, addr); + + addr = (addr & PMD_MASK) | prot; + pmd[0] = __pmd(addr); + addr += SECTION_SIZE; + pmd[1] = __pmd(addr); + flush_pmd_entry(pmd); +} +#endif /* CONFIG_ARM_LPAE */ + +static void idmap_add_pud(pgd_t *pgd, unsigned long addr, unsigned long end, + unsigned long prot) +{ + pud_t *pud = pud_offset(pgd, addr); + unsigned long next; + + do { + next = pud_addr_end(addr, end); + idmap_add_pmd(pud, addr, next, prot); + } while (pud++, addr = next, addr != end); +} + +static void identity_mapping_add(pgd_t *pgd, const char *text_start, + const char *text_end, unsigned long prot) +{ + unsigned long addr, end; + unsigned long next; + + addr = virt_to_idmap(text_start); + end = virt_to_idmap(text_end); + pr_info("Setting up static identity map for 0x%lx - 0x%lx\n", addr, end); + + prot |= PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_AF; + + if (cpu_architecture() <= CPU_ARCH_ARMv5TEJ && !cpu_is_xscale()) + prot |= PMD_BIT4; + + pgd += pgd_index(addr); + do { + next = pgd_addr_end(addr, end); + idmap_add_pud(pgd, addr, next, prot); + } while (pgd++, addr = next, addr != end); +} + +extern char __idmap_text_start[], __idmap_text_end[]; + +static int __init init_static_idmap(void) +{ + idmap_pgd = pgd_alloc(&init_mm); + if (!idmap_pgd) + return -ENOMEM; + + identity_mapping_add(idmap_pgd, __idmap_text_start, + __idmap_text_end, 0); + + /* Flush L1 for the hardware to see this page table content */ + flush_cache_louis(); + + return 0; +} +early_initcall(init_static_idmap); + +/* + * In order to soft-boot, we need to switch to a 1:1 mapping for the + * cpu_reset functions. This will then ensure that we have predictable + * results when turning off the mmu. + */ +void setup_mm_for_reboot(void) +{ + /* Switch to the identity mapping. */ + cpu_switch_mm(idmap_pgd, &init_mm); + local_flush_bp_all(); + +#ifdef CONFIG_CPU_HAS_ASID + /* + * We don't have a clean ASID for the identity mapping, which + * may clash with virtual addresses of the previous page tables + * and therefore potentially in the TLB. + */ + local_flush_tlb_all(); +#endif +} diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index c168f322ef8..659c75d808d 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -7,60 +7,107 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. 
*/ -#include <linux/config.h> #include <linux/kernel.h> #include <linux/errno.h> -#include <linux/ptrace.h> #include <linux/swap.h> #include <linux/init.h> #include <linux/bootmem.h> #include <linux/mman.h> +#include <linux/export.h> #include <linux/nodemask.h> #include <linux/initrd.h> - +#include <linux/of_fdt.h> +#include <linux/highmem.h> +#include <linux/gfp.h> +#include <linux/memblock.h> +#include <linux/dma-contiguous.h> +#include <linux/sizes.h> + +#include <asm/cp15.h> #include <asm/mach-types.h> -#include <asm/hardware.h> +#include <asm/memblock.h> +#include <asm/prom.h> +#include <asm/sections.h> #include <asm/setup.h> #include <asm/tlb.h> +#include <asm/fixmap.h> #include <asm/mach/arch.h> #include <asm/mach/map.h> -#define TABLE_SIZE (2 * PTRS_PER_PTE * sizeof(pte_t)) +#include "mm.h" + +#ifdef CONFIG_CPU_CP15_MMU +unsigned long __init __clear_cr(unsigned long mask) +{ + cr_alignment = cr_alignment & ~mask; + return cr_alignment; +} +#endif -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); +static phys_addr_t phys_initrd_start __initdata = 0; +static unsigned long phys_initrd_size __initdata = 0; -extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; -extern void _stext, _text, _etext, __data_start, _end, __init_begin, __init_end; -extern unsigned long phys_initrd_start; -extern unsigned long phys_initrd_size; +static int __init early_initrd(char *p) +{ + phys_addr_t start; + unsigned long size; + char *endp; -/* - * The sole use of this is to pass memory configuration - * data from paging_init to mem_init. - */ -static struct meminfo meminfo __initdata = { 0, }; + start = memparse(p, &endp); + if (*endp == ',') { + size = memparse(endp + 1, NULL); + + phys_initrd_start = start; + phys_initrd_size = size; + } + return 0; +} +early_param("initrd", early_initrd); + +static int __init parse_tag_initrd(const struct tag *tag) +{ + printk(KERN_WARNING "ATAG_INITRD is deprecated; " + "please update your bootloader.\n"); + phys_initrd_start = __virt_to_phys(tag->u.initrd.start); + phys_initrd_size = tag->u.initrd.size; + return 0; +} + +__tagtable(ATAG_INITRD, parse_tag_initrd); + +static int __init parse_tag_initrd2(const struct tag *tag) +{ + phys_initrd_start = tag->u.initrd.start; + phys_initrd_size = tag->u.initrd.size; + return 0; +} + +__tagtable(ATAG_INITRD2, parse_tag_initrd2); /* - * empty_zero_page is a special page that is used for - * zero-initialized data and COW. + * This keeps memory configuration data used by a couple memory + * initialization functions, as well as show_mem() for the skipping + * of holes in the memory map. It is populated by arm_add_memory(). 
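
For illustration only (not part of this commit): the early_initrd() parser above accepts the familiar "initrd=<start>,<size>" command-line syntax, with memparse() handling hex values and K/M/G suffixes. A minimal sketch with hypothetical values:

    /* What early_initrd() stores for "initrd=0x60800000,8M" (hypothetical). */
    static void __init demo_initrd_param(void)
    {
            char *endp;
            phys_addr_t start = memparse("0x60800000,8M", &endp); /* 0x60800000, endp at ',' */
            unsigned long size = memparse(endp + 1, NULL);        /* 8 << 20 */

            pr_info("initrd at %pa, %lu bytes\n", &start, size);
    }
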
*/ -struct page *empty_zero_page; - -void show_mem(void) +void show_mem(unsigned int filter) { int free = 0, total = 0, reserved = 0; - int shared = 0, cached = 0, slab = 0, node; + int shared = 0, cached = 0, slab = 0; + struct memblock_region *reg; printk("Mem-info:\n"); - show_free_areas(); - printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); + show_free_areas(filter); - for_each_online_node(node) { + for_each_memblock (memory, reg) { + unsigned int pfn1, pfn2; struct page *page, *end; - page = NODE_MEM_MAP(node); - end = page + NODE_DATA(node)->node_spanned_pages; + pfn1 = memblock_region_memory_base_pfn(reg); + pfn2 = memblock_region_memory_end_pfn(reg); + + page = pfn_to_page(pfn1); + end = pfn_to_page(pfn2 - 1) + 1; do { total++; @@ -74,8 +121,9 @@ void show_mem(void) free++; else shared += page_count(page) - 1; - page++; - } while (page < end); + pfn1++; + page = pfn_to_page(pfn1); + } while (pfn1 < pfn2); } printk("%d pages of RAM\n", total); @@ -86,469 +134,259 @@ void show_mem(void) printk("%d pages swap cached\n", cached); } -static inline pmd_t *pmd_off(pgd_t *pgd, unsigned long virt) +static void __init find_limits(unsigned long *min, unsigned long *max_low, + unsigned long *max_high) { - return pmd_offset(pgd, virt); + *max_low = PFN_DOWN(memblock_get_current_limit()); + *min = PFN_UP(memblock_start_of_DRAM()); + *max_high = PFN_DOWN(memblock_end_of_DRAM()); } -static inline pmd_t *pmd_off_k(unsigned long virt) -{ - return pmd_off(pgd_offset_k(virt), virt); -} +#ifdef CONFIG_ZONE_DMA -#define for_each_nodebank(iter,mi,no) \ - for (iter = 0; iter < mi->nr_banks; iter++) \ - if (mi->bank[iter].node == no) +phys_addr_t arm_dma_zone_size __read_mostly; +EXPORT_SYMBOL(arm_dma_zone_size); /* - * FIXME: We really want to avoid allocating the bootmap bitmap - * over the top of the initrd. Hopefully, this is located towards - * the start of a bank, so if we allocate the bootmap bitmap at - * the end, we won't clash. + * The DMA mask corresponding to the maximum bus address allocatable + * using GFP_DMA. The default here places no restriction on DMA + * allocations. This must be the smallest DMA mask in the system, + * so a successful GFP_DMA allocation will always satisfy this. */ -static unsigned int __init -find_bootmap_pfn(int node, struct meminfo *mi, unsigned int bootmap_pages) -{ - unsigned int start_pfn, bank, bootmap_pfn; - - start_pfn = PAGE_ALIGN(__pa(&_end)) >> PAGE_SHIFT; - bootmap_pfn = 0; - - for_each_nodebank(bank, mi, node) { - unsigned int start, end; - - start = mi->bank[bank].start >> PAGE_SHIFT; - end = (mi->bank[bank].size + - mi->bank[bank].start) >> PAGE_SHIFT; - - if (end < start_pfn) - continue; +phys_addr_t arm_dma_limit; +unsigned long arm_dma_pfn_limit; - if (start < start_pfn) - start = start_pfn; - - if (end <= start) - continue; - - if (end - start >= bootmap_pages) { - bootmap_pfn = start; - break; - } - } - - if (bootmap_pfn == 0) - BUG(); +static void __init arm_adjust_dma_zone(unsigned long *size, unsigned long *hole, + unsigned long dma_size) +{ + if (size[0] <= dma_size) + return; - return bootmap_pfn; + size[ZONE_NORMAL] = size[0] - dma_size; + size[ZONE_DMA] = dma_size; + hole[ZONE_NORMAL] = hole[0]; + hole[ZONE_DMA] = 0; } +#endif -static int __init check_initrd(struct meminfo *mi) +void __init setup_dma_zone(const struct machine_desc *mdesc) { - int initrd_node = -2; -#ifdef CONFIG_BLK_DEV_INITRD - unsigned long end = phys_initrd_start + phys_initrd_size; - - /* - * Make sure that the initrd is within a valid area of - * memory. 
- */ - if (phys_initrd_size) { - unsigned int i; - - initrd_node = -1; - - for (i = 0; i < mi->nr_banks; i++) { - unsigned long bank_end; - - bank_end = mi->bank[i].start + mi->bank[i].size; - - if (mi->bank[i].start <= phys_initrd_start && - end <= bank_end) - initrd_node = mi->bank[i].node; - } - } - - if (initrd_node == -1) { - printk(KERN_ERR "initrd (0x%08lx - 0x%08lx) extends beyond " - "physical memory - disabling initrd\n", - phys_initrd_start, end); - phys_initrd_start = phys_initrd_size = 0; - } +#ifdef CONFIG_ZONE_DMA + if (mdesc->dma_zone_size) { + arm_dma_zone_size = mdesc->dma_zone_size; + arm_dma_limit = PHYS_OFFSET + arm_dma_zone_size - 1; + } else + arm_dma_limit = 0xffffffff; + arm_dma_pfn_limit = arm_dma_limit >> PAGE_SHIFT; #endif - - return initrd_node; } -/* - * Reserve the various regions of node 0 - */ -static __init void reserve_node_zero(pg_data_t *pgdat) +static void __init zone_sizes_init(unsigned long min, unsigned long max_low, + unsigned long max_high) { - unsigned long res_size = 0; + unsigned long zone_size[MAX_NR_ZONES], zhole_size[MAX_NR_ZONES]; + struct memblock_region *reg; /* - * Register the kernel text and data with bootmem. - * Note that this can only be in node 0. + * initialise the zones. */ -#ifdef CONFIG_XIP_KERNEL - reserve_bootmem_node(pgdat, __pa(&__data_start), &_end - &__data_start); -#else - reserve_bootmem_node(pgdat, __pa(&_stext), &_end - &_stext); -#endif + memset(zone_size, 0, sizeof(zone_size)); /* - * Reserve the page tables. These are already in use, - * and can only be in node 0. + * The memory size has already been determined. If we need + * to do anything fancy with the allocation of this memory + * to the zones, now is the time to do it. */ - reserve_bootmem_node(pgdat, __pa(swapper_pg_dir), - PTRS_PER_PGD * sizeof(pgd_t)); + zone_size[0] = max_low - min; +#ifdef CONFIG_HIGHMEM + zone_size[ZONE_HIGHMEM] = max_high - max_low; +#endif /* - * Hmm... This should go elsewhere, but we really really need to - * stop things allocating the low memory; ideally we need a better - * implementation of GFP_DMA which does not assume that DMA-able - * memory starts at zero. + * Calculate the size of the holes. + * holes = node_size - sum(bank_sizes) */ - if (machine_is_integrator() || machine_is_cintegrator()) - res_size = __pa(swapper_pg_dir) - PHYS_OFFSET; + memcpy(zhole_size, zone_size, sizeof(zhole_size)); + for_each_memblock(memory, reg) { + unsigned long start = memblock_region_memory_base_pfn(reg); + unsigned long end = memblock_region_memory_end_pfn(reg); - /* - * These should likewise go elsewhere. They pre-reserve the - * screen memory region at the start of main system memory. - */ - if (machine_is_edb7211()) - res_size = 0x00020000; - if (machine_is_p720t()) - res_size = 0x00014000; + if (start < max_low) { + unsigned long low_end = min(end, max_low); + zhole_size[0] -= low_end - start; + } +#ifdef CONFIG_HIGHMEM + if (end > max_low) { + unsigned long high_start = max(start, max_low); + zhole_size[ZONE_HIGHMEM] -= end - high_start; + } +#endif + } -#ifdef CONFIG_SA1111 +#ifdef CONFIG_ZONE_DMA /* - * Because of the SA1111 DMA bug, we want to preserve our - * precious DMA-able memory... + * Adjust the sizes according to any special requirements for + * this machine type. 
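
For illustration only (not part of this commit), a worked example with hypothetical numbers: given 512 MB of lowmem, 4 KB pages, and an mdesc->dma_zone_size of 128 MB, arm_adjust_dma_zone() above splits the single lowmem zone so that

    zone_size[ZONE_DMA]    = SZ_128M >> PAGE_SHIFT;              /* 32768 pages */
    zone_size[ZONE_NORMAL] = (SZ_512M - SZ_128M) >> PAGE_SHIFT;  /* 98304 pages */

with the original hole count carried over to ZONE_NORMAL and ZONE_DMA treated as hole-free.
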
*/ - res_size = __pa(swapper_pg_dir) - PHYS_OFFSET; + if (arm_dma_zone_size) + arm_adjust_dma_zone(zone_size, zhole_size, + arm_dma_zone_size >> PAGE_SHIFT); #endif - if (res_size) - reserve_bootmem_node(pgdat, PHYS_OFFSET, res_size); -} -void __init build_mem_type_table(void); -void __init create_mapping(struct map_desc *md); + free_area_init_node(0, zone_size, min, zhole_size); +} -static unsigned long __init -bootmem_init_node(int node, int initrd_node, struct meminfo *mi) +#ifdef CONFIG_HAVE_ARCH_PFN_VALID +int pfn_valid(unsigned long pfn) { - unsigned long zone_size[MAX_NR_ZONES], zhole_size[MAX_NR_ZONES]; - unsigned long start_pfn, end_pfn, boot_pfn; - unsigned int boot_pages; - pg_data_t *pgdat; - int i; - - start_pfn = -1UL; - end_pfn = 0; - - /* - * Calculate the pfn range, and map the memory banks for this node. - */ - for_each_nodebank(i, mi, node) { - unsigned long start, end; - struct map_desc map; - - start = mi->bank[i].start >> PAGE_SHIFT; - end = (mi->bank[i].start + mi->bank[i].size) >> PAGE_SHIFT; - - if (start_pfn > start) - start_pfn = start; - if (end_pfn < end) - end_pfn = end; + return memblock_is_memory(__pfn_to_phys(pfn)); +} +EXPORT_SYMBOL(pfn_valid); +#endif - map.pfn = __phys_to_pfn(mi->bank[i].start); - map.virtual = __phys_to_virt(mi->bank[i].start); - map.length = mi->bank[i].size; - map.type = MT_MEMORY; +#ifndef CONFIG_SPARSEMEM +static void __init arm_memory_present(void) +{ +} +#else +static void __init arm_memory_present(void) +{ + struct memblock_region *reg; - create_mapping(&map); - } + for_each_memblock(memory, reg) + memory_present(0, memblock_region_memory_base_pfn(reg), + memblock_region_memory_end_pfn(reg)); +} +#endif - /* - * If there is no memory in this node, ignore it. - */ - if (end_pfn == 0) - return end_pfn; +static bool arm_memblock_steal_permitted = true; - /* - * Allocate the bootmem bitmap page. - */ - boot_pages = bootmem_bootmap_pages(end_pfn - start_pfn); - boot_pfn = find_bootmap_pfn(node, mi, boot_pages); +phys_addr_t __init arm_memblock_steal(phys_addr_t size, phys_addr_t align) +{ + phys_addr_t phys; - /* - * Initialise the bootmem allocator for this node, handing the - * memory banks over to bootmem. - */ - node_set_online(node); - pgdat = NODE_DATA(node); - init_bootmem_node(pgdat, boot_pfn, start_pfn, end_pfn); + BUG_ON(!arm_memblock_steal_permitted); - for_each_nodebank(i, mi, node) - free_bootmem_node(pgdat, mi->bank[i].start, mi->bank[i].size); + phys = memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ANYWHERE); + memblock_free(phys, size); + memblock_remove(phys, size); - /* - * Reserve the bootmem bitmap for this node. - */ - reserve_bootmem_node(pgdat, boot_pfn << PAGE_SHIFT, - boot_pages << PAGE_SHIFT); + return phys; +} +void __init arm_memblock_init(const struct machine_desc *mdesc) +{ + /* Register the kernel text, kernel data and initrd with memblock. */ +#ifdef CONFIG_XIP_KERNEL + memblock_reserve(__pa(_sdata), _end - _sdata); +#else + memblock_reserve(__pa(_stext), _end - _stext); +#endif #ifdef CONFIG_BLK_DEV_INITRD - /* - * If the initrd is in this node, reserve its memory. 
- */ - if (node == initrd_node) { - reserve_bootmem_node(pgdat, phys_initrd_start, - phys_initrd_size); + /* FDT scan will populate initrd_start */ + if (initrd_start && !phys_initrd_size) { + phys_initrd_start = __virt_to_phys(initrd_start); + phys_initrd_size = initrd_end - initrd_start; + } + initrd_start = initrd_end = 0; + if (phys_initrd_size && + !memblock_is_region_memory(phys_initrd_start, phys_initrd_size)) { + pr_err("INITRD: 0x%08llx+0x%08lx is not a memory region - disabling initrd\n", + (u64)phys_initrd_start, phys_initrd_size); + phys_initrd_start = phys_initrd_size = 0; + } + if (phys_initrd_size && + memblock_is_region_reserved(phys_initrd_start, phys_initrd_size)) { + pr_err("INITRD: 0x%08llx+0x%08lx overlaps in-use memory region - disabling initrd\n", + (u64)phys_initrd_start, phys_initrd_size); + phys_initrd_start = phys_initrd_size = 0; + } + if (phys_initrd_size) { + memblock_reserve(phys_initrd_start, phys_initrd_size); + + /* Now convert initrd to virtual addresses */ initrd_start = __phys_to_virt(phys_initrd_start); initrd_end = initrd_start + phys_initrd_size; } #endif - /* - * Finally, reserve any node zero regions. - */ - if (node == 0) - reserve_node_zero(pgdat); + arm_mm_memblock_reserve(); - /* - * initialise the zones within this node. - */ - memset(zone_size, 0, sizeof(zone_size)); - memset(zhole_size, 0, sizeof(zhole_size)); + /* reserve any platform specific memblock areas */ + if (mdesc->reserve) + mdesc->reserve(); - /* - * The size of this node has already been determined. If we need - * to do anything fancy with the allocation of this memory to the - * zones, now is the time to do it. - */ - zone_size[0] = end_pfn - start_pfn; - - /* - * For each bank in this node, calculate the size of the holes. - * holes = node_size - sum(bank_sizes_in_node) - */ - zhole_size[0] = zone_size[0]; - for_each_nodebank(i, mi, node) - zhole_size[0] -= mi->bank[i].size >> PAGE_SHIFT; + early_init_fdt_scan_reserved_mem(); /* - * Adjust the sizes according to any special requirements for - * this machine type. + * reserve memory for DMA contigouos allocations, + * must come from DMA area inside low memory */ - arch_adjust_zones(node, zone_size, zhole_size); + dma_contiguous_reserve(min(arm_dma_limit, arm_lowmem_limit)); - free_area_init_node(node, pgdat, zone_size, start_pfn, zhole_size); - - return end_pfn; + arm_memblock_steal_permitted = false; + memblock_dump_all(); } -static void __init bootmem_init(struct meminfo *mi) +void __init bootmem_init(void) { - unsigned long addr, memend_pfn = 0; - int node, initrd_node, i; + unsigned long min, max_low, max_high; - /* - * Invalidate the node number for empty or invalid memory banks - */ - for (i = 0; i < mi->nr_banks; i++) - if (mi->bank[i].size == 0 || mi->bank[i].node >= MAX_NUMNODES) - mi->bank[i].node = -1; - - memcpy(&meminfo, mi, sizeof(meminfo)); + memblock_allow_resize(); + max_low = max_high = 0; - /* - * Clear out all the mappings below the kernel image. - */ - for (addr = 0; addr < MODULE_START; addr += PGDIR_SIZE) - pmd_clear(pmd_off_k(addr)); -#ifdef CONFIG_XIP_KERNEL - /* The XIP kernel is mapped in the module area -- skip over it */ - addr = ((unsigned long)&_etext + PGDIR_SIZE - 1) & PGDIR_MASK; -#endif - for ( ; addr < PAGE_OFFSET; addr += PGDIR_SIZE) - pmd_clear(pmd_off_k(addr)); + find_limits(&min, &max_low, &max_high); /* - * Clear out all the kernel space mappings, except for the first - * memory bank, up to the end of the vmalloc region. 
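
For illustration only (not part of this commit): arm_memblock_steal() above is meant to be called from a machine_desc->reserve() hook, i.e. before arm_memblock_steal_permitted is cleared at the end of arm_memblock_init(). A minimal sketch of a hypothetical platform hook:

    static phys_addr_t demo_fw_base;        /* hypothetical firmware carve-out */

    static void __init demo_board_reserve(void)
    {
            /* 1 MB, 1 MB aligned, removed from the kernel's view of memory. */
            demo_fw_base = arm_memblock_steal(SZ_1M, SZ_1M);
    }
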
+ * Sparsemem tries to allocate bootmem in memory_present(), + * so must be done after the fixed reservations */ - for (addr = __phys_to_virt(mi->bank[0].start + mi->bank[0].size); - addr < VMALLOC_END; addr += PGDIR_SIZE) - pmd_clear(pmd_off_k(addr)); + arm_memory_present(); /* - * Locate which node contains the ramdisk image, if any. + * sparse_init() needs the bootmem allocator up and running. */ - initrd_node = check_initrd(mi); + sparse_init(); /* - * Run through each node initialising the bootmem allocator. + * Now free the memory - free_area_init_node needs + * the sparse mem_map arrays initialized by sparse_init() + * for memmap_init_zone(), otherwise all PFNs are invalid. */ - for_each_node(node) { - unsigned long end_pfn; - - end_pfn = bootmem_init_node(node, initrd_node, mi); - - /* - * Remember the highest memory PFN. - */ - if (end_pfn > memend_pfn) - memend_pfn = end_pfn; - } - - high_memory = __va(memend_pfn << PAGE_SHIFT); + zone_sizes_init(min, max_low, max_high); /* * This doesn't seem to be used by the Linux memory manager any * more, but is used by ll_rw_block. If we can get rid of it, we * also get rid of some of the stuff above as well. - * - * Note: max_low_pfn and max_pfn reflect the number of _pages_ in - * the system, not the maximum PFN. - */ - max_pfn = max_low_pfn = memend_pfn - PHYS_PFN_OFFSET; -} - -/* - * Set up device the mappings. Since we clear out the page tables for all - * mappings above VMALLOC_END, we will remove any debug device mappings. - * This means you have to be careful how you debug this function, or any - * called function. (Do it by code inspection!) - */ -static void __init devicemaps_init(struct machine_desc *mdesc) -{ - struct map_desc map; - unsigned long addr; - void *vectors; - - for (addr = VMALLOC_END; addr; addr += PGDIR_SIZE) - pmd_clear(pmd_off_k(addr)); - - /* - * Map the kernel if it is XIP. - * It is always first in the modulearea. - */ -#ifdef CONFIG_XIP_KERNEL - map.pfn = __phys_to_pfn(CONFIG_XIP_PHYS_ADDR & PGDIR_MASK); - map.virtual = MODULE_START; - map.length = ((unsigned long)&_etext - map.virtual + ~PGDIR_MASK) & PGDIR_MASK; - map.type = MT_ROM; - create_mapping(&map); -#endif - - /* - * Map the cache flushing regions. - */ -#ifdef FLUSH_BASE - map.pfn = __phys_to_pfn(FLUSH_BASE_PHYS); - map.virtual = FLUSH_BASE; - map.length = PGDIR_SIZE; - map.type = MT_CACHECLEAN; - create_mapping(&map); -#endif -#ifdef FLUSH_BASE_MINICACHE - map.pfn = __phys_to_pfn(FLUSH_BASE_PHYS + PGDIR_SIZE); - map.virtual = FLUSH_BASE_MINICACHE; - map.length = PGDIR_SIZE; - map.type = MT_MINICLEAN; - create_mapping(&map); -#endif - - flush_cache_all(); - local_flush_tlb_all(); - - vectors = alloc_bootmem_low_pages(PAGE_SIZE); - BUG_ON(!vectors); - - /* - * Create a mapping for the machine vectors at the high-vectors - * location (0xffff0000). If we aren't using high-vectors, also - * create a mapping at the low-vectors virtual address. - */ - map.pfn = __phys_to_pfn(virt_to_phys(vectors)); - map.virtual = 0xffff0000; - map.length = PAGE_SIZE; - map.type = MT_HIGH_VECTORS; - create_mapping(&map); - - if (!vectors_high()) { - map.virtual = 0; - map.type = MT_LOW_VECTORS; - create_mapping(&map); - } - - /* - * Ask the machine support to map in the statically mapped devices. - */ - if (mdesc->map_io) - mdesc->map_io(); - - /* - * Finally flush the tlb again - this ensures that we're in a - * consistent state wrt the writebuffer if the writebuffer needs - * draining. After this point, we can start to touch devices - * again. 
*/ - local_flush_tlb_all(); + min_low_pfn = min; + max_low_pfn = max_low; + max_pfn = max_high; } /* - * paging_init() sets up the page tables, initialises the zone memory - * maps, and sets up the zero page, bad page and bad page tables. + * Poison init memory with an undefined instruction (ARM) or a branch to an + * undefined instruction (Thumb). */ -void __init paging_init(struct meminfo *mi, struct machine_desc *mdesc) -{ - void *zero_page; - - build_mem_type_table(); - bootmem_init(mi); - devicemaps_init(mdesc); - - top_pmd = pmd_off_k(0xffff0000); - - /* - * allocate the zero page. Note that we count on this going ok. - */ - zero_page = alloc_bootmem_low_pages(PAGE_SIZE); - memzero(zero_page, PAGE_SIZE); - empty_zero_page = virt_to_page(zero_page); - flush_dcache_page(empty_zero_page); -} - -static inline void free_area(unsigned long addr, unsigned long end, char *s) +static inline void poison_init_mem(void *s, size_t count) { - unsigned int size = (end - addr) >> 10; - - for (; addr < end; addr += PAGE_SIZE) { - struct page *page = virt_to_page(addr); - ClearPageReserved(page); - set_page_count(page, 1); - free_page(addr); - totalram_pages++; - } - - if (size && s) - printk(KERN_INFO "Freeing %s memory: %dK\n", s, size); + u32 *p = (u32 *)s; + for (; count != 0; count -= 4) + *p++ = 0xe7fddef0; } static inline void -free_memmap(int node, unsigned long start_pfn, unsigned long end_pfn) +free_memmap(unsigned long start_pfn, unsigned long end_pfn) { struct page *start_pg, *end_pg; - unsigned long pg, pgend; + phys_addr_t pg, pgend; /* * Convert start_pfn/end_pfn to a struct page pointer. */ - start_pg = pfn_to_page(start_pfn); - end_pg = pfn_to_page(end_pfn); + start_pg = pfn_to_page(start_pfn - 1) + 1; + end_pg = pfn_to_page(end_pfn - 1) + 1; /* * Convert to physical addresses, and @@ -562,40 +400,116 @@ free_memmap(int node, unsigned long start_pfn, unsigned long end_pfn) * free the section of the memmap array. */ if (pg < pgend) - free_bootmem_node(NODE_DATA(node), pg, pgend - pg); + memblock_free_early(pg, pgend - pg); } /* * The mem_map array can get very big. Free the unused area of the memory map. */ -static void __init free_unused_memmap_node(int node, struct meminfo *mi) +static void __init free_unused_memmap(void) { - unsigned long bank_start, prev_bank_end = 0; - unsigned int i; + unsigned long start, prev_end = 0; + struct memblock_region *reg; /* - * [FIXME] This relies on each bank being in address order. This - * may not be the case, especially if the user has provided the - * information on the command line. + * This relies on each bank being in address order. + * The banks are sorted previously in bootmem_init(). */ - for_each_nodebank(i, mi, node) { - bank_start = mi->bank[i].start >> PAGE_SHIFT; - if (bank_start < prev_bank_end) { - printk(KERN_ERR "MEM: unordered memory banks. " - "Not freeing memmap.\n"); - break; - } + for_each_memblock(memory, reg) { + start = memblock_region_memory_base_pfn(reg); +#ifdef CONFIG_SPARSEMEM + /* + * Take care not to free memmap entries that don't exist + * due to SPARSEMEM sections which aren't present. + */ + start = min(start, + ALIGN(prev_end, PAGES_PER_SECTION)); +#else + /* + * Align down here since the VM subsystem insists that the + * memmap entries are valid from the bank start aligned to + * MAX_ORDER_NR_PAGES. + */ + start = round_down(start, MAX_ORDER_NR_PAGES); +#endif /* * If we had a previous bank, and there is a space * between the current bank and the previous, free it. 
*/ - if (prev_bank_end && prev_bank_end != bank_start) - free_memmap(node, prev_bank_end, bank_start); + if (prev_end && prev_end < start) + free_memmap(prev_end, start); + + /* + * Align up here since the VM subsystem insists that the + * memmap entries are valid from the bank end aligned to + * MAX_ORDER_NR_PAGES. + */ + prev_end = ALIGN(memblock_region_memory_end_pfn(reg), + MAX_ORDER_NR_PAGES); + } + +#ifdef CONFIG_SPARSEMEM + if (!IS_ALIGNED(prev_end, PAGES_PER_SECTION)) + free_memmap(prev_end, + ALIGN(prev_end, PAGES_PER_SECTION)); +#endif +} + +#ifdef CONFIG_HIGHMEM +static inline void free_area_high(unsigned long pfn, unsigned long end) +{ + for (; pfn < end; pfn++) + free_highmem_page(pfn_to_page(pfn)); +} +#endif + +static void __init free_highpages(void) +{ +#ifdef CONFIG_HIGHMEM + unsigned long max_low = max_low_pfn; + struct memblock_region *mem, *res; + + /* set highmem page free */ + for_each_memblock(memory, mem) { + unsigned long start = memblock_region_memory_base_pfn(mem); + unsigned long end = memblock_region_memory_end_pfn(mem); + + /* Ignore complete lowmem entries */ + if (end <= max_low) + continue; - prev_bank_end = (mi->bank[i].start + - mi->bank[i].size) >> PAGE_SHIFT; + /* Truncate partial highmem entries */ + if (start < max_low) + start = max_low; + + /* Find and exclude any reserved regions */ + for_each_memblock(reserved, res) { + unsigned long res_start, res_end; + + res_start = memblock_region_reserved_base_pfn(res); + res_end = memblock_region_reserved_end_pfn(res); + + if (res_end < start) + continue; + if (res_start < start) + res_start = start; + if (res_start > end) + res_start = end; + if (res_end > end) + res_end = end; + if (res_start != start) + free_area_high(start, res_start); + start = res_end; + if (start == end) + break; + } + + /* And now free anything which remains */ + if (start < end) + free_area_high(start, end); } +#endif } /* @@ -605,51 +519,92 @@ static void __init free_unused_memmap_node(int node, struct meminfo *mi) */ void __init mem_init(void) { - unsigned int codepages, datapages, initpages; - int i, node; +#ifdef CONFIG_HAVE_TCM + /* These pointers are filled in on TCM detection */ + extern u32 dtcm_end; + extern u32 itcm_end; +#endif - codepages = &_etext - &_text; - datapages = &_end - &__data_start; - initpages = &__init_end - &__init_begin; + set_max_mapnr(pfn_to_page(max_pfn) - mem_map); -#ifndef CONFIG_DISCONTIGMEM - max_mapnr = virt_to_page(high_memory) - mem_map; + /* this will put all unused low memory onto the freelists */ + free_unused_memmap(); + free_all_bootmem(); + +#ifdef CONFIG_SA1111 + /* now that our DMA memory is actually so designated, we can free it */ + free_reserved_area(__va(PHYS_OFFSET), swapper_pg_dir, -1, NULL); #endif - /* this will put all unused low memory onto the freelists */ - for_each_online_node(node) { - pg_data_t *pgdat = NODE_DATA(node); + free_highpages(); - free_unused_memmap_node(node, &meminfo); + mem_init_print_info(NULL); - if (pgdat->node_spanned_pages != 0) - totalram_pages += free_all_bootmem_node(pgdat); - } +#define MLK(b, t) b, t, ((t) - (b)) >> 10 +#define MLM(b, t) b, t, ((t) - (b)) >> 20 +#define MLK_ROUNDUP(b, t) b, t, DIV_ROUND_UP(((t) - (b)), SZ_1K) -#ifdef CONFIG_SA1111 - /* now that our DMA memory is actually so designated, we can free it */ - free_area(PAGE_OFFSET, (unsigned long)swapper_pg_dir, NULL); + printk(KERN_NOTICE "Virtual kernel memory layout:\n" + " vector : 0x%08lx - 0x%08lx (%4ld kB)\n" +#ifdef CONFIG_HAVE_TCM + " DTCM : 0x%08lx - 0x%08lx (%4ld kB)\n" + " 
ITCM : 0x%08lx - 0x%08lx (%4ld kB)\n" +#endif + " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n" + " vmalloc : 0x%08lx - 0x%08lx (%4ld MB)\n" + " lowmem : 0x%08lx - 0x%08lx (%4ld MB)\n" +#ifdef CONFIG_HIGHMEM + " pkmap : 0x%08lx - 0x%08lx (%4ld MB)\n" +#endif +#ifdef CONFIG_MODULES + " modules : 0x%08lx - 0x%08lx (%4ld MB)\n" #endif + " .text : 0x%p" " - 0x%p" " (%4d kB)\n" + " .init : 0x%p" " - 0x%p" " (%4d kB)\n" + " .data : 0x%p" " - 0x%p" " (%4d kB)\n" + " .bss : 0x%p" " - 0x%p" " (%4d kB)\n", + + MLK(UL(CONFIG_VECTORS_BASE), UL(CONFIG_VECTORS_BASE) + + (PAGE_SIZE)), +#ifdef CONFIG_HAVE_TCM + MLK(DTCM_OFFSET, (unsigned long) dtcm_end), + MLK(ITCM_OFFSET, (unsigned long) itcm_end), +#endif + MLK(FIXADDR_START, FIXADDR_TOP), + MLM(VMALLOC_START, VMALLOC_END), + MLM(PAGE_OFFSET, (unsigned long)high_memory), +#ifdef CONFIG_HIGHMEM + MLM(PKMAP_BASE, (PKMAP_BASE) + (LAST_PKMAP) * + (PAGE_SIZE)), +#endif +#ifdef CONFIG_MODULES + MLM(MODULES_VADDR, MODULES_END), +#endif + + MLK_ROUNDUP(_text, _etext), + MLK_ROUNDUP(__init_begin, __init_end), + MLK_ROUNDUP(_sdata, _edata), + MLK_ROUNDUP(__bss_start, __bss_stop)); + +#undef MLK +#undef MLM +#undef MLK_ROUNDUP /* - * Since our memory may not be contiguous, calculate the - * real number of pages we have in this system + * Check boundaries twice: Some fundamental inconsistencies can + * be detected at build time already. */ - printk(KERN_INFO "Memory:"); - - num_physpages = 0; - for (i = 0; i < meminfo.nr_banks; i++) { - num_physpages += meminfo.bank[i].size >> PAGE_SHIFT; - printk(" %ldMB", meminfo.bank[i].size >> 20); - } +#ifdef CONFIG_MMU + BUILD_BUG_ON(TASK_SIZE > MODULES_VADDR); + BUG_ON(TASK_SIZE > MODULES_VADDR); +#endif - printk(" = %luMB total\n", num_physpages >> (20 - PAGE_SHIFT)); - printk(KERN_NOTICE "Memory: %luKB available (%dK code, " - "%dK data, %dK init)\n", - (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), - codepages >> 10, datapages >> 10, initpages >> 10); +#ifdef CONFIG_HIGHMEM + BUILD_BUG_ON(PKMAP_BASE + LAST_PKMAP * PAGE_SIZE > PAGE_OFFSET); + BUG_ON(PKMAP_BASE + LAST_PKMAP * PAGE_SIZE > PAGE_OFFSET); +#endif - if (PAGE_SIZE >= 16384 && num_physpages <= 128) { + if (PAGE_SIZE >= 16384 && get_num_physpages() <= 128) { extern int sysctl_overcommit_memory; /* * On a machine this small we won't get @@ -662,11 +617,16 @@ void __init mem_init(void) void free_initmem(void) { - if (!machine_is_integrator() && !machine_is_cintegrator()) { - free_area((unsigned long)(&__init_begin), - (unsigned long)(&__init_end), - "init"); - } +#ifdef CONFIG_HAVE_TCM + extern char __tcm_start, __tcm_end; + + poison_init_mem(&__tcm_start, &__tcm_end - &__tcm_start); + free_reserved_area(&__tcm_start, &__tcm_end, -1, "TCM link"); +#endif + + poison_init_mem(__init_begin, __init_end - __init_begin); + if (!machine_is_integrator() && !machine_is_cintegrator()) + free_initmem_default(-1); } #ifdef CONFIG_BLK_DEV_INITRD @@ -675,8 +635,10 @@ static int keep_initrd; void free_initrd_mem(unsigned long start, unsigned long end) { - if (!keep_initrd) - free_area(start, end, "initrd"); + if (!keep_initrd) { + poison_init_mem((void *)start, PAGE_ALIGN(end) - start); + free_reserved_area((void *)start, (void *)end, -1, "initrd"); + } } static int __init keepinitrd_setup(char *__unused) diff --git a/arch/arm/mm/iomap.c b/arch/arm/mm/iomap.c new file mode 100644 index 00000000000..4614208369f --- /dev/null +++ b/arch/arm/mm/iomap.c @@ -0,0 +1,42 @@ +/* + * linux/arch/arm/mm/iomap.c + * + * Map IO port and PCI memory spaces so that {read,write}[bwl] can + * be used 
to access this memory. + */ +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/ioport.h> +#include <linux/io.h> + +unsigned long vga_base; +EXPORT_SYMBOL(vga_base); + +#ifdef __io +void __iomem *ioport_map(unsigned long port, unsigned int nr) +{ + return __io(port); +} +EXPORT_SYMBOL(ioport_map); + +void ioport_unmap(void __iomem *addr) +{ +} +EXPORT_SYMBOL(ioport_unmap); +#endif + +#ifdef CONFIG_PCI +unsigned long pcibios_min_io = 0x1000; +EXPORT_SYMBOL(pcibios_min_io); + +unsigned long pcibios_min_mem = 0x01000000; +EXPORT_SYMBOL(pcibios_min_mem); + +void pci_iounmap(struct pci_dev *dev, void __iomem *addr) +{ + if ((unsigned long)addr >= VMALLOC_START && + (unsigned long)addr < VMALLOC_END) + iounmap(addr); +} +EXPORT_SYMBOL(pci_iounmap); +#endif diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c index 0f128c28fee..d1e5ad7ab3b 100644 --- a/arch/arm/mm/ioremap.c +++ b/arch/arm/mm/ioremap.c @@ -24,97 +24,329 @@ #include <linux/errno.h> #include <linux/mm.h> #include <linux/vmalloc.h> +#include <linux/io.h> +#include <linux/sizes.h> +#include <asm/cp15.h> +#include <asm/cputype.h> #include <asm/cacheflush.h> -#include <asm/hardware.h> -#include <asm/io.h> +#include <asm/mmu_context.h> +#include <asm/pgalloc.h> #include <asm/tlbflush.h> +#include <asm/system_info.h> -static inline void -remap_area_pte(pte_t * pte, unsigned long address, unsigned long size, - unsigned long phys_addr, pgprot_t pgprot) +#include <asm/mach/map.h> +#include <asm/mach/pci.h> +#include "mm.h" + + +LIST_HEAD(static_vmlist); + +static struct static_vm *find_static_vm_paddr(phys_addr_t paddr, + size_t size, unsigned int mtype) { - unsigned long end; + struct static_vm *svm; + struct vm_struct *vm; - address &= ~PMD_MASK; - end = address + size; - if (end > PMD_SIZE) - end = PMD_SIZE; - BUG_ON(address >= end); - do { - if (!pte_none(*pte)) - goto bad; + list_for_each_entry(svm, &static_vmlist, list) { + vm = &svm->vm; + if (!(vm->flags & VM_ARM_STATIC_MAPPING)) + continue; + if ((vm->flags & VM_ARM_MTYPE_MASK) != VM_ARM_MTYPE(mtype)) + continue; + + if (vm->phys_addr > paddr || + paddr + size - 1 > vm->phys_addr + vm->size - 1) + continue; + + return svm; + } + + return NULL; +} + +struct static_vm *find_static_vm_vaddr(void *vaddr) +{ + struct static_vm *svm; + struct vm_struct *vm; + + list_for_each_entry(svm, &static_vmlist, list) { + vm = &svm->vm; + + /* static_vmlist is ascending order */ + if (vm->addr > vaddr) + break; + + if (vm->addr <= vaddr && vm->addr + vm->size > vaddr) + return svm; + } - set_pte(pte, pfn_pte(phys_addr >> PAGE_SHIFT, pgprot)); - address += PAGE_SIZE; - phys_addr += PAGE_SIZE; - pte++; - } while (address && (address < end)); - return; + return NULL; +} + +void __init add_static_vm_early(struct static_vm *svm) +{ + struct static_vm *curr_svm; + struct vm_struct *vm; + void *vaddr; + + vm = &svm->vm; + vm_area_add_early(vm); + vaddr = vm->addr; - bad: - printk("remap_area_pte: page already exists\n"); - BUG(); + list_for_each_entry(curr_svm, &static_vmlist, list) { + vm = &curr_svm->vm; + + if (vm->addr > vaddr) + break; + } + list_add_tail(&svm->list, &curr_svm->list); } -static inline int -remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned long size, - unsigned long phys_addr, unsigned long flags) +int ioremap_page(unsigned long virt, unsigned long phys, + const struct mem_type *mtype) { - unsigned long end; - pgprot_t pgprot; + return ioremap_page_range(virt, virt + PAGE_SIZE, phys, + __pgprot(mtype->prot_pte)); +} +EXPORT_SYMBOL(ioremap_page); - 
address &= ~PGDIR_MASK; - end = address + size; +void __check_vmalloc_seq(struct mm_struct *mm) +{ + unsigned int seq; - if (end > PGDIR_SIZE) - end = PGDIR_SIZE; + do { + seq = init_mm.context.vmalloc_seq; + memcpy(pgd_offset(mm, VMALLOC_START), + pgd_offset_k(VMALLOC_START), + sizeof(pgd_t) * (pgd_index(VMALLOC_END) - + pgd_index(VMALLOC_START))); + mm->context.vmalloc_seq = seq; + } while (seq != init_mm.context.vmalloc_seq); +} - phys_addr -= address; - BUG_ON(address >= end); +#if !defined(CONFIG_SMP) && !defined(CONFIG_ARM_LPAE) +/* + * Section support is unsafe on SMP - If you iounmap and ioremap a region, + * the other CPUs will not see this change until their next context switch. + * Meanwhile, (eg) if an interrupt comes in on one of those other CPUs + * which requires the new ioremap'd region to be referenced, the CPU will + * reference the _old_ region. + * + * Note that get_vm_area_caller() allocates a guard 4K page, so we need to + * mask the size back to 1MB aligned or we will overflow in the loop below. + */ +static void unmap_area_sections(unsigned long virt, unsigned long size) +{ + unsigned long addr = virt, end = virt + (size & ~(SZ_1M - 1)); + pgd_t *pgd; + pud_t *pud; + pmd_t *pmdp; - pgprot = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | L_PTE_WRITE | flags); + flush_cache_vunmap(addr, end); + pgd = pgd_offset_k(addr); + pud = pud_offset(pgd, addr); + pmdp = pmd_offset(pud, addr); do { - pte_t * pte = pte_alloc_kernel(pmd, address); - if (!pte) - return -ENOMEM; - remap_area_pte(pte, address, end - address, address + phys_addr, pgprot); - address = (address + PMD_SIZE) & PMD_MASK; - pmd++; - } while (address && (address < end)); + pmd_t pmd = *pmdp; + + if (!pmd_none(pmd)) { + /* + * Clear the PMD from the page table, and + * increment the vmalloc sequence so others + * notice this change. + * + * Note: this is still racy on SMP machines. + */ + pmd_clear(pmdp); + init_mm.context.vmalloc_seq++; + + /* + * Free the page table, if there was one. + */ + if ((pmd_val(pmd) & PMD_TYPE_MASK) == PMD_TYPE_TABLE) + pte_free_kernel(&init_mm, pmd_page_vaddr(pmd)); + } + + addr += PMD_SIZE; + pmdp += 2; + } while (addr < end); + + /* + * Ensure that the active_mm is up to date - we want to + * catch any use-after-iounmap cases. + */ + if (current->active_mm->context.vmalloc_seq != init_mm.context.vmalloc_seq) + __check_vmalloc_seq(current->active_mm); + + flush_tlb_kernel_range(virt, end); +} + +static int +remap_area_sections(unsigned long virt, unsigned long pfn, + size_t size, const struct mem_type *type) +{ + unsigned long addr = virt, end = virt + size; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + + /* + * Remove and free any PTE-based mapping, and + * sync the current kernel mapping. 
+ */ + unmap_area_sections(virt, size); + + pgd = pgd_offset_k(addr); + pud = pud_offset(pgd, addr); + pmd = pmd_offset(pud, addr); + do { + pmd[0] = __pmd(__pfn_to_phys(pfn) | type->prot_sect); + pfn += SZ_1M >> PAGE_SHIFT; + pmd[1] = __pmd(__pfn_to_phys(pfn) | type->prot_sect); + pfn += SZ_1M >> PAGE_SHIFT; + flush_pmd_entry(pmd); + + addr += PMD_SIZE; + pmd += 2; + } while (addr < end); + return 0; } static int -remap_area_pages(unsigned long start, unsigned long phys_addr, - unsigned long size, unsigned long flags) +remap_area_supersections(unsigned long virt, unsigned long pfn, + size_t size, const struct mem_type *type) { - unsigned long address = start; - unsigned long end = start + size; - int err = 0; - pgd_t * dir; + unsigned long addr = virt, end = virt + size; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + + /* + * Remove and free any PTE-based mapping, and + * sync the current kernel mapping. + */ + unmap_area_sections(virt, size); - phys_addr -= address; - dir = pgd_offset(&init_mm, address); - BUG_ON(address >= end); + pgd = pgd_offset_k(virt); + pud = pud_offset(pgd, addr); + pmd = pmd_offset(pud, addr); do { - pmd_t *pmd = pmd_alloc(&init_mm, dir, address); - if (!pmd) { - err = -ENOMEM; - break; + unsigned long super_pmd_val, i; + + super_pmd_val = __pfn_to_phys(pfn) | type->prot_sect | + PMD_SECT_SUPER; + super_pmd_val |= ((pfn >> (32 - PAGE_SHIFT)) & 0xf) << 20; + + for (i = 0; i < 8; i++) { + pmd[0] = __pmd(super_pmd_val); + pmd[1] = __pmd(super_pmd_val); + flush_pmd_entry(pmd); + + addr += PMD_SIZE; + pmd += 2; } - if (remap_area_pmd(pmd, address, end - address, - phys_addr + address, flags)) { - err = -ENOMEM; - break; + + pfn += SUPERSECTION_SIZE >> PAGE_SHIFT; + } while (addr < end); + + return 0; +} +#endif + +void __iomem * __arm_ioremap_pfn_caller(unsigned long pfn, + unsigned long offset, size_t size, unsigned int mtype, void *caller) +{ + const struct mem_type *type; + int err; + unsigned long addr; + struct vm_struct *area; + phys_addr_t paddr = __pfn_to_phys(pfn); + +#ifndef CONFIG_ARM_LPAE + /* + * High mappings must be supersection aligned + */ + if (pfn >= 0x100000 && (paddr & ~SUPERSECTION_MASK)) + return NULL; +#endif + + type = get_mem_type(mtype); + if (!type) + return NULL; + + /* + * Page align the mapping size, taking account of any offset. + */ + size = PAGE_ALIGN(offset + size); + + /* + * Try to reuse one of the static mapping whenever possible. 
+ */ + if (size && !(sizeof(phys_addr_t) == 4 && pfn >= 0x100000)) { + struct static_vm *svm; + + svm = find_static_vm_paddr(paddr, size, mtype); + if (svm) { + addr = (unsigned long)svm->vm.addr; + addr += paddr - svm->vm.phys_addr; + return (void __iomem *) (offset + addr); } + } - address = (address + PGDIR_SIZE) & PGDIR_MASK; - dir++; - } while (address && (address < end)); + /* + * Don't allow RAM to be mapped - this causes problems with ARMv6+ + */ + if (WARN_ON(pfn_valid(pfn))) + return NULL; + + area = get_vm_area_caller(size, VM_IOREMAP, caller); + if (!area) + return NULL; + addr = (unsigned long)area->addr; + area->phys_addr = paddr; + +#if !defined(CONFIG_SMP) && !defined(CONFIG_ARM_LPAE) + if (DOMAIN_IO == 0 && + (((cpu_architecture() >= CPU_ARCH_ARMv6) && (get_cr() & CR_XP)) || + cpu_is_xsc3()) && pfn >= 0x100000 && + !((paddr | size | addr) & ~SUPERSECTION_MASK)) { + area->flags |= VM_ARM_SECTION_MAPPING; + err = remap_area_supersections(addr, pfn, size, type); + } else if (!((paddr | size | addr) & ~PMD_MASK)) { + area->flags |= VM_ARM_SECTION_MAPPING; + err = remap_area_sections(addr, pfn, size, type); + } else +#endif + err = ioremap_page_range(addr, addr + size, paddr, + __pgprot(type->prot_pte)); + + if (err) { + vunmap((void *)addr); + return NULL; + } + + flush_cache_vmap(addr, addr + size); + return (void __iomem *) (offset + addr); +} + +void __iomem *__arm_ioremap_caller(phys_addr_t phys_addr, size_t size, + unsigned int mtype, void *caller) +{ + phys_addr_t last_addr; + unsigned long offset = phys_addr & ~PAGE_MASK; + unsigned long pfn = __phys_to_pfn(phys_addr); + + /* + * Don't allow wraparound or zero size + */ + last_addr = phys_addr + size - 1; + if (!size || last_addr < phys_addr) + return NULL; - flush_cache_vmap(start, end); - return err; + return __arm_ioremap_pfn_caller(pfn, offset, size, mtype, + caller); } /* @@ -125,94 +357,102 @@ remap_area_pages(unsigned long start, unsigned long phys_addr, * NOTE! We need to allow non-page-aligned mappings too: we will obviously * have to convert them into an offset in a page-aligned mapping, but the * caller shouldn't need to know that small detail. - * - * 'flags' are the extra L_PTE_ flags that you want to specify for this - * mapping. See include/asm-arm/proc-armv/pgtable.h for more information. */ void __iomem * -__ioremap(unsigned long phys_addr, size_t size, unsigned long flags, - unsigned long align) +__arm_ioremap_pfn(unsigned long pfn, unsigned long offset, size_t size, + unsigned int mtype) { - void * addr; - struct vm_struct * area; - unsigned long offset, last_addr; - - /* Don't allow wraparound or zero size */ - last_addr = phys_addr + size - 1; - if (!size || last_addr < phys_addr) - return NULL; + return __arm_ioremap_pfn_caller(pfn, offset, size, mtype, + __builtin_return_address(0)); +} +EXPORT_SYMBOL(__arm_ioremap_pfn); - /* - * Mappings have to be page-aligned - */ - offset = phys_addr & ~PAGE_MASK; - phys_addr &= PAGE_MASK; - size = PAGE_ALIGN(last_addr + 1) - phys_addr; +void __iomem * (*arch_ioremap_caller)(phys_addr_t, size_t, + unsigned int, void *) = + __arm_ioremap_caller; - /* - * Ok, go for it.. 
- */ - area = get_vm_area(size, VM_IOREMAP); - if (!area) - return NULL; - addr = area->addr; - if (remap_area_pages((unsigned long) addr, phys_addr, size, flags)) { - vfree(addr); - return NULL; - } - return (void __iomem *) (offset + (char *)addr); +void __iomem * +__arm_ioremap(phys_addr_t phys_addr, size_t size, unsigned int mtype) +{ + return arch_ioremap_caller(phys_addr, size, mtype, + __builtin_return_address(0)); } -EXPORT_SYMBOL(__ioremap); +EXPORT_SYMBOL(__arm_ioremap); -void __iounmap(void __iomem *addr) +/* + * Remap an arbitrary physical address space into the kernel virtual + * address space as memory. Needed when the kernel wants to execute + * code in external memory. This is needed for reprogramming source + * clocks that would affect normal memory for example. Please see + * CONFIG_GENERIC_ALLOCATOR for allocating external memory. + */ +void __iomem * +__arm_ioremap_exec(phys_addr_t phys_addr, size_t size, bool cached) { - vfree((void *) (PAGE_MASK & (unsigned long) addr)); + unsigned int mtype; + + if (cached) + mtype = MT_MEMORY_RWX; + else + mtype = MT_MEMORY_RWX_NONCACHED; + + return __arm_ioremap_caller(phys_addr, size, mtype, + __builtin_return_address(0)); } -EXPORT_SYMBOL(__iounmap); -#ifdef __io -void __iomem *ioport_map(unsigned long port, unsigned int nr) +void __iounmap(volatile void __iomem *io_addr) { - return __io(port); + void *addr = (void *)(PAGE_MASK & (unsigned long)io_addr); + struct static_vm *svm; + + /* If this is a static mapping, we must leave it alone */ + svm = find_static_vm_vaddr(addr); + if (svm) + return; + +#if !defined(CONFIG_SMP) && !defined(CONFIG_ARM_LPAE) + { + struct vm_struct *vm; + + vm = find_vm_area(addr); + + /* + * If this is a section based mapping we need to handle it + * specially as the VM subsystem does not know how to handle + * such a beast. 
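
For illustration only (not part of this commit): none of this section-mapping bookkeeping is visible to callers; drivers keep using the plain ioremap()/iounmap() pair, which on ARM ends up in __arm_ioremap_caller() and __iounmap() above. A minimal sketch with a hypothetical register block:

    static int demo_probe(phys_addr_t regs_phys)
    {
            void __iomem *regs = ioremap(regs_phys, SZ_4K);

            if (!regs)
                    return -ENOMEM;

            writel(0x1, regs + 0x10);       /* hypothetical enable register */
            iounmap(regs);                  /* may tear down a section mapping */
            return 0;
    }
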
+ */ + if (vm && (vm->flags & VM_ARM_SECTION_MAPPING)) + unmap_area_sections((unsigned long)vm->addr, vm->size); + } +#endif + + vunmap(addr); } -EXPORT_SYMBOL(ioport_map); -void ioport_unmap(void __iomem *addr) +void (*arch_iounmap)(volatile void __iomem *) = __iounmap; + +void __arm_iounmap(volatile void __iomem *io_addr) { + arch_iounmap(io_addr); } -EXPORT_SYMBOL(ioport_unmap); -#endif +EXPORT_SYMBOL(__arm_iounmap); #ifdef CONFIG_PCI -#include <linux/pci.h> -#include <linux/ioport.h> +static int pci_ioremap_mem_type = MT_DEVICE; -void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen) +void pci_ioremap_set_mem_type(int mem_type) { - unsigned long start = pci_resource_start(dev, bar); - unsigned long len = pci_resource_len(dev, bar); - unsigned long flags = pci_resource_flags(dev, bar); - - if (!len || !start) - return NULL; - if (maxlen && len > maxlen) - len = maxlen; - if (flags & IORESOURCE_IO) - return ioport_map(start, len); - if (flags & IORESOURCE_MEM) { - if (flags & IORESOURCE_CACHEABLE) - return ioremap(start, len); - return ioremap_nocache(start, len); - } - return NULL; + pci_ioremap_mem_type = mem_type; } -EXPORT_SYMBOL(pci_iomap); -void pci_iounmap(struct pci_dev *dev, void __iomem *addr) +int pci_ioremap_io(unsigned int offset, phys_addr_t phys_addr) { - if ((unsigned long)addr >= VMALLOC_START && - (unsigned long)addr < VMALLOC_END) - iounmap(addr); + BUG_ON(offset + SZ_64K > IO_SPACE_LIMIT); + + return ioremap_page_range(PCI_IO_VIRT_BASE + offset, + PCI_IO_VIRT_BASE + offset + SZ_64K, + phys_addr, + __pgprot(get_mem_type(pci_ioremap_mem_type)->prot_pte)); } -EXPORT_SYMBOL(pci_iounmap); +EXPORT_SYMBOL_GPL(pci_ioremap_io); #endif diff --git a/arch/arm/mm/l2c-common.c b/arch/arm/mm/l2c-common.c new file mode 100644 index 00000000000..10a3cf28c36 --- /dev/null +++ b/arch/arm/mm/l2c-common.c @@ -0,0 +1,20 @@ +/* + * Copyright (C) 2010 ARM Ltd. + * Written by Catalin Marinas <catalin.marinas@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/bug.h> +#include <linux/smp.h> +#include <asm/outercache.h> + +void outer_disable(void) +{ + WARN_ON(!irqs_disabled()); + WARN_ON(num_online_cpus() > 1); + + if (outer_cache.disable) + outer_cache.disable(); +} diff --git a/arch/arm/mm/l2c-l2x0-resume.S b/arch/arm/mm/l2c-l2x0-resume.S new file mode 100644 index 00000000000..99b05f21a59 --- /dev/null +++ b/arch/arm/mm/l2c-l2x0-resume.S @@ -0,0 +1,58 @@ +/* + * L2C-310 early resume code. This can be used by platforms to restore + * the settings of their L2 cache controller before restoring the + * processor state. + * + * This code can only be used to if you are running in the secure world. 
+ */ +#include <linux/linkage.h> +#include <asm/hardware/cache-l2x0.h> + + .text + +ENTRY(l2c310_early_resume) + adr r0, 1f + ldr r2, [r0] + add r0, r2, r0 + + ldmia r0, {r1, r2, r3, r4, r5, r6, r7, r8} + @ r1 = phys address of L2C-310 controller + @ r2 = aux_ctrl + @ r3 = tag_latency + @ r4 = data_latency + @ r5 = filter_start + @ r6 = filter_end + @ r7 = prefetch_ctrl + @ r8 = pwr_ctrl + + @ Check that the address has been initialised + teq r1, #0 + moveq pc, lr + + @ The prefetch and power control registers are revision dependent + @ and can be written whether or not the L2 cache is enabled + ldr r0, [r1, #L2X0_CACHE_ID] + and r0, r0, #L2X0_CACHE_ID_RTL_MASK + cmp r0, #L310_CACHE_ID_RTL_R2P0 + strcs r7, [r1, #L310_PREFETCH_CTRL] + cmp r0, #L310_CACHE_ID_RTL_R3P0 + strcs r8, [r1, #L310_POWER_CTRL] + + @ Don't setup the L2 cache if it is already enabled + ldr r0, [r1, #L2X0_CTRL] + tst r0, #L2X0_CTRL_EN + movne pc, lr + + str r3, [r1, #L310_TAG_LATENCY_CTRL] + str r4, [r1, #L310_DATA_LATENCY_CTRL] + str r6, [r1, #L310_ADDR_FILTER_END] + str r5, [r1, #L310_ADDR_FILTER_START] + + str r2, [r1, #L2X0_AUX_CTRL] + mov r9, #L2X0_CTRL_EN + str r9, [r1, #L2X0_CTRL] + mov pc, lr +ENDPROC(l2c310_early_resume) + + .align +1: .long l2x0_saved_regs - . diff --git a/arch/arm/mm/mm-armv.c b/arch/arm/mm/mm-armv.c deleted file mode 100644 index fb5b40289de..00000000000 --- a/arch/arm/mm/mm-armv.c +++ /dev/null @@ -1,624 +0,0 @@ -/* - * linux/arch/arm/mm/mm-armv.c - * - * Copyright (C) 1998-2005 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Page table sludge for ARM v3 and v4 processor architectures. - */ -#include <linux/config.h> -#include <linux/module.h> -#include <linux/mm.h> -#include <linux/init.h> -#include <linux/bootmem.h> -#include <linux/highmem.h> -#include <linux/nodemask.h> - -#include <asm/pgalloc.h> -#include <asm/page.h> -#include <asm/io.h> -#include <asm/setup.h> -#include <asm/tlbflush.h> - -#include <asm/mach/map.h> - -#define CPOLICY_UNCACHED 0 -#define CPOLICY_BUFFERED 1 -#define CPOLICY_WRITETHROUGH 2 -#define CPOLICY_WRITEBACK 3 -#define CPOLICY_WRITEALLOC 4 - -static unsigned int cachepolicy __initdata = CPOLICY_WRITEBACK; -static unsigned int ecc_mask __initdata = 0; -pgprot_t pgprot_kernel; - -EXPORT_SYMBOL(pgprot_kernel); - -pmd_t *top_pmd; - -struct cachepolicy { - const char policy[16]; - unsigned int cr_mask; - unsigned int pmd; - unsigned int pte; -}; - -static struct cachepolicy cache_policies[] __initdata = { - { - .policy = "uncached", - .cr_mask = CR_W|CR_C, - .pmd = PMD_SECT_UNCACHED, - .pte = 0, - }, { - .policy = "buffered", - .cr_mask = CR_C, - .pmd = PMD_SECT_BUFFERED, - .pte = PTE_BUFFERABLE, - }, { - .policy = "writethrough", - .cr_mask = 0, - .pmd = PMD_SECT_WT, - .pte = PTE_CACHEABLE, - }, { - .policy = "writeback", - .cr_mask = 0, - .pmd = PMD_SECT_WB, - .pte = PTE_BUFFERABLE|PTE_CACHEABLE, - }, { - .policy = "writealloc", - .cr_mask = 0, - .pmd = PMD_SECT_WBWA, - .pte = PTE_BUFFERABLE|PTE_CACHEABLE, - } -}; - -/* - * These are useful for identifing cache coherency - * problems by allowing the cache or the cache and - * writebuffer to be turned off. (Note: the write - * buffer should not be on and the cache off). 
- */ -static void __init early_cachepolicy(char **p) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(cache_policies); i++) { - int len = strlen(cache_policies[i].policy); - - if (memcmp(*p, cache_policies[i].policy, len) == 0) { - cachepolicy = i; - cr_alignment &= ~cache_policies[i].cr_mask; - cr_no_alignment &= ~cache_policies[i].cr_mask; - *p += len; - break; - } - } - if (i == ARRAY_SIZE(cache_policies)) - printk(KERN_ERR "ERROR: unknown or unsupported cache policy\n"); - flush_cache_all(); - set_cr(cr_alignment); -} - -static void __init early_nocache(char **__unused) -{ - char *p = "buffered"; - printk(KERN_WARNING "nocache is deprecated; use cachepolicy=%s\n", p); - early_cachepolicy(&p); -} - -static void __init early_nowrite(char **__unused) -{ - char *p = "uncached"; - printk(KERN_WARNING "nowb is deprecated; use cachepolicy=%s\n", p); - early_cachepolicy(&p); -} - -static void __init early_ecc(char **p) -{ - if (memcmp(*p, "on", 2) == 0) { - ecc_mask = PMD_PROTECTION; - *p += 2; - } else if (memcmp(*p, "off", 3) == 0) { - ecc_mask = 0; - *p += 3; - } -} - -__early_param("nocache", early_nocache); -__early_param("nowb", early_nowrite); -__early_param("cachepolicy=", early_cachepolicy); -__early_param("ecc=", early_ecc); - -static int __init noalign_setup(char *__unused) -{ - cr_alignment &= ~CR_A; - cr_no_alignment &= ~CR_A; - set_cr(cr_alignment); - return 1; -} - -__setup("noalign", noalign_setup); - -#define FIRST_KERNEL_PGD_NR (FIRST_USER_PGD_NR + USER_PTRS_PER_PGD) - -static inline pmd_t *pmd_off(pgd_t *pgd, unsigned long virt) -{ - return pmd_offset(pgd, virt); -} - -static inline pmd_t *pmd_off_k(unsigned long virt) -{ - return pmd_off(pgd_offset_k(virt), virt); -} - -/* - * need to get a 16k page for level 1 - */ -pgd_t *get_pgd_slow(struct mm_struct *mm) -{ - pgd_t *new_pgd, *init_pgd; - pmd_t *new_pmd, *init_pmd; - pte_t *new_pte, *init_pte; - - new_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL, 2); - if (!new_pgd) - goto no_pgd; - - memzero(new_pgd, FIRST_KERNEL_PGD_NR * sizeof(pgd_t)); - - /* - * Copy over the kernel and IO PGD entries - */ - init_pgd = pgd_offset_k(0); - memcpy(new_pgd + FIRST_KERNEL_PGD_NR, init_pgd + FIRST_KERNEL_PGD_NR, - (PTRS_PER_PGD - FIRST_KERNEL_PGD_NR) * sizeof(pgd_t)); - - clean_dcache_area(new_pgd, PTRS_PER_PGD * sizeof(pgd_t)); - - if (!vectors_high()) { - /* - * On ARM, first page must always be allocated since it - * contains the machine vectors. - */ - new_pmd = pmd_alloc(mm, new_pgd, 0); - if (!new_pmd) - goto no_pmd; - - new_pte = pte_alloc_map(mm, new_pmd, 0); - if (!new_pte) - goto no_pte; - - init_pmd = pmd_offset(init_pgd, 0); - init_pte = pte_offset_map_nested(init_pmd, 0); - set_pte(new_pte, *init_pte); - pte_unmap_nested(init_pte); - pte_unmap(new_pte); - } - - return new_pgd; - -no_pte: - pmd_free(new_pmd); -no_pmd: - free_pages((unsigned long)new_pgd, 2); -no_pgd: - return NULL; -} - -void free_pgd_slow(pgd_t *pgd) -{ - pmd_t *pmd; - struct page *pte; - - if (!pgd) - return; - - /* pgd is always present and good */ - pmd = pmd_off(pgd, 0); - if (pmd_none(*pmd)) - goto free; - if (pmd_bad(*pmd)) { - pmd_ERROR(*pmd); - pmd_clear(pmd); - goto free; - } - - pte = pmd_page(*pmd); - pmd_clear(pmd); - dec_page_state(nr_page_table_pages); - pte_lock_deinit(pte); - pte_free(pte); - pmd_free(pmd); -free: - free_pages((unsigned long) pgd, 2); -} - -/* - * Create a SECTION PGD between VIRT and PHYS in domain - * DOMAIN with protection PROT. This operates on half- - * pgdir entry increments. 
- */ -static inline void -alloc_init_section(unsigned long virt, unsigned long phys, int prot) -{ - pmd_t *pmdp = pmd_off_k(virt); - - if (virt & (1 << 20)) - pmdp++; - - *pmdp = __pmd(phys | prot); - flush_pmd_entry(pmdp); -} - -/* - * Create a SUPER SECTION PGD between VIRT and PHYS with protection PROT - */ -static inline void -alloc_init_supersection(unsigned long virt, unsigned long phys, int prot) -{ - int i; - - for (i = 0; i < 16; i += 1) { - alloc_init_section(virt, phys, prot | PMD_SECT_SUPER); - - virt += (PGDIR_SIZE / 2); - } -} - -/* - * Add a PAGE mapping between VIRT and PHYS in domain - * DOMAIN with protection PROT. Note that due to the - * way we map the PTEs, we must allocate two PTE_SIZE'd - * blocks - one for the Linux pte table, and one for - * the hardware pte table. - */ -static inline void -alloc_init_page(unsigned long virt, unsigned long phys, unsigned int prot_l1, pgprot_t prot) -{ - pmd_t *pmdp = pmd_off_k(virt); - pte_t *ptep; - - if (pmd_none(*pmdp)) { - ptep = alloc_bootmem_low_pages(2 * PTRS_PER_PTE * - sizeof(pte_t)); - - __pmd_populate(pmdp, __pa(ptep) | prot_l1); - } - ptep = pte_offset_kernel(pmdp, virt); - - set_pte(ptep, pfn_pte(phys >> PAGE_SHIFT, prot)); -} - -struct mem_types { - unsigned int prot_pte; - unsigned int prot_l1; - unsigned int prot_sect; - unsigned int domain; -}; - -static struct mem_types mem_types[] __initdata = { - [MT_DEVICE] = { - .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | - L_PTE_WRITE, - .prot_l1 = PMD_TYPE_TABLE, - .prot_sect = PMD_TYPE_SECT | PMD_SECT_UNCACHED | - PMD_SECT_AP_WRITE, - .domain = DOMAIN_IO, - }, - [MT_CACHECLEAN] = { - .prot_sect = PMD_TYPE_SECT, - .domain = DOMAIN_KERNEL, - }, - [MT_MINICLEAN] = { - .prot_sect = PMD_TYPE_SECT | PMD_SECT_MINICACHE, - .domain = DOMAIN_KERNEL, - }, - [MT_LOW_VECTORS] = { - .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | - L_PTE_EXEC, - .prot_l1 = PMD_TYPE_TABLE, - .domain = DOMAIN_USER, - }, - [MT_HIGH_VECTORS] = { - .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | - L_PTE_USER | L_PTE_EXEC, - .prot_l1 = PMD_TYPE_TABLE, - .domain = DOMAIN_USER, - }, - [MT_MEMORY] = { - .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE, - .domain = DOMAIN_KERNEL, - }, - [MT_ROM] = { - .prot_sect = PMD_TYPE_SECT, - .domain = DOMAIN_KERNEL, - }, - [MT_IXP2000_DEVICE] = { /* IXP2400 requires XCB=101 for on-chip I/O */ - .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | - L_PTE_WRITE, - .prot_l1 = PMD_TYPE_TABLE, - .prot_sect = PMD_TYPE_SECT | PMD_SECT_UNCACHED | - PMD_SECT_AP_WRITE | PMD_SECT_BUFFERABLE | - PMD_SECT_TEX(1), - .domain = DOMAIN_IO, - } -}; - -/* - * Adjust the PMD section entries according to the CPU in use. 
- */ -void __init build_mem_type_table(void) -{ - struct cachepolicy *cp; - unsigned int cr = get_cr(); - unsigned int user_pgprot; - int cpu_arch = cpu_architecture(); - int i; - -#if defined(CONFIG_CPU_DCACHE_DISABLE) - if (cachepolicy > CPOLICY_BUFFERED) - cachepolicy = CPOLICY_BUFFERED; -#elif defined(CONFIG_CPU_DCACHE_WRITETHROUGH) - if (cachepolicy > CPOLICY_WRITETHROUGH) - cachepolicy = CPOLICY_WRITETHROUGH; -#endif - if (cpu_arch < CPU_ARCH_ARMv5) { - if (cachepolicy >= CPOLICY_WRITEALLOC) - cachepolicy = CPOLICY_WRITEBACK; - ecc_mask = 0; - } - - if (cpu_arch <= CPU_ARCH_ARMv5TEJ) { - for (i = 0; i < ARRAY_SIZE(mem_types); i++) { - if (mem_types[i].prot_l1) - mem_types[i].prot_l1 |= PMD_BIT4; - if (mem_types[i].prot_sect) - mem_types[i].prot_sect |= PMD_BIT4; - } - } - - cp = &cache_policies[cachepolicy]; - user_pgprot = cp->pte; - - /* - * ARMv6 and above have extended page tables. - */ - if (cpu_arch >= CPU_ARCH_ARMv6 && (cr & CR_XP)) { - /* - * bit 4 becomes XN which we must clear for the - * kernel memory mapping. - */ - mem_types[MT_MEMORY].prot_sect &= ~PMD_BIT4; - mem_types[MT_ROM].prot_sect &= ~PMD_BIT4; - /* - * Mark cache clean areas and XIP ROM read only - * from SVC mode and no access from userspace. - */ - mem_types[MT_ROM].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; - mem_types[MT_MINICLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; - mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; - - /* - * Mark the device area as "shared device" - */ - mem_types[MT_DEVICE].prot_pte |= L_PTE_BUFFERABLE; - mem_types[MT_DEVICE].prot_sect |= PMD_SECT_BUFFERED; - - /* - * User pages need to be mapped with the ASID - * (iow, non-global) - */ - user_pgprot |= L_PTE_ASID; - } - - if (cpu_arch >= CPU_ARCH_ARMv5) { - mem_types[MT_LOW_VECTORS].prot_pte |= cp->pte & PTE_CACHEABLE; - mem_types[MT_HIGH_VECTORS].prot_pte |= cp->pte & PTE_CACHEABLE; - } else { - mem_types[MT_LOW_VECTORS].prot_pte |= cp->pte; - mem_types[MT_HIGH_VECTORS].prot_pte |= cp->pte; - mem_types[MT_MINICLEAN].prot_sect &= ~PMD_SECT_TEX(1); - } - - mem_types[MT_LOW_VECTORS].prot_l1 |= ecc_mask; - mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask; - mem_types[MT_MEMORY].prot_sect |= ecc_mask | cp->pmd; - mem_types[MT_ROM].prot_sect |= cp->pmd; - - for (i = 0; i < 16; i++) { - unsigned long v = pgprot_val(protection_map[i]); - v = (v & ~(PTE_BUFFERABLE|PTE_CACHEABLE)) | user_pgprot; - protection_map[i] = __pgprot(v); - } - - pgprot_kernel = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | - L_PTE_DIRTY | L_PTE_WRITE | - L_PTE_EXEC | cp->pte); - - switch (cp->pmd) { - case PMD_SECT_WT: - mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WT; - break; - case PMD_SECT_WB: - case PMD_SECT_WBWA: - mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WB; - break; - } - printk("Memory policy: ECC %sabled, Data cache %s\n", - ecc_mask ? "en" : "dis", cp->policy); -} - -#define vectors_base() (vectors_high() ? 0xffff0000 : 0) - -/* - * Create the page directory entries and any necessary - * page tables for the mapping specified by `md'. We - * are able to cope here with varying sizes and address - * offsets, and we take full advantage of sections and - * supersections. 
- */ -void __init create_mapping(struct map_desc *md) -{ - unsigned long virt, length; - int prot_sect, prot_l1, domain; - pgprot_t prot_pte; - unsigned long off = (u32)__pfn_to_phys(md->pfn); - - if (md->virtual != vectors_base() && md->virtual < TASK_SIZE) { - printk(KERN_WARNING "BUG: not creating mapping for " - "0x%08llx at 0x%08lx in user region\n", - __pfn_to_phys((u64)md->pfn), md->virtual); - return; - } - - if ((md->type == MT_DEVICE || md->type == MT_ROM) && - md->virtual >= PAGE_OFFSET && md->virtual < VMALLOC_END) { - printk(KERN_WARNING "BUG: mapping for 0x%08llx at 0x%08lx " - "overlaps vmalloc space\n", - __pfn_to_phys((u64)md->pfn), md->virtual); - } - - domain = mem_types[md->type].domain; - prot_pte = __pgprot(mem_types[md->type].prot_pte); - prot_l1 = mem_types[md->type].prot_l1 | PMD_DOMAIN(domain); - prot_sect = mem_types[md->type].prot_sect | PMD_DOMAIN(domain); - - /* - * Catch 36-bit addresses - */ - if(md->pfn >= 0x100000) { - if(domain) { - printk(KERN_ERR "MM: invalid domain in supersection " - "mapping for 0x%08llx at 0x%08lx\n", - __pfn_to_phys((u64)md->pfn), md->virtual); - return; - } - if((md->virtual | md->length | __pfn_to_phys(md->pfn)) - & ~SUPERSECTION_MASK) { - printk(KERN_ERR "MM: cannot create mapping for " - "0x%08llx at 0x%08lx invalid alignment\n", - __pfn_to_phys((u64)md->pfn), md->virtual); - return; - } - - /* - * Shift bits [35:32] of address into bits [23:20] of PMD - * (See ARMv6 spec). - */ - off |= (((md->pfn >> (32 - PAGE_SHIFT)) & 0xF) << 20); - } - - virt = md->virtual; - off -= virt; - length = md->length; - - if (mem_types[md->type].prot_l1 == 0 && - (virt & 0xfffff || (virt + off) & 0xfffff || (virt + length) & 0xfffff)) { - printk(KERN_WARNING "BUG: map for 0x%08lx at 0x%08lx can not " - "be mapped using pages, ignoring.\n", - __pfn_to_phys(md->pfn), md->virtual); - return; - } - - while ((virt & 0xfffff || (virt + off) & 0xfffff) && length >= PAGE_SIZE) { - alloc_init_page(virt, virt + off, prot_l1, prot_pte); - - virt += PAGE_SIZE; - length -= PAGE_SIZE; - } - - /* N.B. ARMv6 supersections are only defined to work with domain 0. - * Since domain assignments can in fact be arbitrary, the - * 'domain == 0' check below is required to insure that ARMv6 - * supersections are only allocated for domain 0 regardless - * of the actual domain assignments in use. - */ - if (cpu_architecture() >= CPU_ARCH_ARMv6 && domain == 0) { - /* - * Align to supersection boundary if !high pages. - * High pages have already been checked for proper - * alignment above and they will fail the SUPSERSECTION_MASK - * check because of the way the address is encoded into - * offset. - */ - if (md->pfn <= 0x100000) { - while ((virt & ~SUPERSECTION_MASK || - (virt + off) & ~SUPERSECTION_MASK) && - length >= (PGDIR_SIZE / 2)) { - alloc_init_section(virt, virt + off, prot_sect); - - virt += (PGDIR_SIZE / 2); - length -= (PGDIR_SIZE / 2); - } - } - - while (length >= SUPERSECTION_SIZE) { - alloc_init_supersection(virt, virt + off, prot_sect); - - virt += SUPERSECTION_SIZE; - length -= SUPERSECTION_SIZE; - } - } - - /* - * A section mapping covers half a "pgdir" entry. - */ - while (length >= (PGDIR_SIZE / 2)) { - alloc_init_section(virt, virt + off, prot_sect); - - virt += (PGDIR_SIZE / 2); - length -= (PGDIR_SIZE / 2); - } - - while (length >= PAGE_SIZE) { - alloc_init_page(virt, virt + off, prot_l1, prot_pte); - - virt += PAGE_SIZE; - length -= PAGE_SIZE; - } -} - -/* - * In order to soft-boot, we need to insert a 1:1 mapping in place of - * the user-mode pages. 
This will then ensure that we have predictable - * results when turning the mmu off - */ -void setup_mm_for_reboot(char mode) -{ - unsigned long base_pmdval; - pgd_t *pgd; - int i; - - if (current->mm && current->mm->pgd) - pgd = current->mm->pgd; - else - pgd = init_mm.pgd; - - base_pmdval = PMD_SECT_AP_WRITE | PMD_SECT_AP_READ | PMD_TYPE_SECT; - if (cpu_architecture() <= CPU_ARCH_ARMv5TEJ) - base_pmdval |= PMD_BIT4; - - for (i = 0; i < FIRST_USER_PGD_NR + USER_PTRS_PER_PGD; i++, pgd++) { - unsigned long pmdval = (i << PGDIR_SHIFT) | base_pmdval; - pmd_t *pmd; - - pmd = pmd_off(pgd, i << PGDIR_SHIFT); - pmd[0] = __pmd(pmdval); - pmd[1] = __pmd(pmdval + (1 << (PGDIR_SHIFT - 1))); - flush_pmd_entry(pmd); - } -} - -/* - * Create the architecture specific mappings - */ -void __init iotable_init(struct map_desc *io_desc, int nr) -{ - int i; - - for (i = 0; i < nr; i++) - create_mapping(io_desc + i); -} diff --git a/arch/arm/mm/mm.h b/arch/arm/mm/mm.h new file mode 100644 index 00000000000..ce727d47275 --- /dev/null +++ b/arch/arm/mm/mm.h @@ -0,0 +1,99 @@ +#ifdef CONFIG_MMU +#include <linux/list.h> +#include <linux/vmalloc.h> + +#include <asm/pgtable.h> + +/* the upper-most page table pointer */ +extern pmd_t *top_pmd; + +/* + * 0xffff8000 to 0xffffffff is reserved for any ARM architecture + * specific hacks for copying pages efficiently, while 0xffff4000 + * is reserved for VIPT aliasing flushing by generic code. + * + * Note that we don't allow VIPT aliasing caches with SMP. + */ +#define COPYPAGE_MINICACHE 0xffff8000 +#define COPYPAGE_V6_FROM 0xffff8000 +#define COPYPAGE_V6_TO 0xffffc000 +/* PFN alias flushing, for VIPT caches */ +#define FLUSH_ALIAS_START 0xffff4000 + +static inline void set_top_pte(unsigned long va, pte_t pte) +{ + pte_t *ptep = pte_offset_kernel(top_pmd, va); + set_pte_ext(ptep, pte, 0); + local_flush_tlb_kernel_page(va); +} + +static inline pte_t get_top_pte(unsigned long va) +{ + pte_t *ptep = pte_offset_kernel(top_pmd, va); + return *ptep; +} + +static inline pmd_t *pmd_off_k(unsigned long virt) +{ + return pmd_offset(pud_offset(pgd_offset_k(virt), virt), virt); +} + +struct mem_type { + pteval_t prot_pte; + pteval_t prot_pte_s2; + pmdval_t prot_l1; + pmdval_t prot_sect; + unsigned int domain; +}; + +const struct mem_type *get_mem_type(unsigned int type); + +extern void __flush_dcache_page(struct address_space *mapping, struct page *page); + +/* + * ARM specific vm_struct->flags bits. 
+ */ + +/* (super)section-mapped I/O regions used by ioremap()/iounmap() */ +#define VM_ARM_SECTION_MAPPING 0x80000000 + +/* permanent static mappings from iotable_init() */ +#define VM_ARM_STATIC_MAPPING 0x40000000 + +/* empty mapping */ +#define VM_ARM_EMPTY_MAPPING 0x20000000 + +/* mapping type (attributes) for permanent static mappings */ +#define VM_ARM_MTYPE(mt) ((mt) << 20) +#define VM_ARM_MTYPE_MASK (0x1f << 20) + +/* consistent regions used by dma_alloc_attrs() */ +#define VM_ARM_DMA_CONSISTENT 0x20000000 + + +struct static_vm { + struct vm_struct vm; + struct list_head list; +}; + +extern struct list_head static_vmlist; +extern struct static_vm *find_static_vm_vaddr(void *vaddr); +extern __init void add_static_vm_early(struct static_vm *svm); + +#endif + +#ifdef CONFIG_ZONE_DMA +extern phys_addr_t arm_dma_limit; +extern unsigned long arm_dma_pfn_limit; +#else +#define arm_dma_limit ((phys_addr_t)~0) +#define arm_dma_pfn_limit (~0ul >> PAGE_SHIFT) +#endif + +extern phys_addr_t arm_lowmem_limit; + +void __init bootmem_init(void); +void arm_mm_memblock_reserve(void); +void dma_contiguous_remap(void); + +unsigned long __clear_cr(unsigned long mask); diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c index 3de7f84b53c..5e85ed37136 100644 --- a/arch/arm/mm/mmap.c +++ b/arch/arm/mm/mmap.c @@ -1,18 +1,47 @@ /* * linux/arch/arm/mm/mmap.c */ -#include <linux/config.h> #include <linux/fs.h> #include <linux/mm.h> #include <linux/mman.h> #include <linux/shm.h> - -#include <asm/system.h> +#include <linux/sched.h> +#include <linux/io.h> +#include <linux/personality.h> +#include <linux/random.h> +#include <asm/cachetype.h> #define COLOUR_ALIGN(addr,pgoff) \ ((((addr)+SHMLBA-1)&~(SHMLBA-1)) + \ (((pgoff)<<PAGE_SHIFT) & (SHMLBA-1))) +/* gap between mmap and stack */ +#define MIN_GAP (128*1024*1024UL) +#define MAX_GAP ((TASK_SIZE)/6*5) + +static int mmap_is_legacy(void) +{ + if (current->personality & ADDR_COMPAT_LAYOUT) + return 1; + + if (rlimit(RLIMIT_STACK) == RLIM_INFINITY) + return 1; + + return sysctl_legacy_va_layout; +} + +static unsigned long mmap_base(unsigned long rnd) +{ + unsigned long gap = rlimit(RLIMIT_STACK); + + if (gap < MIN_GAP) + gap = MIN_GAP; + else if (gap > MAX_GAP) + gap = MAX_GAP; + + return PAGE_ALIGN(TASK_SIZE - gap - rnd); +} + /* * We need to ensure that shared mappings are correctly aligned to * avoid aliasing issues with VIPT caches. We need to ensure that @@ -28,33 +57,23 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, { struct mm_struct *mm = current->mm; struct vm_area_struct *vma; - unsigned long start_addr; -#ifdef CONFIG_CPU_V6 - unsigned int cache_type; - int do_align = 0, aliasing = 0; + int do_align = 0; + int aliasing = cache_is_vipt_aliasing(); + struct vm_unmapped_area_info info; /* * We only need to do colour alignment if either the I or D - * caches alias. This is indicated by bits 9 and 21 of the - * cache type register. + * caches alias. */ - cache_type = read_cpuid(CPUID_CACHETYPE); - if (cache_type != read_cpuid(CPUID_ID)) { - aliasing = (cache_type | cache_type >> 12) & (1 << 11); - if (aliasing) - do_align = filp || flags & MAP_SHARED; - } -#else -#define do_align 0 -#define aliasing 0 -#endif + if (aliasing) + do_align = filp || (flags & MAP_SHARED); /* - * We should enforce the MAP_FIXED case. However, currently - * the generic kernel code doesn't allow us to handle this. + * We enforce the MAP_FIXED case. 
*/ if (flags & MAP_FIXED) { - if (aliasing && flags & MAP_SHARED && addr & (SHMLBA - 1)) + if (aliasing && flags & MAP_SHARED && + (addr - (pgoff << PAGE_SHIFT)) & (SHMLBA - 1)) return -EINVAL; return addr; } @@ -73,45 +92,141 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, (!vma || addr + len <= vma->vm_start)) return addr; } - if (len > mm->cached_hole_size) { - start_addr = addr = mm->free_area_cache; - } else { - start_addr = addr = TASK_UNMAPPED_BASE; - mm->cached_hole_size = 0; + + info.flags = 0; + info.length = len; + info.low_limit = mm->mmap_base; + info.high_limit = TASK_SIZE; + info.align_mask = do_align ? (PAGE_MASK & (SHMLBA - 1)) : 0; + info.align_offset = pgoff << PAGE_SHIFT; + return vm_unmapped_area(&info); +} + +unsigned long +arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, + const unsigned long len, const unsigned long pgoff, + const unsigned long flags) +{ + struct vm_area_struct *vma; + struct mm_struct *mm = current->mm; + unsigned long addr = addr0; + int do_align = 0; + int aliasing = cache_is_vipt_aliasing(); + struct vm_unmapped_area_info info; + + /* + * We only need to do colour alignment if either the I or D + * caches alias. + */ + if (aliasing) + do_align = filp || (flags & MAP_SHARED); + + /* requested length too big for entire address space */ + if (len > TASK_SIZE) + return -ENOMEM; + + if (flags & MAP_FIXED) { + if (aliasing && flags & MAP_SHARED && + (addr - (pgoff << PAGE_SHIFT)) & (SHMLBA - 1)) + return -EINVAL; + return addr; } -full_search: - if (do_align) - addr = COLOUR_ALIGN(addr, pgoff); - else - addr = PAGE_ALIGN(addr); - - for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { - /* At this point: (!vma || addr < vma->vm_end). */ - if (TASK_SIZE - len < addr) { - /* - * Start a new search - just in case we missed - * some holes. - */ - if (start_addr != TASK_UNMAPPED_BASE) { - start_addr = addr = TASK_UNMAPPED_BASE; - mm->cached_hole_size = 0; - goto full_search; - } - return -ENOMEM; - } - if (!vma || addr + len <= vma->vm_start) { - /* - * Remember the place where we stopped the search: - */ - mm->free_area_cache = addr + len; - return addr; - } - if (addr + mm->cached_hole_size < vma->vm_start) - mm->cached_hole_size = vma->vm_start - addr; - addr = vma->vm_end; + /* requesting a specific address */ + if (addr) { if (do_align) addr = COLOUR_ALIGN(addr, pgoff); + else + addr = PAGE_ALIGN(addr); + vma = find_vma(mm, addr); + if (TASK_SIZE - len >= addr && + (!vma || addr + len <= vma->vm_start)) + return addr; + } + + info.flags = VM_UNMAPPED_AREA_TOPDOWN; + info.length = len; + info.low_limit = FIRST_USER_ADDRESS; + info.high_limit = mm->mmap_base; + info.align_mask = do_align ? (PAGE_MASK & (SHMLBA - 1)) : 0; + info.align_offset = pgoff << PAGE_SHIFT; + addr = vm_unmapped_area(&info); + + /* + * A failed mmap() very likely causes application failure, + * so fall back to the bottom-up function here. This scenario + * can happen with large stack limits and large mmap() + * allocations. 
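(A quick worked example of the colour alignment both lookup paths above request from vm_unmapped_area(): on a VIPT-aliasing D-cache every MAP_SHARED view of the same file offset must land on the same cache colour, so a candidate address is only acceptable when (addr - (pgoff << PAGE_SHIFT)) & (SHMLBA - 1) == 0. With the usual ARM SHMLBA of four pages (0x4000 with 4K pages) and pgoff = 5, that means addr % 0x4000 must equal 0x1000. align_mask = PAGE_MASK & (SHMLBA - 1) together with align_offset = pgoff << PAGE_SHIFT expresses exactly this constraint, and the same test is what the MAP_FIXED paths reject with -EINVAL. The four-page figure is the common configuration, not something defined by this patch.)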
+ */ + if (addr & ~PAGE_MASK) { + VM_BUG_ON(addr != -ENOMEM); + info.flags = 0; + info.low_limit = mm->mmap_base; + info.high_limit = TASK_SIZE; + addr = vm_unmapped_area(&info); } + + return addr; +} + +void arch_pick_mmap_layout(struct mm_struct *mm) +{ + unsigned long random_factor = 0UL; + + /* 8 bits of randomness in 20 address space bits */ + if ((current->flags & PF_RANDOMIZE) && + !(current->personality & ADDR_NO_RANDOMIZE)) + random_factor = (get_random_int() % (1 << 8)) << PAGE_SHIFT; + + if (mmap_is_legacy()) { + mm->mmap_base = TASK_UNMAPPED_BASE + random_factor; + mm->get_unmapped_area = arch_get_unmapped_area; + } else { + mm->mmap_base = mmap_base(random_factor); + mm->get_unmapped_area = arch_get_unmapped_area_topdown; + } +} + +/* + * You really shouldn't be using read() or write() on /dev/mem. This + * might go away in the future. + */ +int valid_phys_addr_range(phys_addr_t addr, size_t size) +{ + if (addr < PHYS_OFFSET) + return 0; + if (addr + size > __pa(high_memory - 1) + 1) + return 0; + + return 1; +} + +/* + * Do not allow /dev/mem mappings beyond the supported physical range. + */ +int valid_mmap_phys_addr_range(unsigned long pfn, size_t size) +{ + return (pfn + (size >> PAGE_SHIFT)) <= (1 + (PHYS_MASK >> PAGE_SHIFT)); } +#ifdef CONFIG_STRICT_DEVMEM + +#include <linux/ioport.h> + +/* + * devmem_is_allowed() checks to see if /dev/mem access to a certain + * address is valid. The argument is a physical page number. + * We mimic x86 here by disallowing access to system RAM as well as + * device-exclusive MMIO regions. This effectively disable read()/write() + * on /dev/mem. + */ +int devmem_is_allowed(unsigned long pfn) +{ + if (iomem_is_exclusive(pfn << PAGE_SHIFT)) + return 0; + if (!page_is_ram(pfn)) + return 1; + return 0; +} + +#endif diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 0d90227a0a3..6e3ba8d112a 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -1,45 +1,1494 @@ /* * linux/arch/arm/mm/mmu.c * - * Copyright (C) 2002-2003 Deep Blue Solutions Ltd, all rights reserved. + * Copyright (C) 1995-2005 Russell King * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/errno.h> #include <linux/init.h> -#include <linux/sched.h> -#include <linux/mm.h> +#include <linux/mman.h> +#include <linux/nodemask.h> +#include <linux/memblock.h> +#include <linux/fs.h> +#include <linux/vmalloc.h> +#include <linux/sizes.h> -#include <asm/mmu_context.h> -#include <asm/tlbflush.h> +#include <asm/cp15.h> +#include <asm/cputype.h> +#include <asm/sections.h> +#include <asm/cachetype.h> +#include <asm/sections.h> +#include <asm/setup.h> +#include <asm/smp_plat.h> +#include <asm/tlb.h> +#include <asm/highmem.h> +#include <asm/system_info.h> +#include <asm/traps.h> +#include <asm/procinfo.h> +#include <asm/memory.h> -unsigned int cpu_last_asid = { 1 << ASID_BITS }; +#include <asm/mach/arch.h> +#include <asm/mach/map.h> +#include <asm/mach/pci.h> +#include <asm/fixmap.h> + +#include "mm.h" +#include "tcm.h" + +/* + * empty_zero_page is a special page that is used for + * zero-initialized data and COW. + */ +struct page *empty_zero_page; +EXPORT_SYMBOL(empty_zero_page); + +/* + * The pmd table for the upper-most set of pages. 
+ */ +pmd_t *top_pmd; + +#define CPOLICY_UNCACHED 0 +#define CPOLICY_BUFFERED 1 +#define CPOLICY_WRITETHROUGH 2 +#define CPOLICY_WRITEBACK 3 +#define CPOLICY_WRITEALLOC 4 + +static unsigned int cachepolicy __initdata = CPOLICY_WRITEBACK; +static unsigned int ecc_mask __initdata = 0; +pgprot_t pgprot_user; +pgprot_t pgprot_kernel; +pgprot_t pgprot_hyp_device; +pgprot_t pgprot_s2; +pgprot_t pgprot_s2_device; + +EXPORT_SYMBOL(pgprot_user); +EXPORT_SYMBOL(pgprot_kernel); + +struct cachepolicy { + const char policy[16]; + unsigned int cr_mask; + pmdval_t pmd; + pteval_t pte; + pteval_t pte_s2; +}; + +#ifdef CONFIG_ARM_LPAE +#define s2_policy(policy) policy +#else +#define s2_policy(policy) 0 +#endif + +static struct cachepolicy cache_policies[] __initdata = { + { + .policy = "uncached", + .cr_mask = CR_W|CR_C, + .pmd = PMD_SECT_UNCACHED, + .pte = L_PTE_MT_UNCACHED, + .pte_s2 = s2_policy(L_PTE_S2_MT_UNCACHED), + }, { + .policy = "buffered", + .cr_mask = CR_C, + .pmd = PMD_SECT_BUFFERED, + .pte = L_PTE_MT_BUFFERABLE, + .pte_s2 = s2_policy(L_PTE_S2_MT_UNCACHED), + }, { + .policy = "writethrough", + .cr_mask = 0, + .pmd = PMD_SECT_WT, + .pte = L_PTE_MT_WRITETHROUGH, + .pte_s2 = s2_policy(L_PTE_S2_MT_WRITETHROUGH), + }, { + .policy = "writeback", + .cr_mask = 0, + .pmd = PMD_SECT_WB, + .pte = L_PTE_MT_WRITEBACK, + .pte_s2 = s2_policy(L_PTE_S2_MT_WRITEBACK), + }, { + .policy = "writealloc", + .cr_mask = 0, + .pmd = PMD_SECT_WBWA, + .pte = L_PTE_MT_WRITEALLOC, + .pte_s2 = s2_policy(L_PTE_S2_MT_WRITEBACK), + } +}; + +#ifdef CONFIG_CPU_CP15 +static unsigned long initial_pmd_value __initdata = 0; + +/* + * Initialise the cache_policy variable with the initial state specified + * via the "pmd" value. This is used to ensure that on ARMv6 and later, + * the C code sets the page tables up with the same policy as the head + * assembly code, which avoids an illegal state where the TLBs can get + * confused. See comments in early_cachepolicy() for more information. + */ +void __init init_default_cache_policy(unsigned long pmd) +{ + int i; + + initial_pmd_value = pmd; + + pmd &= PMD_SECT_TEX(1) | PMD_SECT_BUFFERABLE | PMD_SECT_CACHEABLE; + + for (i = 0; i < ARRAY_SIZE(cache_policies); i++) + if (cache_policies[i].pmd == pmd) { + cachepolicy = i; + break; + } + + if (i == ARRAY_SIZE(cache_policies)) + pr_err("ERROR: could not find cache policy\n"); +} + +/* + * These are useful for identifying cache coherency problems by allowing + * the cache or the cache and writebuffer to be turned off. (Note: the + * write buffer should not be on and the cache off). + */ +static int __init early_cachepolicy(char *p) +{ + int i, selected = -1; + + for (i = 0; i < ARRAY_SIZE(cache_policies); i++) { + int len = strlen(cache_policies[i].policy); + + if (memcmp(p, cache_policies[i].policy, len) == 0) { + selected = i; + break; + } + } + + if (selected == -1) + pr_err("ERROR: unknown or unsupported cache policy\n"); + + /* + * This restriction is partly to do with the way we boot; it is + * unpredictable to have memory mapped using two different sets of + * memory attributes (shared, type, and cache attribs). We can not + * change these attributes once the initial assembly has setup the + * page tables. 
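(By way of example: on an ARMv5 board, booting with cachepolicy=writethrough takes effect here and switches the kernel mappings to write-through. On ARMv6 and later the parameter is only honoured when it names the policy the boot assembly already set up; anything else just triggers the "Only cachepolicy=%s supported on ARMv6 and later" warning below, and SMP additionally forces write-allocate later in build_mem_type_table().)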
+ */ + if (cpu_architecture() >= CPU_ARCH_ARMv6 && selected != cachepolicy) { + pr_warn("Only cachepolicy=%s supported on ARMv6 and later\n", + cache_policies[cachepolicy].policy); + return 0; + } + + if (selected != cachepolicy) { + unsigned long cr = __clear_cr(cache_policies[selected].cr_mask); + cachepolicy = selected; + flush_cache_all(); + set_cr(cr); + } + return 0; +} +early_param("cachepolicy", early_cachepolicy); + +static int __init early_nocache(char *__unused) +{ + char *p = "buffered"; + printk(KERN_WARNING "nocache is deprecated; use cachepolicy=%s\n", p); + early_cachepolicy(p); + return 0; +} +early_param("nocache", early_nocache); + +static int __init early_nowrite(char *__unused) +{ + char *p = "uncached"; + printk(KERN_WARNING "nowb is deprecated; use cachepolicy=%s\n", p); + early_cachepolicy(p); + return 0; +} +early_param("nowb", early_nowrite); + +#ifndef CONFIG_ARM_LPAE +static int __init early_ecc(char *p) +{ + if (memcmp(p, "on", 2) == 0) + ecc_mask = PMD_PROTECTION; + else if (memcmp(p, "off", 3) == 0) + ecc_mask = 0; + return 0; +} +early_param("ecc", early_ecc); +#endif + +#else /* ifdef CONFIG_CPU_CP15 */ + +static int __init early_cachepolicy(char *p) +{ + pr_warning("cachepolicy kernel parameter not supported without cp15\n"); +} +early_param("cachepolicy", early_cachepolicy); + +static int __init noalign_setup(char *__unused) +{ + pr_warning("noalign kernel parameter not supported without cp15\n"); +} +__setup("noalign", noalign_setup); + +#endif /* ifdef CONFIG_CPU_CP15 / else */ + +#define PROT_PTE_DEVICE L_PTE_PRESENT|L_PTE_YOUNG|L_PTE_DIRTY|L_PTE_XN +#define PROT_PTE_S2_DEVICE PROT_PTE_DEVICE +#define PROT_SECT_DEVICE PMD_TYPE_SECT|PMD_SECT_AP_WRITE + +static struct mem_type mem_types[] = { + [MT_DEVICE] = { /* Strongly ordered / ARMv6 shared device */ + .prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_SHARED | + L_PTE_SHARED, + .prot_pte_s2 = s2_policy(PROT_PTE_S2_DEVICE) | + s2_policy(L_PTE_S2_MT_DEV_SHARED) | + L_PTE_SHARED, + .prot_l1 = PMD_TYPE_TABLE, + .prot_sect = PROT_SECT_DEVICE | PMD_SECT_S, + .domain = DOMAIN_IO, + }, + [MT_DEVICE_NONSHARED] = { /* ARMv6 non-shared device */ + .prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_NONSHARED, + .prot_l1 = PMD_TYPE_TABLE, + .prot_sect = PROT_SECT_DEVICE, + .domain = DOMAIN_IO, + }, + [MT_DEVICE_CACHED] = { /* ioremap_cached */ + .prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_CACHED, + .prot_l1 = PMD_TYPE_TABLE, + .prot_sect = PROT_SECT_DEVICE | PMD_SECT_WB, + .domain = DOMAIN_IO, + }, + [MT_DEVICE_WC] = { /* ioremap_wc */ + .prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_WC, + .prot_l1 = PMD_TYPE_TABLE, + .prot_sect = PROT_SECT_DEVICE, + .domain = DOMAIN_IO, + }, + [MT_UNCACHED] = { + .prot_pte = PROT_PTE_DEVICE, + .prot_l1 = PMD_TYPE_TABLE, + .prot_sect = PMD_TYPE_SECT | PMD_SECT_XN, + .domain = DOMAIN_IO, + }, + [MT_CACHECLEAN] = { + .prot_sect = PMD_TYPE_SECT | PMD_SECT_XN, + .domain = DOMAIN_KERNEL, + }, +#ifndef CONFIG_ARM_LPAE + [MT_MINICLEAN] = { + .prot_sect = PMD_TYPE_SECT | PMD_SECT_XN | PMD_SECT_MINICACHE, + .domain = DOMAIN_KERNEL, + }, +#endif + [MT_LOW_VECTORS] = { + .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | + L_PTE_RDONLY, + .prot_l1 = PMD_TYPE_TABLE, + .domain = DOMAIN_USER, + }, + [MT_HIGH_VECTORS] = { + .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | + L_PTE_USER | L_PTE_RDONLY, + .prot_l1 = PMD_TYPE_TABLE, + .domain = DOMAIN_USER, + }, + [MT_MEMORY_RWX] = { + .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY, + .prot_l1 = PMD_TYPE_TABLE, + .prot_sect = PMD_TYPE_SECT | 
PMD_SECT_AP_WRITE, + .domain = DOMAIN_KERNEL, + }, + [MT_MEMORY_RW] = { + .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | + L_PTE_XN, + .prot_l1 = PMD_TYPE_TABLE, + .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE, + .domain = DOMAIN_KERNEL, + }, + [MT_ROM] = { + .prot_sect = PMD_TYPE_SECT, + .domain = DOMAIN_KERNEL, + }, + [MT_MEMORY_RWX_NONCACHED] = { + .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | + L_PTE_MT_BUFFERABLE, + .prot_l1 = PMD_TYPE_TABLE, + .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE, + .domain = DOMAIN_KERNEL, + }, + [MT_MEMORY_RW_DTCM] = { + .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | + L_PTE_XN, + .prot_l1 = PMD_TYPE_TABLE, + .prot_sect = PMD_TYPE_SECT | PMD_SECT_XN, + .domain = DOMAIN_KERNEL, + }, + [MT_MEMORY_RWX_ITCM] = { + .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY, + .prot_l1 = PMD_TYPE_TABLE, + .domain = DOMAIN_KERNEL, + }, + [MT_MEMORY_RW_SO] = { + .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | + L_PTE_MT_UNCACHED | L_PTE_XN, + .prot_l1 = PMD_TYPE_TABLE, + .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_S | + PMD_SECT_UNCACHED | PMD_SECT_XN, + .domain = DOMAIN_KERNEL, + }, + [MT_MEMORY_DMA_READY] = { + .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | + L_PTE_XN, + .prot_l1 = PMD_TYPE_TABLE, + .domain = DOMAIN_KERNEL, + }, +}; + +const struct mem_type *get_mem_type(unsigned int type) +{ + return type < ARRAY_SIZE(mem_types) ? &mem_types[type] : NULL; +} +EXPORT_SYMBOL(get_mem_type); + +#define PTE_SET_FN(_name, pteop) \ +static int pte_set_##_name(pte_t *ptep, pgtable_t token, unsigned long addr, \ + void *data) \ +{ \ + pte_t pte = pteop(*ptep); \ +\ + set_pte_ext(ptep, pte, 0); \ + return 0; \ +} \ + +#define SET_MEMORY_FN(_name, callback) \ +int set_memory_##_name(unsigned long addr, int numpages) \ +{ \ + unsigned long start = addr; \ + unsigned long size = PAGE_SIZE*numpages; \ + unsigned end = start + size; \ +\ + if (start < MODULES_VADDR || start >= MODULES_END) \ + return -EINVAL;\ +\ + if (end < MODULES_VADDR || end >= MODULES_END) \ + return -EINVAL; \ +\ + apply_to_page_range(&init_mm, start, size, callback, NULL); \ + flush_tlb_kernel_range(start, end); \ + return 0;\ +} + +PTE_SET_FN(ro, pte_wrprotect) +PTE_SET_FN(rw, pte_mkwrite) +PTE_SET_FN(x, pte_mkexec) +PTE_SET_FN(nx, pte_mknexec) + +SET_MEMORY_FN(ro, pte_set_ro) +SET_MEMORY_FN(rw, pte_set_rw) +SET_MEMORY_FN(x, pte_set_x) +SET_MEMORY_FN(nx, pte_set_nx) /* - * We fork()ed a process, and we need a new context for the child - * to run in. We reserve version 0 for initial tasks so we will - * always allocate an ASID. + * Adjust the PMD section entries according to the CPU in use. 
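(Backing up briefly to the set_memory_*() helpers generated just above: they operate a page at a time and only on the module area, returning -EINVAL for anything outside MODULES_VADDR..MODULES_END. A minimal sketch of a caller, under the assumption that the text passed in was allocated in the module area:)

    /* minimal sketch: text must lie inside MODULES_VADDR..MODULES_END */
    static void protect_module_text(void *text, int npages)
    {
            unsigned long addr = (unsigned long)text;

            set_memory_ro(addr, npages);	/* drop write permission */
            set_memory_nx(addr, npages);	/* drop exec permission */
    }

set_memory_rw() and set_memory_x() undo the change before such an area is modified or freed.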
*/ -void __init_new_context(struct task_struct *tsk, struct mm_struct *mm) +static void __init build_mem_type_table(void) +{ + struct cachepolicy *cp; + unsigned int cr = get_cr(); + pteval_t user_pgprot, kern_pgprot, vecs_pgprot; + pteval_t hyp_device_pgprot, s2_pgprot, s2_device_pgprot; + int cpu_arch = cpu_architecture(); + int i; + + if (cpu_arch < CPU_ARCH_ARMv6) { +#if defined(CONFIG_CPU_DCACHE_DISABLE) + if (cachepolicy > CPOLICY_BUFFERED) + cachepolicy = CPOLICY_BUFFERED; +#elif defined(CONFIG_CPU_DCACHE_WRITETHROUGH) + if (cachepolicy > CPOLICY_WRITETHROUGH) + cachepolicy = CPOLICY_WRITETHROUGH; +#endif + } + if (cpu_arch < CPU_ARCH_ARMv5) { + if (cachepolicy >= CPOLICY_WRITEALLOC) + cachepolicy = CPOLICY_WRITEBACK; + ecc_mask = 0; + } + + if (is_smp()) { + if (cachepolicy != CPOLICY_WRITEALLOC) { + pr_warn("Forcing write-allocate cache policy for SMP\n"); + cachepolicy = CPOLICY_WRITEALLOC; + } + if (!(initial_pmd_value & PMD_SECT_S)) { + pr_warn("Forcing shared mappings for SMP\n"); + initial_pmd_value |= PMD_SECT_S; + } + } + + /* + * Strip out features not present on earlier architectures. + * Pre-ARMv5 CPUs don't have TEX bits. Pre-ARMv6 CPUs or those + * without extended page tables don't have the 'Shared' bit. + */ + if (cpu_arch < CPU_ARCH_ARMv5) + for (i = 0; i < ARRAY_SIZE(mem_types); i++) + mem_types[i].prot_sect &= ~PMD_SECT_TEX(7); + if ((cpu_arch < CPU_ARCH_ARMv6 || !(cr & CR_XP)) && !cpu_is_xsc3()) + for (i = 0; i < ARRAY_SIZE(mem_types); i++) + mem_types[i].prot_sect &= ~PMD_SECT_S; + + /* + * ARMv5 and lower, bit 4 must be set for page tables (was: cache + * "update-able on write" bit on ARM610). However, Xscale and + * Xscale3 require this bit to be cleared. + */ + if (cpu_is_xscale() || cpu_is_xsc3()) { + for (i = 0; i < ARRAY_SIZE(mem_types); i++) { + mem_types[i].prot_sect &= ~PMD_BIT4; + mem_types[i].prot_l1 &= ~PMD_BIT4; + } + } else if (cpu_arch < CPU_ARCH_ARMv6) { + for (i = 0; i < ARRAY_SIZE(mem_types); i++) { + if (mem_types[i].prot_l1) + mem_types[i].prot_l1 |= PMD_BIT4; + if (mem_types[i].prot_sect) + mem_types[i].prot_sect |= PMD_BIT4; + } + } + + /* + * Mark the device areas according to the CPU/architecture. + */ + if (cpu_is_xsc3() || (cpu_arch >= CPU_ARCH_ARMv6 && (cr & CR_XP))) { + if (!cpu_is_xsc3()) { + /* + * Mark device regions on ARMv6+ as execute-never + * to prevent speculative instruction fetches. 
+ */ + mem_types[MT_DEVICE].prot_sect |= PMD_SECT_XN; + mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_XN; + mem_types[MT_DEVICE_CACHED].prot_sect |= PMD_SECT_XN; + mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_XN; + + /* Also setup NX memory mapping */ + mem_types[MT_MEMORY_RW].prot_sect |= PMD_SECT_XN; + } + if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) { + /* + * For ARMv7 with TEX remapping, + * - shared device is SXCB=1100 + * - nonshared device is SXCB=0100 + * - write combine device mem is SXCB=0001 + * (Uncached Normal memory) + */ + mem_types[MT_DEVICE].prot_sect |= PMD_SECT_TEX(1); + mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(1); + mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_BUFFERABLE; + } else if (cpu_is_xsc3()) { + /* + * For Xscale3, + * - shared device is TEXCB=00101 + * - nonshared device is TEXCB=01000 + * - write combine device mem is TEXCB=00100 + * (Inner/Outer Uncacheable in xsc3 parlance) + */ + mem_types[MT_DEVICE].prot_sect |= PMD_SECT_TEX(1) | PMD_SECT_BUFFERED; + mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(2); + mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_TEX(1); + } else { + /* + * For ARMv6 and ARMv7 without TEX remapping, + * - shared device is TEXCB=00001 + * - nonshared device is TEXCB=01000 + * - write combine device mem is TEXCB=00100 + * (Uncached Normal in ARMv6 parlance). + */ + mem_types[MT_DEVICE].prot_sect |= PMD_SECT_BUFFERED; + mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(2); + mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_TEX(1); + } + } else { + /* + * On others, write combining is "Uncached/Buffered" + */ + mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_BUFFERABLE; + } + + /* + * Now deal with the memory-type mappings + */ + cp = &cache_policies[cachepolicy]; + vecs_pgprot = kern_pgprot = user_pgprot = cp->pte; + s2_pgprot = cp->pte_s2; + hyp_device_pgprot = mem_types[MT_DEVICE].prot_pte; + s2_device_pgprot = mem_types[MT_DEVICE].prot_pte_s2; + + /* + * We don't use domains on ARMv6 (since this causes problems with + * v6/v7 kernels), so we must use a separate memory type for user + * r/o, kernel r/w to map the vectors page. + */ +#ifndef CONFIG_ARM_LPAE + if (cpu_arch == CPU_ARCH_ARMv6) + vecs_pgprot |= L_PTE_MT_VECTORS; +#endif + + /* + * ARMv6 and above have extended page tables. + */ + if (cpu_arch >= CPU_ARCH_ARMv6 && (cr & CR_XP)) { +#ifndef CONFIG_ARM_LPAE + /* + * Mark cache clean areas and XIP ROM read only + * from SVC mode and no access from userspace. + */ + mem_types[MT_ROM].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; + mem_types[MT_MINICLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; + mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; +#endif + + /* + * If the initial page tables were created with the S bit + * set, then we need to do the same here for the same + * reasons given in early_cachepolicy(). 
+ */ + if (initial_pmd_value & PMD_SECT_S) { + user_pgprot |= L_PTE_SHARED; + kern_pgprot |= L_PTE_SHARED; + vecs_pgprot |= L_PTE_SHARED; + s2_pgprot |= L_PTE_SHARED; + mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_S; + mem_types[MT_DEVICE_WC].prot_pte |= L_PTE_SHARED; + mem_types[MT_DEVICE_CACHED].prot_sect |= PMD_SECT_S; + mem_types[MT_DEVICE_CACHED].prot_pte |= L_PTE_SHARED; + mem_types[MT_MEMORY_RWX].prot_sect |= PMD_SECT_S; + mem_types[MT_MEMORY_RWX].prot_pte |= L_PTE_SHARED; + mem_types[MT_MEMORY_RW].prot_sect |= PMD_SECT_S; + mem_types[MT_MEMORY_RW].prot_pte |= L_PTE_SHARED; + mem_types[MT_MEMORY_DMA_READY].prot_pte |= L_PTE_SHARED; + mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= PMD_SECT_S; + mem_types[MT_MEMORY_RWX_NONCACHED].prot_pte |= L_PTE_SHARED; + } + } + + /* + * Non-cacheable Normal - intended for memory areas that must + * not cause dirty cache line writebacks when used + */ + if (cpu_arch >= CPU_ARCH_ARMv6) { + if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) { + /* Non-cacheable Normal is XCB = 001 */ + mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= + PMD_SECT_BUFFERED; + } else { + /* For both ARMv6 and non-TEX-remapping ARMv7 */ + mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= + PMD_SECT_TEX(1); + } + } else { + mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= PMD_SECT_BUFFERABLE; + } + +#ifdef CONFIG_ARM_LPAE + /* + * Do not generate access flag faults for the kernel mappings. + */ + for (i = 0; i < ARRAY_SIZE(mem_types); i++) { + mem_types[i].prot_pte |= PTE_EXT_AF; + if (mem_types[i].prot_sect) + mem_types[i].prot_sect |= PMD_SECT_AF; + } + kern_pgprot |= PTE_EXT_AF; + vecs_pgprot |= PTE_EXT_AF; +#endif + + for (i = 0; i < 16; i++) { + pteval_t v = pgprot_val(protection_map[i]); + protection_map[i] = __pgprot(v | user_pgprot); + } + + mem_types[MT_LOW_VECTORS].prot_pte |= vecs_pgprot; + mem_types[MT_HIGH_VECTORS].prot_pte |= vecs_pgprot; + + pgprot_user = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | user_pgprot); + pgprot_kernel = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | + L_PTE_DIRTY | kern_pgprot); + pgprot_s2 = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | s2_pgprot); + pgprot_s2_device = __pgprot(s2_device_pgprot); + pgprot_hyp_device = __pgprot(hyp_device_pgprot); + + mem_types[MT_LOW_VECTORS].prot_l1 |= ecc_mask; + mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask; + mem_types[MT_MEMORY_RWX].prot_sect |= ecc_mask | cp->pmd; + mem_types[MT_MEMORY_RWX].prot_pte |= kern_pgprot; + mem_types[MT_MEMORY_RW].prot_sect |= ecc_mask | cp->pmd; + mem_types[MT_MEMORY_RW].prot_pte |= kern_pgprot; + mem_types[MT_MEMORY_DMA_READY].prot_pte |= kern_pgprot; + mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= ecc_mask; + mem_types[MT_ROM].prot_sect |= cp->pmd; + + switch (cp->pmd) { + case PMD_SECT_WT: + mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WT; + break; + case PMD_SECT_WB: + case PMD_SECT_WBWA: + mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WB; + break; + } + pr_info("Memory policy: %sData cache %s\n", + ecc_mask ? 
"ECC enabled, " : "", cp->policy); + + for (i = 0; i < ARRAY_SIZE(mem_types); i++) { + struct mem_type *t = &mem_types[i]; + if (t->prot_l1) + t->prot_l1 |= PMD_DOMAIN(t->domain); + if (t->prot_sect) + t->prot_sect |= PMD_DOMAIN(t->domain); + } +} + +#ifdef CONFIG_ARM_DMA_MEM_BUFFERABLE +pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, + unsigned long size, pgprot_t vma_prot) +{ + if (!pfn_valid(pfn)) + return pgprot_noncached(vma_prot); + else if (file->f_flags & O_SYNC) + return pgprot_writecombine(vma_prot); + return vma_prot; +} +EXPORT_SYMBOL(phys_mem_access_prot); +#endif + +#define vectors_base() (vectors_high() ? 0xffff0000 : 0) + +static void __init *early_alloc_aligned(unsigned long sz, unsigned long align) +{ + void *ptr = __va(memblock_alloc(sz, align)); + memset(ptr, 0, sz); + return ptr; +} + +static void __init *early_alloc(unsigned long sz) +{ + return early_alloc_aligned(sz, sz); +} + +static pte_t * __init early_pte_alloc(pmd_t *pmd, unsigned long addr, unsigned long prot) +{ + if (pmd_none(*pmd)) { + pte_t *pte = early_alloc(PTE_HWTABLE_OFF + PTE_HWTABLE_SIZE); + __pmd_populate(pmd, __pa(pte), prot); + } + BUG_ON(pmd_bad(*pmd)); + return pte_offset_kernel(pmd, addr); +} + +static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr, + unsigned long end, unsigned long pfn, + const struct mem_type *type) +{ + pte_t *pte = early_pte_alloc(pmd, addr, type->prot_l1); + do { + set_pte_ext(pte, pfn_pte(pfn, __pgprot(type->prot_pte)), 0); + pfn++; + } while (pte++, addr += PAGE_SIZE, addr != end); +} + +static void __init __map_init_section(pmd_t *pmd, unsigned long addr, + unsigned long end, phys_addr_t phys, + const struct mem_type *type) +{ + pmd_t *p = pmd; + +#ifndef CONFIG_ARM_LPAE + /* + * In classic MMU format, puds and pmds are folded in to + * the pgds. pmd_offset gives the PGD entry. PGDs refer to a + * group of L1 entries making up one logical pointer to + * an L2 table (2MB), where as PMDs refer to the individual + * L1 entries (1MB). Hence increment to get the correct + * offset for odd 1MB sections. + * (See arch/arm/include/asm/pgtable-2level.h) + */ + if (addr & SECTION_SIZE) + pmd++; +#endif + do { + *pmd = __pmd(phys | type->prot_sect); + phys += SECTION_SIZE; + } while (pmd++, addr += SECTION_SIZE, addr != end); + + flush_pmd_entry(p); +} + +static void __init alloc_init_pmd(pud_t *pud, unsigned long addr, + unsigned long end, phys_addr_t phys, + const struct mem_type *type) +{ + pmd_t *pmd = pmd_offset(pud, addr); + unsigned long next; + + do { + /* + * With LPAE, we must loop over to map + * all the pmds for the given range. + */ + next = pmd_addr_end(addr, end); + + /* + * Try a section mapping - addr, next and phys must all be + * aligned to a section boundary. 
+ */ + if (type->prot_sect && + ((addr | next | phys) & ~SECTION_MASK) == 0) { + __map_init_section(pmd, addr, next, phys, type); + } else { + alloc_init_pte(pmd, addr, next, + __phys_to_pfn(phys), type); + } + + phys += next - addr; + + } while (pmd++, addr = next, addr != end); +} + +static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr, + unsigned long end, phys_addr_t phys, + const struct mem_type *type) +{ + pud_t *pud = pud_offset(pgd, addr); + unsigned long next; + + do { + next = pud_addr_end(addr, end); + alloc_init_pmd(pud, addr, next, phys, type); + phys += next - addr; + } while (pud++, addr = next, addr != end); +} + +#ifndef CONFIG_ARM_LPAE +static void __init create_36bit_mapping(struct map_desc *md, + const struct mem_type *type) { - mm->context.id = 0; + unsigned long addr, length, end; + phys_addr_t phys; + pgd_t *pgd; + + addr = md->virtual; + phys = __pfn_to_phys(md->pfn); + length = PAGE_ALIGN(md->length); + + if (!(cpu_architecture() >= CPU_ARCH_ARMv6 || cpu_is_xsc3())) { + printk(KERN_ERR "MM: CPU does not support supersection " + "mapping for 0x%08llx at 0x%08lx\n", + (long long)__pfn_to_phys((u64)md->pfn), addr); + return; + } + + /* N.B. ARMv6 supersections are only defined to work with domain 0. + * Since domain assignments can in fact be arbitrary, the + * 'domain == 0' check below is required to insure that ARMv6 + * supersections are only allocated for domain 0 regardless + * of the actual domain assignments in use. + */ + if (type->domain) { + printk(KERN_ERR "MM: invalid domain in supersection " + "mapping for 0x%08llx at 0x%08lx\n", + (long long)__pfn_to_phys((u64)md->pfn), addr); + return; + } + + if ((addr | length | __pfn_to_phys(md->pfn)) & ~SUPERSECTION_MASK) { + printk(KERN_ERR "MM: cannot create mapping for 0x%08llx" + " at 0x%08lx invalid alignment\n", + (long long)__pfn_to_phys((u64)md->pfn), addr); + return; + } + + /* + * Shift bits [35:32] of address into bits [23:20] of PMD + * (See ARMv6 spec). + */ + phys |= (((md->pfn >> (32 - PAGE_SHIFT)) & 0xF) << 20); + + pgd = pgd_offset_k(addr); + end = addr + length; + do { + pud_t *pud = pud_offset(pgd, addr); + pmd_t *pmd = pmd_offset(pud, addr); + int i; + + for (i = 0; i < 16; i++) + *pmd++ = __pmd(phys | type->prot_sect | PMD_SECT_SUPER); + + addr += SUPERSECTION_SIZE; + phys += SUPERSECTION_SIZE; + pgd += SUPERSECTION_SIZE >> PGDIR_SHIFT; + } while (addr != end); } +#endif /* !CONFIG_ARM_LPAE */ -void __new_context(struct mm_struct *mm) +/* + * Create the page directory entries and any necessary + * page tables for the mapping specified by `md'. We + * are able to cope here with varying sizes and address + * offsets, and we take full advantage of sections and + * supersections. 
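(A worked example of the section-vs-pages decision just above, using the classic 2-level format where SECTION_SIZE is 1MB: a map_desc whose virtual address, physical address and length are all 1MB-aligned, say 2MB at virtual 0xf8000000 backed by physical 0x20000000, is written by __map_init_section() as two section entries and never allocates a PTE table. If any of the three values breaks the 1MB alignment, that stretch falls through to alloc_init_pte() and is mapped with ordinary small pages instead. The concrete addresses here are only for illustration.)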
+ */ +static void __init create_mapping(struct map_desc *md) { - unsigned int asid; + unsigned long addr, length, end; + phys_addr_t phys; + const struct mem_type *type; + pgd_t *pgd; + + if (md->virtual != vectors_base() && md->virtual < TASK_SIZE) { + printk(KERN_WARNING "BUG: not creating mapping for 0x%08llx" + " at 0x%08lx in user region\n", + (long long)__pfn_to_phys((u64)md->pfn), md->virtual); + return; + } + + if ((md->type == MT_DEVICE || md->type == MT_ROM) && + md->virtual >= PAGE_OFFSET && + (md->virtual < VMALLOC_START || md->virtual >= VMALLOC_END)) { + printk(KERN_WARNING "BUG: mapping for 0x%08llx" + " at 0x%08lx out of vmalloc space\n", + (long long)__pfn_to_phys((u64)md->pfn), md->virtual); + } - asid = ++cpu_last_asid; - if (asid == 0) - asid = cpu_last_asid = 1 << ASID_BITS; + type = &mem_types[md->type]; +#ifndef CONFIG_ARM_LPAE /* - * If we've used up all our ASIDs, we need - * to start a new version and flush the TLB. + * Catch 36-bit addresses */ - if ((asid & ~ASID_MASK) == 0) - flush_tlb_all(); + if (md->pfn >= 0x100000) { + create_36bit_mapping(md, type); + return; + } +#endif + + addr = md->virtual & PAGE_MASK; + phys = __pfn_to_phys(md->pfn); + length = PAGE_ALIGN(md->length + (md->virtual & ~PAGE_MASK)); + + if (type->prot_l1 == 0 && ((addr | phys | length) & ~SECTION_MASK)) { + printk(KERN_WARNING "BUG: map for 0x%08llx at 0x%08lx can not " + "be mapped using pages, ignoring.\n", + (long long)__pfn_to_phys(md->pfn), addr); + return; + } + + pgd = pgd_offset_k(addr); + end = addr + length; + do { + unsigned long next = pgd_addr_end(addr, end); + + alloc_init_pud(pgd, addr, next, phys, type); + + phys += next - addr; + addr = next; + } while (pgd++, addr != end); +} + +/* + * Create the architecture specific mappings + */ +void __init iotable_init(struct map_desc *io_desc, int nr) +{ + struct map_desc *md; + struct vm_struct *vm; + struct static_vm *svm; + + if (!nr) + return; + + svm = early_alloc_aligned(sizeof(*svm) * nr, __alignof__(*svm)); + + for (md = io_desc; nr; md++, nr--) { + create_mapping(md); + + vm = &svm->vm; + vm->addr = (void *)(md->virtual & PAGE_MASK); + vm->size = PAGE_ALIGN(md->length + (md->virtual & ~PAGE_MASK)); + vm->phys_addr = __pfn_to_phys(md->pfn); + vm->flags = VM_IOREMAP | VM_ARM_STATIC_MAPPING; + vm->flags |= VM_ARM_MTYPE(md->type); + vm->caller = iotable_init; + add_static_vm_early(svm++); + } +} + +void __init vm_reserve_area_early(unsigned long addr, unsigned long size, + void *caller) +{ + struct vm_struct *vm; + struct static_vm *svm; + + svm = early_alloc_aligned(sizeof(*svm), __alignof__(*svm)); + + vm = &svm->vm; + vm->addr = (void *)addr; + vm->size = size; + vm->flags = VM_IOREMAP | VM_ARM_EMPTY_MAPPING; + vm->caller = caller; + add_static_vm_early(svm); +} + +#ifndef CONFIG_ARM_LPAE + +/* + * The Linux PMD is made of two consecutive section entries covering 2MB + * (see definition in include/asm/pgtable-2level.h). However a call to + * create_mapping() may optimize static mappings by using individual + * 1MB section mappings. This leaves the actual PMD potentially half + * initialized if the top or bottom section entry isn't used, leaving it + * open to problems if a subsequent ioremap() or vmalloc() tries to use + * the virtual space left free by that unused section entry. + * + * Let's avoid the issue by inserting dummy vm entries covering the unused + * PMD halves once the static mappings are in place. 
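(The new iotable_init() above is what a machine's ->map_io() callback feeds its fixed device windows through; each entry becomes a VM_IOREMAP | VM_ARM_STATIC_MAPPING area that later ioremap() calls can reuse and that __iounmap() will leave alone. A minimal sketch of such a caller, with made-up addresses:)

    #include <linux/sizes.h>
    #include <asm/mach/map.h>

    static struct map_desc board_io_desc[] __initdata = {
            {
                    .virtual	= 0xfe000000,			/* hypothetical */
                    .pfn		= __phys_to_pfn(0x10000000),	/* hypothetical */
                    .length		= SZ_1M,
                    .type		= MT_DEVICE,
            },
    };

    static void __init board_map_io(void)
    {
            iotable_init(board_io_desc, ARRAY_SIZE(board_io_desc));
    }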
+ */ + +static void __init pmd_empty_section_gap(unsigned long addr) +{ + vm_reserve_area_early(addr, SECTION_SIZE, pmd_empty_section_gap); +} + +static void __init fill_pmd_gaps(void) +{ + struct static_vm *svm; + struct vm_struct *vm; + unsigned long addr, next = 0; + pmd_t *pmd; + + list_for_each_entry(svm, &static_vmlist, list) { + vm = &svm->vm; + addr = (unsigned long)vm->addr; + if (addr < next) + continue; + + /* + * Check if this vm starts on an odd section boundary. + * If so and the first section entry for this PMD is free + * then we block the corresponding virtual address. + */ + if ((addr & ~PMD_MASK) == SECTION_SIZE) { + pmd = pmd_off_k(addr); + if (pmd_none(*pmd)) + pmd_empty_section_gap(addr & PMD_MASK); + } + + /* + * Then check if this vm ends on an odd section boundary. + * If so and the second section entry for this PMD is empty + * then we block the corresponding virtual address. + */ + addr += vm->size; + if ((addr & ~PMD_MASK) == SECTION_SIZE) { + pmd = pmd_off_k(addr) + 1; + if (pmd_none(*pmd)) + pmd_empty_section_gap(addr); + } + + /* no need to look at any vm entry until we hit the next PMD */ + next = (addr + PMD_SIZE - 1) & PMD_MASK; + } +} + +#else +#define fill_pmd_gaps() do { } while (0) +#endif + +#if defined(CONFIG_PCI) && !defined(CONFIG_NEED_MACH_IO_H) +static void __init pci_reserve_io(void) +{ + struct static_vm *svm; + + svm = find_static_vm_vaddr((void *)PCI_IO_VIRT_BASE); + if (svm) + return; + + vm_reserve_area_early(PCI_IO_VIRT_BASE, SZ_2M, pci_reserve_io); +} +#else +#define pci_reserve_io() do { } while (0) +#endif + +#ifdef CONFIG_DEBUG_LL +void __init debug_ll_io_init(void) +{ + struct map_desc map; + + debug_ll_addr(&map.pfn, &map.virtual); + if (!map.pfn || !map.virtual) + return; + map.pfn = __phys_to_pfn(map.pfn); + map.virtual &= PAGE_MASK; + map.length = PAGE_SIZE; + map.type = MT_DEVICE; + iotable_init(&map, 1); +} +#endif + +static void * __initdata vmalloc_min = + (void *)(VMALLOC_END - (240 << 20) - VMALLOC_OFFSET); + +/* + * vmalloc=size forces the vmalloc area to be exactly 'size' + * bytes. This can be used to increase (or decrease) the vmalloc + * area - the default is 240m. 
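(To put rough numbers on the vmalloc= handling that follows: with the default 240MB reservation, vmalloc_min sits 240MB plus VMALLOC_OFFSET below VMALLOC_END, and everything below __pa(vmalloc_min) is candidate lowmem. Booting with, say, vmalloc=512M instead places vmalloc_min 512MB below VMALLOC_END, so correspondingly less RAM is directly mapped and the remainder is either handed to highmem or truncated by sanity_check_meminfo(), depending on CONFIG_HIGHMEM. A request below 16MB is raised to 16MB, and one that would leave less than 32MB of lowmem above PAGE_OFFSET is trimmed, as the two warnings below show; the exact figures depend on the platform's PAGE_OFFSET and VMALLOC_END.)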
+ */ +static int __init early_vmalloc(char *arg) +{ + unsigned long vmalloc_reserve = memparse(arg, NULL); + + if (vmalloc_reserve < SZ_16M) { + vmalloc_reserve = SZ_16M; + printk(KERN_WARNING + "vmalloc area too small, limiting to %luMB\n", + vmalloc_reserve >> 20); + } + + if (vmalloc_reserve > VMALLOC_END - (PAGE_OFFSET + SZ_32M)) { + vmalloc_reserve = VMALLOC_END - (PAGE_OFFSET + SZ_32M); + printk(KERN_WARNING + "vmalloc area is too big, limiting to %luMB\n", + vmalloc_reserve >> 20); + } + + vmalloc_min = (void *)(VMALLOC_END - vmalloc_reserve); + return 0; +} +early_param("vmalloc", early_vmalloc); + +phys_addr_t arm_lowmem_limit __initdata = 0; + +void __init sanity_check_meminfo(void) +{ + phys_addr_t memblock_limit = 0; + int highmem = 0; + phys_addr_t vmalloc_limit = __pa(vmalloc_min - 1) + 1; + struct memblock_region *reg; + + for_each_memblock(memory, reg) { + phys_addr_t block_start = reg->base; + phys_addr_t block_end = reg->base + reg->size; + phys_addr_t size_limit = reg->size; + + if (reg->base >= vmalloc_limit) + highmem = 1; + else + size_limit = vmalloc_limit - reg->base; + + + if (!IS_ENABLED(CONFIG_HIGHMEM) || cache_is_vipt_aliasing()) { + + if (highmem) { + pr_notice("Ignoring RAM at %pa-%pa (!CONFIG_HIGHMEM)\n", + &block_start, &block_end); + memblock_remove(reg->base, reg->size); + continue; + } + + if (reg->size > size_limit) { + phys_addr_t overlap_size = reg->size - size_limit; + + pr_notice("Truncating RAM at %pa-%pa to -%pa", + &block_start, &block_end, &vmalloc_limit); + memblock_remove(vmalloc_limit, overlap_size); + block_end = vmalloc_limit; + } + } + + if (!highmem) { + if (block_end > arm_lowmem_limit) { + if (reg->size > size_limit) + arm_lowmem_limit = vmalloc_limit; + else + arm_lowmem_limit = block_end; + } + + /* + * Find the first non-section-aligned page, and point + * memblock_limit at it. This relies on rounding the + * limit down to be section-aligned, which happens at + * the end of this function. + * + * With this algorithm, the start or end of almost any + * bank can be non-section-aligned. The only exception + * is that the start of the bank 0 must be section- + * aligned, since otherwise memory would need to be + * allocated when mapping the start of bank 0, which + * occurs before any free memory is mapped. + */ + if (!memblock_limit) { + if (!IS_ALIGNED(block_start, SECTION_SIZE)) + memblock_limit = block_start; + else if (!IS_ALIGNED(block_end, SECTION_SIZE)) + memblock_limit = arm_lowmem_limit; + } + + } + } + + high_memory = __va(arm_lowmem_limit - 1) + 1; + + /* + * Round the memblock limit down to a section size. This + * helps to ensure that we will allocate memory from the + * last full section, which should be mapped. + */ + if (memblock_limit) + memblock_limit = round_down(memblock_limit, SECTION_SIZE); + if (!memblock_limit) + memblock_limit = arm_lowmem_limit; + + memblock_set_current_limit(memblock_limit); +} + +static inline void prepare_page_table(void) +{ + unsigned long addr; + phys_addr_t end; + + /* + * Clear out all the mappings below the kernel image. + */ + for (addr = 0; addr < MODULES_VADDR; addr += PMD_SIZE) + pmd_clear(pmd_off_k(addr)); + +#ifdef CONFIG_XIP_KERNEL + /* The XIP kernel is mapped in the module area -- skip over it */ + addr = ((unsigned long)_etext + PMD_SIZE - 1) & PMD_MASK; +#endif + for ( ; addr < PAGE_OFFSET; addr += PMD_SIZE) + pmd_clear(pmd_off_k(addr)); + + /* + * Find the end of the first block of lowmem. 
+ */ + end = memblock.memory.regions[0].base + memblock.memory.regions[0].size; + if (end >= arm_lowmem_limit) + end = arm_lowmem_limit; + + /* + * Clear out all the kernel space mappings, except for the first + * memory bank, up to the vmalloc region. + */ + for (addr = __phys_to_virt(end); + addr < VMALLOC_START; addr += PMD_SIZE) + pmd_clear(pmd_off_k(addr)); +} + +#ifdef CONFIG_ARM_LPAE +/* the first page is reserved for pgd */ +#define SWAPPER_PG_DIR_SIZE (PAGE_SIZE + \ + PTRS_PER_PGD * PTRS_PER_PMD * sizeof(pmd_t)) +#else +#define SWAPPER_PG_DIR_SIZE (PTRS_PER_PGD * sizeof(pgd_t)) +#endif + +/* + * Reserve the special regions of memory + */ +void __init arm_mm_memblock_reserve(void) +{ + /* + * Reserve the page tables. These are already in use, + * and can only be in node 0. + */ + memblock_reserve(__pa(swapper_pg_dir), SWAPPER_PG_DIR_SIZE); + +#ifdef CONFIG_SA1111 + /* + * Because of the SA1111 DMA bug, we want to preserve our + * precious DMA-able memory... + */ + memblock_reserve(PHYS_OFFSET, __pa(swapper_pg_dir) - PHYS_OFFSET); +#endif +} + +/* + * Set up the device mappings. Since we clear out the page tables for all + * mappings above VMALLOC_START, we will remove any debug device mappings. + * This means you have to be careful how you debug this function, or any + * called function. This means you can't use any function or debugging + * method which may touch any device, otherwise the kernel _will_ crash. + */ +static void __init devicemaps_init(const struct machine_desc *mdesc) +{ + struct map_desc map; + unsigned long addr; + void *vectors; + + /* + * Allocate the vector page early. + */ + vectors = early_alloc(PAGE_SIZE * 2); + + early_trap_init(vectors); + + for (addr = VMALLOC_START; addr; addr += PMD_SIZE) + pmd_clear(pmd_off_k(addr)); + + /* + * Map the kernel if it is XIP. + * It is always first in the modulearea. + */ +#ifdef CONFIG_XIP_KERNEL + map.pfn = __phys_to_pfn(CONFIG_XIP_PHYS_ADDR & SECTION_MASK); + map.virtual = MODULES_VADDR; + map.length = ((unsigned long)_etext - map.virtual + ~SECTION_MASK) & SECTION_MASK; + map.type = MT_ROM; + create_mapping(&map); +#endif + + /* + * Map the cache flushing regions. + */ +#ifdef FLUSH_BASE + map.pfn = __phys_to_pfn(FLUSH_BASE_PHYS); + map.virtual = FLUSH_BASE; + map.length = SZ_1M; + map.type = MT_CACHECLEAN; + create_mapping(&map); +#endif +#ifdef FLUSH_BASE_MINICACHE + map.pfn = __phys_to_pfn(FLUSH_BASE_PHYS + SZ_1M); + map.virtual = FLUSH_BASE_MINICACHE; + map.length = SZ_1M; + map.type = MT_MINICLEAN; + create_mapping(&map); +#endif + + /* + * Create a mapping for the machine vectors at the high-vectors + * location (0xffff0000). If we aren't using high-vectors, also + * create a mapping at the low-vectors virtual address. + */ + map.pfn = __phys_to_pfn(virt_to_phys(vectors)); + map.virtual = 0xffff0000; + map.length = PAGE_SIZE; +#ifdef CONFIG_KUSER_HELPERS + map.type = MT_HIGH_VECTORS; +#else + map.type = MT_LOW_VECTORS; +#endif + create_mapping(&map); + + if (!vectors_high()) { + map.virtual = 0; + map.length = PAGE_SIZE * 2; + map.type = MT_LOW_VECTORS; + create_mapping(&map); + } + + /* Now create a kernel read-only mapping */ + map.pfn += 1; + map.virtual = 0xffff0000 + PAGE_SIZE; + map.length = PAGE_SIZE; + map.type = MT_LOW_VECTORS; + create_mapping(&map); + + /* + * Ask the machine support to map in the statically mapped devices. 
+ */ + if (mdesc->map_io) + mdesc->map_io(); + else + debug_ll_io_init(); + fill_pmd_gaps(); + + /* Reserve fixed i/o space in VMALLOC region */ + pci_reserve_io(); + + /* + * Finally flush the caches and tlb to ensure that we're in a + * consistent state wrt the writebuffer. This also ensures that + * any write-allocated cache lines in the vector page are written + * back. After this point, we can start to touch devices again. + */ + local_flush_tlb_all(); + flush_cache_all(); +} + +static void __init kmap_init(void) +{ +#ifdef CONFIG_HIGHMEM + pkmap_page_table = early_pte_alloc(pmd_off_k(PKMAP_BASE), + PKMAP_BASE, _PAGE_KERNEL_TABLE); + + fixmap_page_table = early_pte_alloc(pmd_off_k(FIXADDR_START), + FIXADDR_START, _PAGE_KERNEL_TABLE); +#endif +} + +static void __init map_lowmem(void) +{ + struct memblock_region *reg; + unsigned long kernel_x_start = round_down(__pa(_stext), SECTION_SIZE); + unsigned long kernel_x_end = round_up(__pa(__init_end), SECTION_SIZE); + + /* Map all the lowmem memory banks. */ + for_each_memblock(memory, reg) { + phys_addr_t start = reg->base; + phys_addr_t end = start + reg->size; + struct map_desc map; + + if (end > arm_lowmem_limit) + end = arm_lowmem_limit; + if (start >= end) + break; + + if (end < kernel_x_start || start >= kernel_x_end) { + map.pfn = __phys_to_pfn(start); + map.virtual = __phys_to_virt(start); + map.length = end - start; + map.type = MT_MEMORY_RWX; + + create_mapping(&map); + } else { + /* This better cover the entire kernel */ + if (start < kernel_x_start) { + map.pfn = __phys_to_pfn(start); + map.virtual = __phys_to_virt(start); + map.length = kernel_x_start - start; + map.type = MT_MEMORY_RW; + + create_mapping(&map); + } + + map.pfn = __phys_to_pfn(kernel_x_start); + map.virtual = __phys_to_virt(kernel_x_start); + map.length = kernel_x_end - kernel_x_start; + map.type = MT_MEMORY_RWX; + + create_mapping(&map); + + if (kernel_x_end < end) { + map.pfn = __phys_to_pfn(kernel_x_end); + map.virtual = __phys_to_virt(kernel_x_end); + map.length = end - kernel_x_end; + map.type = MT_MEMORY_RW; + + create_mapping(&map); + } + } + } +} + +#ifdef CONFIG_ARM_LPAE +/* + * early_paging_init() recreates boot time page table setup, allowing machines + * to switch over to a high (>4G) address space on LPAE systems + */ +void __init early_paging_init(const struct machine_desc *mdesc, + struct proc_info_list *procinfo) +{ + pmdval_t pmdprot = procinfo->__cpu_mm_mmu_flags; + unsigned long map_start, map_end; + pgd_t *pgd0, *pgdk; + pud_t *pud0, *pudk, *pud_start; + pmd_t *pmd0, *pmdk; + phys_addr_t phys; + int i; + + if (!(mdesc->init_meminfo)) + return; + + /* remap kernel code and data */ + map_start = init_mm.start_code & PMD_MASK; + map_end = ALIGN(init_mm.brk, PMD_SIZE); + + /* get a handle on things... */ + pgd0 = pgd_offset_k(0); + pud_start = pud0 = pud_offset(pgd0, 0); + pmd0 = pmd_offset(pud0, 0); + + pgdk = pgd_offset_k(map_start); + pudk = pud_offset(pgdk, map_start); + pmdk = pmd_offset(pudk, map_start); + + mdesc->init_meminfo(); + + /* Run the patch stub to update the constants */ + fixup_pv_table(&__pv_table_begin, + (&__pv_table_end - &__pv_table_begin) << 2); + + /* + * Cache cleaning operations for self-modifying code + * We should clean the entries by MVA but running a + * for loop over every pv_table entry pointer would + * just complicate the code. 
+ */ + flush_cache_louis(); + dsb(ishst); + isb(); + + /* remap level 1 table */ + for (i = 0; i < PTRS_PER_PGD; pud0++, i++) { + set_pud(pud0, + __pud(__pa(pmd0) | PMD_TYPE_TABLE | L_PGD_SWAPPER)); + pmd0 += PTRS_PER_PMD; + } + + /* remap pmds for kernel mapping */ + phys = __pa(map_start); + do { + *pmdk++ = __pmd(phys | pmdprot); + phys += PMD_SIZE; + } while (phys < map_end); + + flush_cache_all(); + cpu_switch_mm(pgd0, &init_mm); + cpu_set_ttbr(1, __pa(pgd0) + TTBR1_OFFSET); + local_flush_bp_all(); + local_flush_tlb_all(); +} + +#else + +void __init early_paging_init(const struct machine_desc *mdesc, + struct proc_info_list *procinfo) +{ + if (mdesc->init_meminfo) + mdesc->init_meminfo(); +} + +#endif + +/* + * paging_init() sets up the page tables, initialises the zone memory + * maps, and sets up the zero page, bad page and bad page tables. + */ +void __init paging_init(const struct machine_desc *mdesc) +{ + void *zero_page; + + build_mem_type_table(); + prepare_page_table(); + map_lowmem(); + dma_contiguous_remap(); + devicemaps_init(mdesc); + kmap_init(); + tcm_init(); + + top_pmd = pmd_off_k(0xffff0000); + + /* allocate the zero page. */ + zero_page = early_alloc(PAGE_SIZE); + + bootmem_init(); - mm->context.id = asid; + empty_zero_page = virt_to_page(zero_page); + __flush_dcache_page(NULL, empty_zero_page); } diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c new file mode 100644 index 00000000000..a014dfacd5c --- /dev/null +++ b/arch/arm/mm/nommu.c @@ -0,0 +1,389 @@ +/* + * linux/arch/arm/mm/nommu.c + * + * ARM uCLinux supporting functions. + */ +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/pagemap.h> +#include <linux/io.h> +#include <linux/memblock.h> +#include <linux/kernel.h> + +#include <asm/cacheflush.h> +#include <asm/sections.h> +#include <asm/page.h> +#include <asm/setup.h> +#include <asm/traps.h> +#include <asm/mach/arch.h> +#include <asm/cputype.h> +#include <asm/mpu.h> +#include <asm/procinfo.h> + +#include "mm.h" + +#ifdef CONFIG_ARM_MPU +struct mpu_rgn_info mpu_rgn_info; + +/* Region number */ +static void rgnr_write(u32 v) +{ + asm("mcr p15, 0, %0, c6, c2, 0" : : "r" (v)); +} + +/* Data-side / unified region attributes */ + +/* Region access control register */ +static void dracr_write(u32 v) +{ + asm("mcr p15, 0, %0, c6, c1, 4" : : "r" (v)); +} + +/* Region size register */ +static void drsr_write(u32 v) +{ + asm("mcr p15, 0, %0, c6, c1, 2" : : "r" (v)); +} + +/* Region base address register */ +static void drbar_write(u32 v) +{ + asm("mcr p15, 0, %0, c6, c1, 0" : : "r" (v)); +} + +static u32 drbar_read(void) +{ + u32 v; + asm("mrc p15, 0, %0, c6, c1, 0" : "=r" (v)); + return v; +} +/* Optional instruction-side region attributes */ + +/* I-side Region access control register */ +static void iracr_write(u32 v) +{ + asm("mcr p15, 0, %0, c6, c1, 5" : : "r" (v)); +} + +/* I-side Region size register */ +static void irsr_write(u32 v) +{ + asm("mcr p15, 0, %0, c6, c1, 3" : : "r" (v)); +} + +/* I-side Region base address register */ +static void irbar_write(u32 v) +{ + asm("mcr p15, 0, %0, c6, c1, 1" : : "r" (v)); +} + +static unsigned long irbar_read(void) +{ + unsigned long v; + asm("mrc p15, 0, %0, c6, c1, 1" : "=r" (v)); + return v; +} + +/* MPU initialisation functions */ +void __init sanity_check_meminfo_mpu(void) +{ + int i; + phys_addr_t phys_offset = PHYS_OFFSET; + phys_addr_t aligned_region_size, specified_mem_size, rounded_mem_size; + struct memblock_region *reg; + bool first = true; + phys_addr_t mem_start; + phys_addr_t mem_end; + + 
for_each_memblock(memory, reg) {
+ if (first) {
+ /*
+ * Initially only use memory continuous from
+ * PHYS_OFFSET */
+ if (reg->base != phys_offset)
+ panic("First memory bank must be contiguous from PHYS_OFFSET");
+
+ mem_start = reg->base;
+ mem_end = reg->base + reg->size;
+ specified_mem_size = reg->size;
+ first = false;
+ } else {
+ /*
+ * memblock auto merges contiguous blocks, remove
+ * all blocks afterwards
+ */
+ pr_notice("Ignoring RAM after %pa, memory at %pa ignored\n",
+ &mem_start, &reg->base);
+ memblock_remove(reg->base, reg->size);
+ }
+ }
+
+ /*
+ * MPU has curious alignment requirements: Size must be power of 2, and
+ * region start must be aligned to the region size
+ */
+ if (phys_offset != 0)
+ pr_info("PHYS_OFFSET != 0 => MPU Region size constrained by alignment requirements\n");
+
+ /*
+ * Maximum aligned region might overflow phys_addr_t if phys_offset is
+ * 0. Hence we keep everything below 4G until we take the smaller of
+ * the aligned_region_size and rounded_mem_size, one of which is
+ * guaranteed to be smaller than the maximum physical address.
+ */
+ aligned_region_size = (phys_offset - 1) ^ (phys_offset);
+ /* Find the max power-of-two sized region that fits inside our bank */
+ rounded_mem_size = (1 << __fls(specified_mem_size)) - 1;
+
+ /* The actual region size is the smaller of the two */
+ aligned_region_size = aligned_region_size < rounded_mem_size
+ ? aligned_region_size + 1
+ : rounded_mem_size + 1;
+
+ if (aligned_region_size != specified_mem_size) {
+ pr_warn("Truncating memory from %pa to %pa (MPU region constraints)",
+ &specified_mem_size, &aligned_region_size);
+ memblock_remove(mem_start + aligned_region_size,
+ specified_mem_size - aligned_region_size);
+
+ mem_end = mem_start + aligned_region_size;
+ }
+
+ pr_debug("MPU Region from %pa size %pa (end %pa))\n",
+ &phys_offset, &aligned_region_size, &mem_end);
+
+}
+
+static int mpu_present(void)
+{
+ return ((read_cpuid_ext(CPUID_EXT_MMFR0) & MMFR0_PMSA) == MMFR0_PMSAv7);
+}
+
+static int mpu_max_regions(void)
+{
+ /*
+ * We don't support a different number of I/D side regions so if we
+ * have separate instruction and data memory maps then return
+ * whichever side has a smaller number of supported regions. 
+ */ + u32 dregions, iregions, mpuir; + mpuir = read_cpuid(CPUID_MPUIR); + + dregions = iregions = (mpuir & MPUIR_DREGION_SZMASK) >> MPUIR_DREGION; + + /* Check for separate d-side and i-side memory maps */ + if (mpuir & MPUIR_nU) + iregions = (mpuir & MPUIR_IREGION_SZMASK) >> MPUIR_IREGION; + + /* Use the smallest of the two maxima */ + return min(dregions, iregions); +} + +static int mpu_iside_independent(void) +{ + /* MPUIR.nU specifies whether there is *not* a unified memory map */ + return read_cpuid(CPUID_MPUIR) & MPUIR_nU; +} + +static int mpu_min_region_order(void) +{ + u32 drbar_result, irbar_result; + /* We've kept a region free for this probing */ + rgnr_write(MPU_PROBE_REGION); + isb(); + /* + * As per ARM ARM, write 0xFFFFFFFC to DRBAR to find the minimum + * region order + */ + drbar_write(0xFFFFFFFC); + drbar_result = irbar_result = drbar_read(); + drbar_write(0x0); + /* If the MPU is non-unified, we use the larger of the two minima*/ + if (mpu_iside_independent()) { + irbar_write(0xFFFFFFFC); + irbar_result = irbar_read(); + irbar_write(0x0); + } + isb(); /* Ensure that MPU region operations have completed */ + /* Return whichever result is larger */ + return __ffs(max(drbar_result, irbar_result)); +} + +static int mpu_setup_region(unsigned int number, phys_addr_t start, + unsigned int size_order, unsigned int properties) +{ + u32 size_data; + + /* We kept a region free for probing resolution of MPU regions*/ + if (number > mpu_max_regions() || number == MPU_PROBE_REGION) + return -ENOENT; + + if (size_order > 32) + return -ENOMEM; + + if (size_order < mpu_min_region_order()) + return -ENOMEM; + + /* Writing N to bits 5:1 (RSR_SZ) specifies region size 2^N+1 */ + size_data = ((size_order - 1) << MPU_RSR_SZ) | 1 << MPU_RSR_EN; + + dsb(); /* Ensure all previous data accesses occur with old mappings */ + rgnr_write(number); + isb(); + drbar_write(start); + dracr_write(properties); + isb(); /* Propagate properties before enabling region */ + drsr_write(size_data); + + /* Check for independent I-side registers */ + if (mpu_iside_independent()) { + irbar_write(start); + iracr_write(properties); + isb(); + irsr_write(size_data); + } + isb(); + + /* Store region info (we treat i/d side the same, so only store d) */ + mpu_rgn_info.rgns[number].dracr = properties; + mpu_rgn_info.rgns[number].drbar = start; + mpu_rgn_info.rgns[number].drsr = size_data; + return 0; +} + +/* +* Set up default MPU regions, doing nothing if there is no MPU +*/ +void __init mpu_setup(void) +{ + int region_err; + if (!mpu_present()) + return; + + region_err = mpu_setup_region(MPU_RAM_REGION, PHYS_OFFSET, + ilog2(meminfo.bank[0].size), + MPU_AP_PL1RW_PL0RW | MPU_RGN_NORMAL); + if (region_err) { + panic("MPU region initialization failure! %d", region_err); + } else { + pr_info("Using ARMv7 PMSA Compliant MPU. " + "Region independence: %s, Max regions: %d\n", + mpu_iside_independent() ? "Yes" : "No", + mpu_max_regions()); + } +} +#else +static void sanity_check_meminfo_mpu(void) {} +static void __init mpu_setup(void) {} +#endif /* CONFIG_ARM_MPU */ + +void __init arm_mm_memblock_reserve(void) +{ +#ifndef CONFIG_CPU_V7M + /* + * Register the exception vector page. + * some architectures which the DRAM is the exception vector to trap, + * alloc_page breaks with error, although it is not NULL, but "0." + */ + memblock_reserve(CONFIG_VECTORS_BASE, PAGE_SIZE); +#else /* ifndef CONFIG_CPU_V7M */ + /* + * There is no dedicated vector page on V7-M. So nothing needs to be + * reserved here. 
+ */ +#endif +} + +void __init sanity_check_meminfo(void) +{ + phys_addr_t end; + sanity_check_meminfo_mpu(); + end = memblock_end_of_DRAM(); + high_memory = __va(end - 1) + 1; + memblock_set_current_limit(end); +} + +/* + * early_paging_init() recreates boot time page table setup, allowing machines + * to switch over to a high (>4G) address space on LPAE systems + */ +void __init early_paging_init(const struct machine_desc *mdesc, + struct proc_info_list *procinfo) +{ +} + +/* + * paging_init() sets up the page tables, initialises the zone memory + * maps, and sets up the zero page, bad page and bad page tables. + */ +void __init paging_init(const struct machine_desc *mdesc) +{ + early_trap_init((void *)CONFIG_VECTORS_BASE); + mpu_setup(); + bootmem_init(); +} + +/* + * We don't need to do anything here for nommu machines. + */ +void setup_mm_for_reboot(void) +{ +} + +void flush_dcache_page(struct page *page) +{ + __cpuc_flush_dcache_area(page_address(page), PAGE_SIZE); +} +EXPORT_SYMBOL(flush_dcache_page); + +void flush_kernel_dcache_page(struct page *page) +{ + __cpuc_flush_dcache_area(page_address(page), PAGE_SIZE); +} +EXPORT_SYMBOL(flush_kernel_dcache_page); + +void copy_to_user_page(struct vm_area_struct *vma, struct page *page, + unsigned long uaddr, void *dst, const void *src, + unsigned long len) +{ + memcpy(dst, src, len); + if (vma->vm_flags & VM_EXEC) + __cpuc_coherent_user_range(uaddr, uaddr + len); +} + +void __iomem *__arm_ioremap_pfn(unsigned long pfn, unsigned long offset, + size_t size, unsigned int mtype) +{ + if (pfn >= (0x100000000ULL >> PAGE_SHIFT)) + return NULL; + return (void __iomem *) (offset + (pfn << PAGE_SHIFT)); +} +EXPORT_SYMBOL(__arm_ioremap_pfn); + +void __iomem *__arm_ioremap_pfn_caller(unsigned long pfn, unsigned long offset, + size_t size, unsigned int mtype, void *caller) +{ + return __arm_ioremap_pfn(pfn, offset, size, mtype); +} + +void __iomem *__arm_ioremap(phys_addr_t phys_addr, size_t size, + unsigned int mtype) +{ + return (void __iomem *)phys_addr; +} +EXPORT_SYMBOL(__arm_ioremap); + +void __iomem * (*arch_ioremap_caller)(phys_addr_t, size_t, unsigned int, void *); + +void __iomem *__arm_ioremap_caller(phys_addr_t phys_addr, size_t size, + unsigned int mtype, void *caller) +{ + return __arm_ioremap(phys_addr, size, mtype); +} + +void (*arch_iounmap)(volatile void __iomem *); + +void __arm_iounmap(volatile void __iomem *addr) +{ +} +EXPORT_SYMBOL(__arm_iounmap); diff --git a/arch/arm/mm/pabort-legacy.S b/arch/arm/mm/pabort-legacy.S new file mode 100644 index 00000000000..8bbff025269 --- /dev/null +++ b/arch/arm/mm/pabort-legacy.S @@ -0,0 +1,21 @@ +#include <linux/linkage.h> +#include <asm/assembler.h> + +/* + * Function: legacy_pabort + * + * Params : r2 = pt_regs + * : r4 = address of aborted instruction + * : r5 = psr for parent context + * + * Returns : r4 - r11, r13 preserved + * + * Purpose : obtain information about current prefetch abort. + */ + + .align 5 +ENTRY(legacy_pabort) + mov r0, r4 + mov r1, #5 + b do_PrefetchAbort +ENDPROC(legacy_pabort) diff --git a/arch/arm/mm/pabort-v6.S b/arch/arm/mm/pabort-v6.S new file mode 100644 index 00000000000..9627646ce78 --- /dev/null +++ b/arch/arm/mm/pabort-v6.S @@ -0,0 +1,21 @@ +#include <linux/linkage.h> +#include <asm/assembler.h> + +/* + * Function: v6_pabort + * + * Params : r2 = pt_regs + * : r4 = address of aborted instruction + * : r5 = psr for parent context + * + * Returns : r4 - r11, r13 preserved + * + * Purpose : obtain information about current prefetch abort. 
+ */ + + .align 5 +ENTRY(v6_pabort) + mov r0, r4 + mrc p15, 0, r1, c5, c0, 1 @ get IFSR + b do_PrefetchAbort +ENDPROC(v6_pabort) diff --git a/arch/arm/mm/pabort-v7.S b/arch/arm/mm/pabort-v7.S new file mode 100644 index 00000000000..875761f44f3 --- /dev/null +++ b/arch/arm/mm/pabort-v7.S @@ -0,0 +1,21 @@ +#include <linux/linkage.h> +#include <asm/assembler.h> + +/* + * Function: v7_pabort + * + * Params : r2 = pt_regs + * : r4 = address of aborted instruction + * : r5 = psr for parent context + * + * Returns : r4 - r11, r13 preserved + * + * Purpose : obtain information about current prefetch abort. + */ + + .align 5 +ENTRY(v7_pabort) + mrc p15, 0, r0, c6, c0, 2 @ get IFAR + mrc p15, 0, r1, c5, c0, 1 @ get IFSR + b do_PrefetchAbort +ENDPROC(v7_pabort) diff --git a/arch/arm/mm/pgd.c b/arch/arm/mm/pgd.c new file mode 100644 index 00000000000..249379535be --- /dev/null +++ b/arch/arm/mm/pgd.c @@ -0,0 +1,160 @@ +/* + * linux/arch/arm/mm/pgd.c + * + * Copyright (C) 1998-2005 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/mm.h> +#include <linux/gfp.h> +#include <linux/highmem.h> +#include <linux/slab.h> + +#include <asm/cp15.h> +#include <asm/pgalloc.h> +#include <asm/page.h> +#include <asm/tlbflush.h> + +#include "mm.h" + +#ifdef CONFIG_ARM_LPAE +#define __pgd_alloc() kmalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL) +#define __pgd_free(pgd) kfree(pgd) +#else +#define __pgd_alloc() (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_REPEAT, 2) +#define __pgd_free(pgd) free_pages((unsigned long)pgd, 2) +#endif + +/* + * need to get a 16k page for level 1 + */ +pgd_t *pgd_alloc(struct mm_struct *mm) +{ + pgd_t *new_pgd, *init_pgd; + pud_t *new_pud, *init_pud; + pmd_t *new_pmd, *init_pmd; + pte_t *new_pte, *init_pte; + + new_pgd = __pgd_alloc(); + if (!new_pgd) + goto no_pgd; + + memset(new_pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t)); + + /* + * Copy over the kernel and IO PGD entries + */ + init_pgd = pgd_offset_k(0); + memcpy(new_pgd + USER_PTRS_PER_PGD, init_pgd + USER_PTRS_PER_PGD, + (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); + + clean_dcache_area(new_pgd, PTRS_PER_PGD * sizeof(pgd_t)); + +#ifdef CONFIG_ARM_LPAE + /* + * Allocate PMD table for modules and pkmap mappings. + */ + new_pud = pud_alloc(mm, new_pgd + pgd_index(MODULES_VADDR), + MODULES_VADDR); + if (!new_pud) + goto no_pud; + + new_pmd = pmd_alloc(mm, new_pud, 0); + if (!new_pmd) + goto no_pmd; +#endif + + if (!vectors_high()) { + /* + * On ARM, first page must always be allocated since it + * contains the machine vectors. The vectors are always high + * with LPAE. 
+ */ + new_pud = pud_alloc(mm, new_pgd, 0); + if (!new_pud) + goto no_pud; + + new_pmd = pmd_alloc(mm, new_pud, 0); + if (!new_pmd) + goto no_pmd; + + new_pte = pte_alloc_map(mm, NULL, new_pmd, 0); + if (!new_pte) + goto no_pte; + + init_pud = pud_offset(init_pgd, 0); + init_pmd = pmd_offset(init_pud, 0); + init_pte = pte_offset_map(init_pmd, 0); + set_pte_ext(new_pte + 0, init_pte[0], 0); + set_pte_ext(new_pte + 1, init_pte[1], 0); + pte_unmap(init_pte); + pte_unmap(new_pte); + } + + return new_pgd; + +no_pte: + pmd_free(mm, new_pmd); +no_pmd: + pud_free(mm, new_pud); +no_pud: + __pgd_free(new_pgd); +no_pgd: + return NULL; +} + +void pgd_free(struct mm_struct *mm, pgd_t *pgd_base) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pgtable_t pte; + + if (!pgd_base) + return; + + pgd = pgd_base + pgd_index(0); + if (pgd_none_or_clear_bad(pgd)) + goto no_pgd; + + pud = pud_offset(pgd, 0); + if (pud_none_or_clear_bad(pud)) + goto no_pud; + + pmd = pmd_offset(pud, 0); + if (pmd_none_or_clear_bad(pmd)) + goto no_pmd; + + pte = pmd_pgtable(*pmd); + pmd_clear(pmd); + pte_free(mm, pte); +no_pmd: + pud_clear(pud); + pmd_free(mm, pmd); +no_pud: + pgd_clear(pgd); + pud_free(mm, pud); +no_pgd: +#ifdef CONFIG_ARM_LPAE + /* + * Free modules/pkmap or identity pmd tables. + */ + for (pgd = pgd_base; pgd < pgd_base + PTRS_PER_PGD; pgd++) { + if (pgd_none_or_clear_bad(pgd)) + continue; + if (pgd_val(*pgd) & L_PGD_SWAPPER) + continue; + pud = pud_offset(pgd, 0); + if (pud_none_or_clear_bad(pud)) + continue; + pmd = pmd_offset(pud, 0); + pud_clear(pud); + pmd_free(mm, pmd); + pgd_clear(pgd); + pud_free(mm, pud); + } +#endif + __pgd_free(pgd_base); +} diff --git a/arch/arm/mm/proc-arm1020.S b/arch/arm/mm/proc-arm1020.S index 82ec954e45b..d1a2d05971e 100644 --- a/arch/arm/mm/proc-arm1020.S +++ b/arch/arm/mm/proc-arm1020.S @@ -3,6 +3,7 @@ * * Copyright (C) 2000 ARM Limited * Copyright (C) 2000 Deep Blue Solutions Ltd. + * hacked for non-paged-MM by Hyok S. Choi, 2003. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -25,14 +26,15 @@ * CONFIG_CPU_ARM1020_CPU_IDLE -> nohlt */ #include <linux/linkage.h> -#include <linux/config.h> #include <linux/init.h> #include <asm/assembler.h> #include <asm/asm-offsets.h> +#include <asm/hwcap.h> +#include <asm/pgtable-hwdef.h> #include <asm/pgtable.h> -#include <asm/procinfo.h> #include <asm/ptrace.h> -#include <asm/hardware.h> + +#include "proc-macros.S" /* * This is the maximum size of an area which will be invalidated @@ -62,7 +64,7 @@ /* * This is the size at which it becomes more efficient to * clean the whole cache, rather than using the individual - * cache line maintainence instructions. + * cache line maintenance instructions. */ #define CACHE_DLIMIT 32768 @@ -77,15 +79,11 @@ ENTRY(cpu_arm1020_proc_init) * cpu_arm1020_proc_fin() */ ENTRY(cpu_arm1020_proc_fin) - stmfd sp!, {lr} - mov ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE - msr cpsr_c, ip - bl arm1020_flush_kern_cache_all mrc p15, 0, r0, c1, c0, 0 @ ctrl register bic r0, r0, #0x1000 @ ...i............ bic r0, r0, #0x000e @ ............wca. 
mcr p15, 0, r0, c1, c0, 0 @ disable caches - ldmfd sp!, {pc} + mov pc, lr /* * cpu_arm1020_reset(loc) @@ -97,16 +95,21 @@ ENTRY(cpu_arm1020_proc_fin) * loc: location to jump to for soft reset */ .align 5 + .pushsection .idmap.text, "ax" ENTRY(cpu_arm1020_reset) mov ip, #0 mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches mcr p15, 0, ip, c7, c10, 4 @ drain WB +#ifdef CONFIG_MMU mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs +#endif mrc p15, 0, ip, c1, c0, 0 @ ctrl register bic ip, ip, #0x000f @ ............wcam bic ip, ip, #0x1100 @ ...i...s........ mcr p15, 0, ip, c1, c0, 0 @ ctrl register mov pc, r0 +ENDPROC(cpu_arm1020_reset) + .popsection /* * cpu_arm1020_do_idle() @@ -119,6 +122,20 @@ ENTRY(cpu_arm1020_do_idle) /* ================================= CACHE ================================ */ .align 5 + +/* + * flush_icache_all() + * + * Unconditionally clean and invalidate the entire icache. + */ +ENTRY(arm1020_flush_icache_all) +#ifndef CONFIG_CPU_ICACHE_DISABLE + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache +#endif + mov pc, lr +ENDPROC(arm1020_flush_icache_all) + /* * flush_user_cache_all() * @@ -224,20 +241,22 @@ ENTRY(arm1020_coherent_user_range) cmp r0, r1 blo 1b mcr p15, 0, ip, c7, c10, 4 @ drain WB + mov r0, #0 mov pc, lr /* - * flush_kern_dcache_page(void *page) + * flush_kern_dcache_area(void *addr, size_t size) * * Ensure no D cache aliasing occurs, either with itself or * the I cache * - * - page - page aligned address + * - addr - kernel address + * - size - region size */ -ENTRY(arm1020_flush_kern_dcache_page) +ENTRY(arm1020_flush_kern_dcache_area) mov ip, #0 #ifndef CONFIG_CPU_DCACHE_DISABLE - add r1, r0, #PAGE_SZ + add r1, r0, r1 1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry mcr p15, 0, ip, c7, c10, 4 @ drain WB add r0, r0, #CACHE_DLINESIZE @@ -260,7 +279,7 @@ ENTRY(arm1020_flush_kern_dcache_page) * * (same as v4wb) */ -ENTRY(arm1020_dma_inv_range) +arm1020_dma_inv_range: mov ip, #0 #ifndef CONFIG_CPU_DCACHE_DISABLE tst r0, #CACHE_DLINESIZE - 1 @@ -290,7 +309,7 @@ ENTRY(arm1020_dma_inv_range) * * (same as v4wb) */ -ENTRY(arm1020_dma_clean_range) +arm1020_dma_clean_range: mov ip, #0 #ifndef CONFIG_CPU_DCACHE_DISABLE bic r0, r0, #CACHE_DLINESIZE - 1 @@ -325,16 +344,35 @@ ENTRY(arm1020_dma_flush_range) mcr p15, 0, ip, c7, c10, 4 @ drain WB mov pc, lr -ENTRY(arm1020_cache_fns) - .long arm1020_flush_kern_cache_all - .long arm1020_flush_user_cache_all - .long arm1020_flush_user_cache_range - .long arm1020_coherent_kern_range - .long arm1020_coherent_user_range - .long arm1020_flush_kern_dcache_page - .long arm1020_dma_inv_range - .long arm1020_dma_clean_range - .long arm1020_dma_flush_range +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm1020_dma_map_area) + add r1, r1, r0 + cmp r2, #DMA_TO_DEVICE + beq arm1020_dma_clean_range + bcs arm1020_dma_inv_range + b arm1020_dma_flush_range +ENDPROC(arm1020_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm1020_dma_unmap_area) + mov pc, lr +ENDPROC(arm1020_dma_unmap_area) + + .globl arm1020_flush_kern_cache_louis + .equ arm1020_flush_kern_cache_louis, arm1020_flush_kern_cache_all + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions arm1020 .align 5 ENTRY(cpu_arm1020_dcache_clean_area) @@ -359,6 +397,7 @@ ENTRY(cpu_arm1020_dcache_clean_area) */ 
.align 5 ENTRY(cpu_arm1020_switch_mm) +#ifdef CONFIG_MMU #ifndef CONFIG_CPU_DCACHE_DISABLE mcr p15, 0, r3, c7, c10, 4 mov r1, #0xF @ 16 segments @@ -383,6 +422,7 @@ ENTRY(cpu_arm1020_switch_mm) mcr p15, 0, r1, c7, c10, 4 @ drain WB mcr p15, 0, r0, c2, c0, 0 @ load page table pointer mcr p15, 0, r1, c8, c7, 0 @ invalidate I & D TLBs +#endif /* CONFIG_MMU */ mov pc, lr /* @@ -391,51 +431,32 @@ ENTRY(cpu_arm1020_switch_mm) * Set a PTE and flush it out */ .align 5 -ENTRY(cpu_arm1020_set_pte) - str r1, [r0], #-2048 @ linux version - - eor r1, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY - - bic r2, r1, #PTE_SMALL_AP_MASK - bic r2, r2, #PTE_TYPE_MASK - orr r2, r2, #PTE_TYPE_SMALL - - tst r1, #L_PTE_USER @ User? - orrne r2, r2, #PTE_SMALL_AP_URO_SRW - - tst r1, #L_PTE_WRITE | L_PTE_DIRTY @ Write and Dirty? - orreq r2, r2, #PTE_SMALL_AP_UNO_SRW - - tst r1, #L_PTE_PRESENT | L_PTE_YOUNG @ Present and Young? - movne r2, #0 - -#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH - eor r3, r1, #0x0a @ C & small page? - tst r3, #0x0b - biceq r2, r2, #4 -#endif - str r2, [r0] @ hardware version +ENTRY(cpu_arm1020_set_pte_ext) +#ifdef CONFIG_MMU + armv3_set_pte_ext mov r0, r0 #ifndef CONFIG_CPU_DCACHE_DISABLE mcr p15, 0, r0, c7, c10, 4 mcr p15, 0, r0, c7, c10, 1 @ clean D entry #endif mcr p15, 0, r0, c7, c10, 4 @ drain WB +#endif /* CONFIG_MMU */ mov pc, lr - __INIT - .type __arm1020_setup, #function __arm1020_setup: mov r0, #0 mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4 mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4 +#ifdef CONFIG_MMU mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4 +#endif + + adr r5, arm1020_crval + ldmia r5, {r5, r6} mrc p15, 0, r0, c1, c0 @ get control register v4 - ldr r5, arm1020_cr1_clear bic r0, r0, r5 - ldr r5, arm1020_cr1_set - orr r0, r0, r5 + orr r0, r0, r6 #ifdef CONFIG_CPU_CACHE_ROUND_ROBIN orr r0, r0, #0x4000 @ .R.. .... .... .... #endif @@ -447,42 +468,19 @@ __arm1020_setup: * .RVI ZFRS BLDP WCAM * .011 1001 ..11 0101 */ - .type arm1020_cr1_clear, #object - .type arm1020_cr1_set, #object -arm1020_cr1_clear: - .word 0x593f -arm1020_cr1_set: - .word 0x3935 + .type arm1020_crval, #object +arm1020_crval: + crval clear=0x0000593f, mmuset=0x00003935, ucset=0x00001930 __INITDATA + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions arm1020, dabort=v4t_early_abort, pabort=legacy_pabort -/* - * Purpose : Function pointers used to access above functions - all calls - * come through these - */ - .type arm1020_processor_functions, #object -arm1020_processor_functions: - .word v4t_early_abort - .word cpu_arm1020_proc_init - .word cpu_arm1020_proc_fin - .word cpu_arm1020_reset - .word cpu_arm1020_do_idle - .word cpu_arm1020_dcache_clean_area - .word cpu_arm1020_switch_mm - .word cpu_arm1020_set_pte - .size arm1020_processor_functions, . - arm1020_processor_functions .section ".rodata" - .type cpu_arch_name, #object -cpu_arch_name: - .asciz "armv5t" - .size cpu_arch_name, . - cpu_arch_name - - .type cpu_elf_name, #object -cpu_elf_name: - .asciz "v5" - .size cpu_elf_name, . 
- cpu_elf_name + string cpu_arch_name, "armv5t" + string cpu_elf_name, "v5" .type cpu_arm1020_name, #object cpu_arm1020_name: @@ -518,6 +516,9 @@ __arm1020_proc_info: .long PMD_TYPE_SECT | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ + .long PMD_TYPE_SECT | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ b __arm1020_setup .long cpu_arch_name .long cpu_elf_name diff --git a/arch/arm/mm/proc-arm1020e.S b/arch/arm/mm/proc-arm1020e.S index 7375fe930f7..9d89405c3d0 100644 --- a/arch/arm/mm/proc-arm1020e.S +++ b/arch/arm/mm/proc-arm1020e.S @@ -3,6 +3,7 @@ * * Copyright (C) 2000 ARM Limited * Copyright (C) 2000 Deep Blue Solutions Ltd. + * hacked for non-paged-MM by Hyok S. Choi, 2003. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -25,14 +26,15 @@ * CONFIG_CPU_ARM1020_CPU_IDLE -> nohlt */ #include <linux/linkage.h> -#include <linux/config.h> #include <linux/init.h> #include <asm/assembler.h> #include <asm/asm-offsets.h> +#include <asm/hwcap.h> +#include <asm/pgtable-hwdef.h> #include <asm/pgtable.h> -#include <asm/procinfo.h> #include <asm/ptrace.h> -#include <asm/hardware.h> + +#include "proc-macros.S" /* * This is the maximum size of an area which will be invalidated @@ -62,7 +64,7 @@ /* * This is the size at which it becomes more efficient to * clean the whole cache, rather than using the individual - * cache line maintainence instructions. + * cache line maintenance instructions. */ #define CACHE_DLIMIT 32768 @@ -77,15 +79,11 @@ ENTRY(cpu_arm1020e_proc_init) * cpu_arm1020e_proc_fin() */ ENTRY(cpu_arm1020e_proc_fin) - stmfd sp!, {lr} - mov ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE - msr cpsr_c, ip - bl arm1020e_flush_kern_cache_all mrc p15, 0, r0, c1, c0, 0 @ ctrl register bic r0, r0, #0x1000 @ ...i............ bic r0, r0, #0x000e @ ............wca. mcr p15, 0, r0, c1, c0, 0 @ disable caches - ldmfd sp!, {pc} + mov pc, lr /* * cpu_arm1020e_reset(loc) @@ -97,16 +95,21 @@ ENTRY(cpu_arm1020e_proc_fin) * loc: location to jump to for soft reset */ .align 5 + .pushsection .idmap.text, "ax" ENTRY(cpu_arm1020e_reset) mov ip, #0 mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches mcr p15, 0, ip, c7, c10, 4 @ drain WB +#ifdef CONFIG_MMU mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs +#endif mrc p15, 0, ip, c1, c0, 0 @ ctrl register bic ip, ip, #0x000f @ ............wcam bic ip, ip, #0x1100 @ ...i...s........ mcr p15, 0, ip, c1, c0, 0 @ ctrl register mov pc, r0 +ENDPROC(cpu_arm1020e_reset) + .popsection /* * cpu_arm1020e_do_idle() @@ -119,6 +122,20 @@ ENTRY(cpu_arm1020e_do_idle) /* ================================= CACHE ================================ */ .align 5 + +/* + * flush_icache_all() + * + * Unconditionally clean and invalidate the entire icache. 
+ */ +ENTRY(arm1020e_flush_icache_all) +#ifndef CONFIG_CPU_ICACHE_DISABLE + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache +#endif + mov pc, lr +ENDPROC(arm1020e_flush_icache_all) + /* * flush_user_cache_all() * @@ -218,20 +235,22 @@ ENTRY(arm1020e_coherent_user_range) cmp r0, r1 blo 1b mcr p15, 0, ip, c7, c10, 4 @ drain WB + mov r0, #0 mov pc, lr /* - * flush_kern_dcache_page(void *page) + * flush_kern_dcache_area(void *addr, size_t size) * * Ensure no D cache aliasing occurs, either with itself or * the I cache * - * - page - page aligned address + * - addr - kernel address + * - size - region size */ -ENTRY(arm1020e_flush_kern_dcache_page) +ENTRY(arm1020e_flush_kern_dcache_area) mov ip, #0 #ifndef CONFIG_CPU_DCACHE_DISABLE - add r1, r0, #PAGE_SZ + add r1, r0, r1 1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry add r0, r0, #CACHE_DLINESIZE cmp r0, r1 @@ -253,7 +272,7 @@ ENTRY(arm1020e_flush_kern_dcache_page) * * (same as v4wb) */ -ENTRY(arm1020e_dma_inv_range) +arm1020e_dma_inv_range: mov ip, #0 #ifndef CONFIG_CPU_DCACHE_DISABLE tst r0, #CACHE_DLINESIZE - 1 @@ -279,7 +298,7 @@ ENTRY(arm1020e_dma_inv_range) * * (same as v4wb) */ -ENTRY(arm1020e_dma_clean_range) +arm1020e_dma_clean_range: mov ip, #0 #ifndef CONFIG_CPU_DCACHE_DISABLE bic r0, r0, #CACHE_DLINESIZE - 1 @@ -311,16 +330,35 @@ ENTRY(arm1020e_dma_flush_range) mcr p15, 0, ip, c7, c10, 4 @ drain WB mov pc, lr -ENTRY(arm1020e_cache_fns) - .long arm1020e_flush_kern_cache_all - .long arm1020e_flush_user_cache_all - .long arm1020e_flush_user_cache_range - .long arm1020e_coherent_kern_range - .long arm1020e_coherent_user_range - .long arm1020e_flush_kern_dcache_page - .long arm1020e_dma_inv_range - .long arm1020e_dma_clean_range - .long arm1020e_dma_flush_range +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm1020e_dma_map_area) + add r1, r1, r0 + cmp r2, #DMA_TO_DEVICE + beq arm1020e_dma_clean_range + bcs arm1020e_dma_inv_range + b arm1020e_dma_flush_range +ENDPROC(arm1020e_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm1020e_dma_unmap_area) + mov pc, lr +ENDPROC(arm1020e_dma_unmap_area) + + .globl arm1020e_flush_kern_cache_louis + .equ arm1020e_flush_kern_cache_louis, arm1020e_flush_kern_cache_all + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions arm1020e .align 5 ENTRY(cpu_arm1020e_dcache_clean_area) @@ -344,6 +382,7 @@ ENTRY(cpu_arm1020e_dcache_clean_area) */ .align 5 ENTRY(cpu_arm1020e_switch_mm) +#ifdef CONFIG_MMU #ifndef CONFIG_CPU_DCACHE_DISABLE mcr p15, 0, r3, c7, c10, 4 mov r1, #0xF @ 16 segments @@ -367,6 +406,7 @@ ENTRY(cpu_arm1020e_switch_mm) mcr p15, 0, r1, c7, c10, 4 @ drain WB mcr p15, 0, r0, c2, c0, 0 @ load page table pointer mcr p15, 0, r1, c8, c7, 0 @ invalidate I & D TLBs +#endif mov pc, lr /* @@ -375,49 +415,29 @@ ENTRY(cpu_arm1020e_switch_mm) * Set a PTE and flush it out */ .align 5 -ENTRY(cpu_arm1020e_set_pte) - str r1, [r0], #-2048 @ linux version - - eor r1, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY - - bic r2, r1, #PTE_SMALL_AP_MASK - bic r2, r2, #PTE_TYPE_MASK - orr r2, r2, #PTE_TYPE_SMALL - - tst r1, #L_PTE_USER @ User? - orrne r2, r2, #PTE_SMALL_AP_URO_SRW - - tst r1, #L_PTE_WRITE | L_PTE_DIRTY @ Write and Dirty? 
- orreq r2, r2, #PTE_SMALL_AP_UNO_SRW - - tst r1, #L_PTE_PRESENT | L_PTE_YOUNG @ Present and Young? - movne r2, #0 - -#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH - eor r3, r1, #0x0a @ C & small page? - tst r3, #0x0b - biceq r2, r2, #4 -#endif - str r2, [r0] @ hardware version +ENTRY(cpu_arm1020e_set_pte_ext) +#ifdef CONFIG_MMU + armv3_set_pte_ext mov r0, r0 #ifndef CONFIG_CPU_DCACHE_DISABLE mcr p15, 0, r0, c7, c10, 1 @ clean D entry #endif +#endif /* CONFIG_MMU */ mov pc, lr - __INIT - .type __arm1020e_setup, #function __arm1020e_setup: mov r0, #0 mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4 mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4 +#ifdef CONFIG_MMU mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4 +#endif + adr r5, arm1020e_crval + ldmia r5, {r5, r6} mrc p15, 0, r0, c1, c0 @ get control register v4 - ldr r5, arm1020e_cr1_clear bic r0, r0, r5 - ldr r5, arm1020e_cr1_set - orr r0, r0, r5 + orr r0, r0, r6 #ifdef CONFIG_CPU_CACHE_ROUND_ROBIN orr r0, r0, #0x4000 @ .R.. .... .... .... #endif @@ -429,65 +449,19 @@ __arm1020e_setup: * .RVI ZFRS BLDP WCAM * .011 1001 ..11 0101 */ - .type arm1020e_cr1_clear, #object - .type arm1020e_cr1_set, #object -arm1020e_cr1_clear: - .word 0x5f3f -arm1020e_cr1_set: - .word 0x3935 + .type arm1020e_crval, #object +arm1020e_crval: + crval clear=0x00007f3f, mmuset=0x00003935, ucset=0x00001930 __INITDATA - -/* - * Purpose : Function pointers used to access above functions - all calls - * come through these - */ - .type arm1020e_processor_functions, #object -arm1020e_processor_functions: - .word v4t_early_abort - .word cpu_arm1020e_proc_init - .word cpu_arm1020e_proc_fin - .word cpu_arm1020e_reset - .word cpu_arm1020e_do_idle - .word cpu_arm1020e_dcache_clean_area - .word cpu_arm1020e_switch_mm - .word cpu_arm1020e_set_pte - .size arm1020e_processor_functions, . - arm1020e_processor_functions + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions arm1020e, dabort=v4t_early_abort, pabort=legacy_pabort .section ".rodata" - .type cpu_arch_name, #object -cpu_arch_name: - .asciz "armv5te" - .size cpu_arch_name, . - cpu_arch_name - - .type cpu_elf_name, #object -cpu_elf_name: - .asciz "v5" - .size cpu_elf_name, . - cpu_elf_name - - .type cpu_arm1020e_name, #object -cpu_arm1020e_name: - .ascii "ARM1020E" -#ifndef CONFIG_CPU_ICACHE_DISABLE - .ascii "i" -#endif -#ifndef CONFIG_CPU_DCACHE_DISABLE - .ascii "d" -#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH - .ascii "(wt)" -#else - .ascii "(wb)" -#endif -#endif -#ifndef CONFIG_CPU_BPREDICT_DISABLE - .ascii "B" -#endif -#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN - .ascii "RR" -#endif - .ascii "\0" - .size cpu_arm1020e_name, . - cpu_arm1020e_name + string cpu_arch_name, "armv5te" + string cpu_elf_name, "v5" + string cpu_arm1020e_name, "ARM1020E" .align @@ -501,6 +475,10 @@ __arm1020e_proc_info: PMD_BIT4 | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ + .long PMD_TYPE_SECT | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ b __arm1020e_setup .long cpu_arch_name .long cpu_elf_name diff --git a/arch/arm/mm/proc-arm1022.S b/arch/arm/mm/proc-arm1022.S index 6ca639094d6..6f01a0ae3b3 100644 --- a/arch/arm/mm/proc-arm1022.S +++ b/arch/arm/mm/proc-arm1022.S @@ -3,6 +3,7 @@ * * Copyright (C) 2000 ARM Limited * Copyright (C) 2000 Deep Blue Solutions Ltd. + * hacked for non-paged-MM by Hyok S. Choi, 2003. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -14,14 +15,16 @@ * functions on the ARM1022E. */ #include <linux/linkage.h> -#include <linux/config.h> #include <linux/init.h> #include <asm/assembler.h> #include <asm/asm-offsets.h> +#include <asm/hwcap.h> +#include <asm/pgtable-hwdef.h> #include <asm/pgtable.h> -#include <asm/procinfo.h> #include <asm/ptrace.h> +#include "proc-macros.S" + /* * This is the maximum size of an area which will be invalidated * using the single invalidate entry instructions. Anything larger @@ -50,7 +53,7 @@ /* * This is the size at which it becomes more efficient to * clean the whole cache, rather than using the individual - * cache line maintainence instructions. + * cache line maintenance instructions. */ #define CACHE_DLIMIT 32768 @@ -65,15 +68,11 @@ ENTRY(cpu_arm1022_proc_init) * cpu_arm1022_proc_fin() */ ENTRY(cpu_arm1022_proc_fin) - stmfd sp!, {lr} - mov ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE - msr cpsr_c, ip - bl arm1022_flush_kern_cache_all mrc p15, 0, r0, c1, c0, 0 @ ctrl register bic r0, r0, #0x1000 @ ...i............ bic r0, r0, #0x000e @ ............wca. mcr p15, 0, r0, c1, c0, 0 @ disable caches - ldmfd sp!, {pc} + mov pc, lr /* * cpu_arm1022_reset(loc) @@ -85,16 +84,21 @@ ENTRY(cpu_arm1022_proc_fin) * loc: location to jump to for soft reset */ .align 5 + .pushsection .idmap.text, "ax" ENTRY(cpu_arm1022_reset) mov ip, #0 mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches mcr p15, 0, ip, c7, c10, 4 @ drain WB +#ifdef CONFIG_MMU mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs +#endif mrc p15, 0, ip, c1, c0, 0 @ ctrl register bic ip, ip, #0x000f @ ............wcam bic ip, ip, #0x1100 @ ...i...s........ mcr p15, 0, ip, c1, c0, 0 @ ctrl register mov pc, r0 +ENDPROC(cpu_arm1022_reset) + .popsection /* * cpu_arm1022_do_idle() @@ -107,6 +111,20 @@ ENTRY(cpu_arm1022_do_idle) /* ================================= CACHE ================================ */ .align 5 + +/* + * flush_icache_all() + * + * Unconditionally clean and invalidate the entire icache. 
+ */ +ENTRY(arm1022_flush_icache_all) +#ifndef CONFIG_CPU_ICACHE_DISABLE + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache +#endif + mov pc, lr +ENDPROC(arm1022_flush_icache_all) + /* * flush_user_cache_all() * @@ -206,20 +224,22 @@ ENTRY(arm1022_coherent_user_range) cmp r0, r1 blo 1b mcr p15, 0, ip, c7, c10, 4 @ drain WB + mov r0, #0 mov pc, lr /* - * flush_kern_dcache_page(void *page) + * flush_kern_dcache_area(void *addr, size_t size) * * Ensure no D cache aliasing occurs, either with itself or * the I cache * - * - page - page aligned address + * - addr - kernel address + * - size - region size */ -ENTRY(arm1022_flush_kern_dcache_page) +ENTRY(arm1022_flush_kern_dcache_area) mov ip, #0 #ifndef CONFIG_CPU_DCACHE_DISABLE - add r1, r0, #PAGE_SZ + add r1, r0, r1 1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry add r0, r0, #CACHE_DLINESIZE cmp r0, r1 @@ -241,7 +261,7 @@ ENTRY(arm1022_flush_kern_dcache_page) * * (same as v4wb) */ -ENTRY(arm1022_dma_inv_range) +arm1022_dma_inv_range: mov ip, #0 #ifndef CONFIG_CPU_DCACHE_DISABLE tst r0, #CACHE_DLINESIZE - 1 @@ -267,7 +287,7 @@ ENTRY(arm1022_dma_inv_range) * * (same as v4wb) */ -ENTRY(arm1022_dma_clean_range) +arm1022_dma_clean_range: mov ip, #0 #ifndef CONFIG_CPU_DCACHE_DISABLE bic r0, r0, #CACHE_DLINESIZE - 1 @@ -299,16 +319,35 @@ ENTRY(arm1022_dma_flush_range) mcr p15, 0, ip, c7, c10, 4 @ drain WB mov pc, lr -ENTRY(arm1022_cache_fns) - .long arm1022_flush_kern_cache_all - .long arm1022_flush_user_cache_all - .long arm1022_flush_user_cache_range - .long arm1022_coherent_kern_range - .long arm1022_coherent_user_range - .long arm1022_flush_kern_dcache_page - .long arm1022_dma_inv_range - .long arm1022_dma_clean_range - .long arm1022_dma_flush_range +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm1022_dma_map_area) + add r1, r1, r0 + cmp r2, #DMA_TO_DEVICE + beq arm1022_dma_clean_range + bcs arm1022_dma_inv_range + b arm1022_dma_flush_range +ENDPROC(arm1022_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm1022_dma_unmap_area) + mov pc, lr +ENDPROC(arm1022_dma_unmap_area) + + .globl arm1022_flush_kern_cache_louis + .equ arm1022_flush_kern_cache_louis, arm1022_flush_kern_cache_all + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions arm1022 .align 5 ENTRY(cpu_arm1022_dcache_clean_area) @@ -332,6 +371,7 @@ ENTRY(cpu_arm1022_dcache_clean_area) */ .align 5 ENTRY(cpu_arm1022_switch_mm) +#ifdef CONFIG_MMU #ifndef CONFIG_CPU_DCACHE_DISABLE mov r1, #(CACHE_DSEGMENTS - 1) << 5 @ 16 segments 1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries @@ -348,57 +388,38 @@ ENTRY(cpu_arm1022_switch_mm) mcr p15, 0, r1, c7, c10, 4 @ drain WB mcr p15, 0, r0, c2, c0, 0 @ load page table pointer mcr p15, 0, r1, c8, c7, 0 @ invalidate I & D TLBs +#endif mov pc, lr /* - * cpu_arm1022_set_pte(ptep, pte) + * cpu_arm1022_set_pte_ext(ptep, pte, ext) * * Set a PTE and flush it out */ .align 5 -ENTRY(cpu_arm1022_set_pte) - str r1, [r0], #-2048 @ linux version - - eor r1, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY - - bic r2, r1, #PTE_SMALL_AP_MASK - bic r2, r2, #PTE_TYPE_MASK - orr r2, r2, #PTE_TYPE_SMALL - - tst r1, #L_PTE_USER @ User? - orrne r2, r2, #PTE_SMALL_AP_URO_SRW - - tst r1, #L_PTE_WRITE | L_PTE_DIRTY @ Write and Dirty? 
- orreq r2, r2, #PTE_SMALL_AP_UNO_SRW - - tst r1, #L_PTE_PRESENT | L_PTE_YOUNG @ Present and Young? - movne r2, #0 - -#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH - eor r3, r1, #0x0a @ C & small page? - tst r3, #0x0b - biceq r2, r2, #4 -#endif - str r2, [r0] @ hardware version +ENTRY(cpu_arm1022_set_pte_ext) +#ifdef CONFIG_MMU + armv3_set_pte_ext mov r0, r0 #ifndef CONFIG_CPU_DCACHE_DISABLE mcr p15, 0, r0, c7, c10, 1 @ clean D entry #endif +#endif /* CONFIG_MMU */ mov pc, lr - __INIT - .type __arm1022_setup, #function __arm1022_setup: mov r0, #0 mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4 mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4 +#ifdef CONFIG_MMU mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4 +#endif + adr r5, arm1022_crval + ldmia r5, {r5, r6} mrc p15, 0, r0, c1, c0 @ get control register v4 - ldr r5, arm1022_cr1_clear bic r0, r0, r5 - ldr r5, arm1022_cr1_set - orr r0, r0, r5 + orr r0, r0, r6 #ifdef CONFIG_CPU_CACHE_ROUND_ROBIN orr r0, r0, #0x4000 @ .R.............. #endif @@ -411,65 +432,19 @@ __arm1022_setup: * .011 1001 ..11 0101 * */ - .type arm1022_cr1_clear, #object - .type arm1022_cr1_set, #object -arm1022_cr1_clear: - .word 0x7f3f -arm1022_cr1_set: - .word 0x3935 + .type arm1022_crval, #object +arm1022_crval: + crval clear=0x00007f3f, mmuset=0x00003935, ucset=0x00001930 __INITDATA - -/* - * Purpose : Function pointers used to access above functions - all calls - * come through these - */ - .type arm1022_processor_functions, #object -arm1022_processor_functions: - .word v4t_early_abort - .word cpu_arm1022_proc_init - .word cpu_arm1022_proc_fin - .word cpu_arm1022_reset - .word cpu_arm1022_do_idle - .word cpu_arm1022_dcache_clean_area - .word cpu_arm1022_switch_mm - .word cpu_arm1022_set_pte - .size arm1022_processor_functions, . - arm1022_processor_functions + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions arm1022, dabort=v4t_early_abort, pabort=legacy_pabort .section ".rodata" - .type cpu_arch_name, #object -cpu_arch_name: - .asciz "armv5te" - .size cpu_arch_name, . - cpu_arch_name - - .type cpu_elf_name, #object -cpu_elf_name: - .asciz "v5" - .size cpu_elf_name, . - cpu_elf_name - - .type cpu_arm1022_name, #object -cpu_arm1022_name: - .ascii "arm1022" -#ifndef CONFIG_CPU_ICACHE_DISABLE - .ascii "i" -#endif -#ifndef CONFIG_CPU_DCACHE_DISABLE - .ascii "d" -#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH - .ascii "(wt)" -#else - .ascii "(wb)" -#endif -#endif -#ifndef CONFIG_CPU_BPREDICT_DISABLE - .ascii "B" -#endif -#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN - .ascii "RR" -#endif - .ascii "\0" - .size cpu_arm1022_name, . - cpu_arm1022_name + string cpu_arch_name, "armv5te" + string cpu_elf_name, "v5" + string cpu_arm1022_name, "ARM1022" .align @@ -483,6 +458,10 @@ __arm1022_proc_info: PMD_BIT4 | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ + .long PMD_TYPE_SECT | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ b __arm1022_setup .long cpu_arch_name .long cpu_elf_name diff --git a/arch/arm/mm/proc-arm1026.S b/arch/arm/mm/proc-arm1026.S index 10317e4f55d..4799a24b43e 100644 --- a/arch/arm/mm/proc-arm1026.S +++ b/arch/arm/mm/proc-arm1026.S @@ -3,6 +3,7 @@ * * Copyright (C) 2000 ARM Limited * Copyright (C) 2000 Deep Blue Solutions Ltd. + * hacked for non-paged-MM by Hyok S. Choi, 2003. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -14,14 +15,16 @@ * functions on the ARM1026EJ-S. 
*/ #include <linux/linkage.h> -#include <linux/config.h> #include <linux/init.h> #include <asm/assembler.h> #include <asm/asm-offsets.h> +#include <asm/hwcap.h> +#include <asm/pgtable-hwdef.h> #include <asm/pgtable.h> -#include <asm/procinfo.h> #include <asm/ptrace.h> +#include "proc-macros.S" + /* * This is the maximum size of an area which will be invalidated * using the single invalidate entry instructions. Anything larger @@ -50,7 +53,7 @@ /* * This is the size at which it becomes more efficient to * clean the whole cache, rather than using the individual - * cache line maintainence instructions. + * cache line maintenance instructions. */ #define CACHE_DLIMIT 32768 @@ -65,15 +68,11 @@ ENTRY(cpu_arm1026_proc_init) * cpu_arm1026_proc_fin() */ ENTRY(cpu_arm1026_proc_fin) - stmfd sp!, {lr} - mov ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE - msr cpsr_c, ip - bl arm1026_flush_kern_cache_all mrc p15, 0, r0, c1, c0, 0 @ ctrl register bic r0, r0, #0x1000 @ ...i............ bic r0, r0, #0x000e @ ............wca. mcr p15, 0, r0, c1, c0, 0 @ disable caches - ldmfd sp!, {pc} + mov pc, lr /* * cpu_arm1026_reset(loc) @@ -85,16 +84,21 @@ ENTRY(cpu_arm1026_proc_fin) * loc: location to jump to for soft reset */ .align 5 + .pushsection .idmap.text, "ax" ENTRY(cpu_arm1026_reset) mov ip, #0 mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches mcr p15, 0, ip, c7, c10, 4 @ drain WB +#ifdef CONFIG_MMU mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs +#endif mrc p15, 0, ip, c1, c0, 0 @ ctrl register bic ip, ip, #0x000f @ ............wcam bic ip, ip, #0x1100 @ ...i...s........ mcr p15, 0, ip, c1, c0, 0 @ ctrl register mov pc, r0 +ENDPROC(cpu_arm1026_reset) + .popsection /* * cpu_arm1026_do_idle() @@ -107,6 +111,20 @@ ENTRY(cpu_arm1026_do_idle) /* ================================= CACHE ================================ */ .align 5 + +/* + * flush_icache_all() + * + * Unconditionally clean and invalidate the entire icache. 
+ */ +ENTRY(arm1026_flush_icache_all) +#ifndef CONFIG_CPU_ICACHE_DISABLE + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache +#endif + mov pc, lr +ENDPROC(arm1026_flush_icache_all) + /* * flush_user_cache_all() * @@ -200,20 +218,22 @@ ENTRY(arm1026_coherent_user_range) cmp r0, r1 blo 1b mcr p15, 0, ip, c7, c10, 4 @ drain WB + mov r0, #0 mov pc, lr /* - * flush_kern_dcache_page(void *page) + * flush_kern_dcache_area(void *addr, size_t size) * * Ensure no D cache aliasing occurs, either with itself or * the I cache * - * - page - page aligned address + * - addr - kernel address + * - size - region size */ -ENTRY(arm1026_flush_kern_dcache_page) +ENTRY(arm1026_flush_kern_dcache_area) mov ip, #0 #ifndef CONFIG_CPU_DCACHE_DISABLE - add r1, r0, #PAGE_SZ + add r1, r0, r1 1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry add r0, r0, #CACHE_DLINESIZE cmp r0, r1 @@ -235,7 +255,7 @@ ENTRY(arm1026_flush_kern_dcache_page) * * (same as v4wb) */ -ENTRY(arm1026_dma_inv_range) +arm1026_dma_inv_range: mov ip, #0 #ifndef CONFIG_CPU_DCACHE_DISABLE tst r0, #CACHE_DLINESIZE - 1 @@ -261,7 +281,7 @@ ENTRY(arm1026_dma_inv_range) * * (same as v4wb) */ -ENTRY(arm1026_dma_clean_range) +arm1026_dma_clean_range: mov ip, #0 #ifndef CONFIG_CPU_DCACHE_DISABLE bic r0, r0, #CACHE_DLINESIZE - 1 @@ -293,16 +313,35 @@ ENTRY(arm1026_dma_flush_range) mcr p15, 0, ip, c7, c10, 4 @ drain WB mov pc, lr -ENTRY(arm1026_cache_fns) - .long arm1026_flush_kern_cache_all - .long arm1026_flush_user_cache_all - .long arm1026_flush_user_cache_range - .long arm1026_coherent_kern_range - .long arm1026_coherent_user_range - .long arm1026_flush_kern_dcache_page - .long arm1026_dma_inv_range - .long arm1026_dma_clean_range - .long arm1026_dma_flush_range +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm1026_dma_map_area) + add r1, r1, r0 + cmp r2, #DMA_TO_DEVICE + beq arm1026_dma_clean_range + bcs arm1026_dma_inv_range + b arm1026_dma_flush_range +ENDPROC(arm1026_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm1026_dma_unmap_area) + mov pc, lr +ENDPROC(arm1026_dma_unmap_area) + + .globl arm1026_flush_kern_cache_louis + .equ arm1026_flush_kern_cache_louis, arm1026_flush_kern_cache_all + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions arm1026 .align 5 ENTRY(cpu_arm1026_dcache_clean_area) @@ -326,6 +365,7 @@ ENTRY(cpu_arm1026_dcache_clean_area) */ .align 5 ENTRY(cpu_arm1026_switch_mm) +#ifdef CONFIG_MMU mov r1, #0 #ifndef CONFIG_CPU_DCACHE_DISABLE 1: mrc p15, 0, r15, c7, c14, 3 @ test, clean, invalidate @@ -337,63 +377,43 @@ ENTRY(cpu_arm1026_switch_mm) mcr p15, 0, r1, c7, c10, 4 @ drain WB mcr p15, 0, r0, c2, c0, 0 @ load page table pointer mcr p15, 0, r1, c8, c7, 0 @ invalidate I & D TLBs +#endif mov pc, lr /* - * cpu_arm1026_set_pte(ptep, pte) + * cpu_arm1026_set_pte_ext(ptep, pte, ext) * * Set a PTE and flush it out */ .align 5 -ENTRY(cpu_arm1026_set_pte) - str r1, [r0], #-2048 @ linux version - - eor r1, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY - - bic r2, r1, #PTE_SMALL_AP_MASK - bic r2, r2, #PTE_TYPE_MASK - orr r2, r2, #PTE_TYPE_SMALL - - tst r1, #L_PTE_USER @ User? - orrne r2, r2, #PTE_SMALL_AP_URO_SRW - - tst r1, #L_PTE_WRITE | L_PTE_DIRTY @ Write and Dirty? 
- orreq r2, r2, #PTE_SMALL_AP_UNO_SRW - - tst r1, #L_PTE_PRESENT | L_PTE_YOUNG @ Present and Young? - movne r2, #0 - -#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH - eor r3, r1, #0x0a @ C & small page? - tst r3, #0x0b - biceq r2, r2, #4 -#endif - str r2, [r0] @ hardware version +ENTRY(cpu_arm1026_set_pte_ext) +#ifdef CONFIG_MMU + armv3_set_pte_ext mov r0, r0 #ifndef CONFIG_CPU_DCACHE_DISABLE mcr p15, 0, r0, c7, c10, 1 @ clean D entry #endif +#endif /* CONFIG_MMU */ mov pc, lr - - __INIT - .type __arm1026_setup, #function __arm1026_setup: mov r0, #0 mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4 mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4 +#ifdef CONFIG_MMU mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4 mcr p15, 0, r4, c2, c0 @ load page table pointer +#endif #ifdef CONFIG_CPU_DCACHE_WRITETHROUGH mov r0, #4 @ explicitly disable writeback mcr p15, 7, r0, c15, c0, 0 #endif + adr r5, arm1026_crval + ldmia r5, {r5, r6} mrc p15, 0, r0, c1, c0 @ get control register v4 - ldr r5, arm1026_cr1_clear bic r0, r0, r5 - ldr r5, arm1026_cr1_set - orr r0, r0, r5 + orr r0, r0, r6 #ifdef CONFIG_CPU_CACHE_ROUND_ROBIN orr r0, r0, #0x4000 @ .R.. .... .... .... #endif @@ -406,67 +426,20 @@ __arm1026_setup: * .011 1001 ..11 0101 * */ - .type arm1026_cr1_clear, #object - .type arm1026_cr1_set, #object -arm1026_cr1_clear: - .word 0x7f3f -arm1026_cr1_set: - .word 0x3935 + .type arm1026_crval, #object +arm1026_crval: + crval clear=0x00007f3f, mmuset=0x00003935, ucset=0x00001934 __INITDATA - -/* - * Purpose : Function pointers used to access above functions - all calls - * come through these - */ - .type arm1026_processor_functions, #object -arm1026_processor_functions: - .word v5t_early_abort - .word cpu_arm1026_proc_init - .word cpu_arm1026_proc_fin - .word cpu_arm1026_reset - .word cpu_arm1026_do_idle - .word cpu_arm1026_dcache_clean_area - .word cpu_arm1026_switch_mm - .word cpu_arm1026_set_pte - .size arm1026_processor_functions, . - arm1026_processor_functions + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions arm1026, dabort=v5t_early_abort, pabort=legacy_pabort .section .rodata - .type cpu_arch_name, #object -cpu_arch_name: - .asciz "armv5tej" - .size cpu_arch_name, . - cpu_arch_name - - .type cpu_elf_name, #object -cpu_elf_name: - .asciz "v5" - .size cpu_elf_name, . - cpu_elf_name + string cpu_arch_name, "armv5tej" + string cpu_elf_name, "v5" .align - - .type cpu_arm1026_name, #object -cpu_arm1026_name: - .ascii "ARM1026EJ-S" -#ifndef CONFIG_CPU_ICACHE_DISABLE - .ascii "i" -#endif -#ifndef CONFIG_CPU_DCACHE_DISABLE - .ascii "d" -#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH - .ascii "(wt)" -#else - .ascii "(wb)" -#endif -#endif -#ifndef CONFIG_CPU_BPREDICT_DISABLE - .ascii "B" -#endif -#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN - .ascii "RR" -#endif - .ascii "\0" - .size cpu_arm1026_name, . 
- cpu_arm1026_name - + string cpu_arm1026_name, "ARM1026EJ-S" .align .section ".proc.info.init", #alloc, #execinstr @@ -479,6 +452,10 @@ __arm1026_proc_info: PMD_BIT4 | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ + .long PMD_TYPE_SECT | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ b __arm1026_setup .long cpu_arch_name .long cpu_elf_name diff --git a/arch/arm/mm/proc-arm6_7.S b/arch/arm/mm/proc-arm6_7.S deleted file mode 100644 index 8e7e1e70ab0..00000000000 --- a/arch/arm/mm/proc-arm6_7.S +++ /dev/null @@ -1,404 +0,0 @@ -/* - * linux/arch/arm/mm/proc-arm6,7.S - * - * Copyright (C) 1997-2000 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * These are the low level assembler for performing cache and TLB - * functions on the ARM610 & ARM710. - */ -#include <linux/linkage.h> -#include <linux/init.h> -#include <asm/assembler.h> -#include <asm/asm-offsets.h> -#include <asm/pgtable.h> -#include <asm/procinfo.h> -#include <asm/ptrace.h> - -ENTRY(cpu_arm6_dcache_clean_area) -ENTRY(cpu_arm7_dcache_clean_area) - mov pc, lr - -/* - * Function: arm6_7_data_abort () - * - * Params : r2 = address of aborted instruction - * : sp = pointer to registers - * - * Purpose : obtain information about current aborted instruction - * - * Returns : r0 = address of abort - * : r1 = FSR - */ - -ENTRY(cpu_arm7_data_abort) - mrc p15, 0, r1, c5, c0, 0 @ get FSR - mrc p15, 0, r0, c6, c0, 0 @ get FAR - ldr r8, [r0] @ read arm instruction - tst r8, #1 << 20 @ L = 0 -> write? - orreq r1, r1, #1 << 11 @ yes. - and r7, r8, #15 << 24 - add pc, pc, r7, lsr #22 @ Now branch to the relevant processing routine - nop - -/* 0 */ b .data_unknown -/* 1 */ mov pc, lr @ swp -/* 2 */ b .data_unknown -/* 3 */ b .data_unknown -/* 4 */ b .data_arm_lateldrpostconst @ ldr rd, [rn], #m -/* 5 */ b .data_arm_lateldrpreconst @ ldr rd, [rn, #m] -/* 6 */ b .data_arm_lateldrpostreg @ ldr rd, [rn], rm -/* 7 */ b .data_arm_lateldrprereg @ ldr rd, [rn, rm] -/* 8 */ b .data_arm_ldmstm @ ldm*a rn, <rlist> -/* 9 */ b .data_arm_ldmstm @ ldm*b rn, <rlist> -/* a */ b .data_unknown -/* b */ b .data_unknown -/* c */ mov pc, lr @ ldc rd, [rn], #m @ Same as ldr rd, [rn], #m -/* d */ mov pc, lr @ ldc rd, [rn, #m] -/* e */ b .data_unknown -/* f */ -.data_unknown: @ Part of jumptable - mov r0, r2 - mov r1, r8 - mov r2, sp - bl baddataabort - b ret_from_exception - -ENTRY(cpu_arm6_data_abort) - mrc p15, 0, r1, c5, c0, 0 @ get FSR - mrc p15, 0, r0, c6, c0, 0 @ get FAR - ldr r8, [r2] @ read arm instruction - tst r8, #1 << 20 @ L = 0 -> write? - orreq r1, r1, #1 << 11 @ yes. - and r7, r8, #14 << 24 - teq r7, #8 << 24 @ was it ldm/stm - movne pc, lr - -.data_arm_ldmstm: - tst r8, #1 << 21 @ check writeback bit - moveq pc, lr @ no writeback -> no fixup - mov r7, #0x11 - orr r7, r7, #0x1100 - and r6, r8, r7 - and r2, r8, r7, lsl #1 - add r6, r6, r2, lsr #1 - and r2, r8, r7, lsl #2 - add r6, r6, r2, lsr #2 - and r2, r8, r7, lsl #3 - add r6, r6, r2, lsr #3 - add r6, r6, r6, lsr #8 - add r6, r6, r6, lsr #4 - and r6, r6, #15 @ r6 = no. of registers to transfer. 
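
Note on the removed .data_arm_ldmstm fixup above (deleted proc-arm6,7.S): to undo the base-register writeback of an aborted ldm/stm it first needs the number of registers in the transfer list, and the and/add pairs implement a nibble-wise population count of the instruction's low 16 bits. A C sketch of the same arithmetic (names are illustrative, not from the kernel source):

    /* Counts the set bits in the ldm/stm register list (insn[15:0]) exactly as
     * the assembly does: per-nibble partial sums, then folded together. */
    static unsigned int rlist_popcount(unsigned int insn)
    {
        unsigned int n;

        n  =  insn & 0x1111;                    /* bit 0 of each nibble        */
        n += (insn & 0x2222) >> 1;              /* + bit 1 of each nibble      */
        n += (insn & 0x4444) >> 2;              /* + bit 2 of each nibble      */
        n += (insn & 0x8888) >> 3;              /* each nibble = its own count */
        n += n >> 8;                            /* fold the two halves         */
        n += n >> 4;
        return n & 15;                          /* 0..15 (a full 16 wraps to 0,
                                                   as in the assembly)         */
    }

The count is then scaled by 4 and subtracted from or added to Rn, depending on the U bit, to restore the pre-abort base register.
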
- and r5, r8, #15 << 16 @ Extract 'n' from instruction - ldr r7, [sp, r5, lsr #14] @ Get register 'Rn' - tst r8, #1 << 23 @ Check U bit - subne r7, r7, r6, lsl #2 @ Undo increment - addeq r7, r7, r6, lsl #2 @ Undo decrement - str r7, [sp, r5, lsr #14] @ Put register 'Rn' - mov pc, lr - -.data_arm_apply_r6_and_rn: - and r5, r8, #15 << 16 @ Extract 'n' from instruction - ldr r7, [sp, r5, lsr #14] @ Get register 'Rn' - tst r8, #1 << 23 @ Check U bit - subne r7, r7, r6 @ Undo incrmenet - addeq r7, r7, r6 @ Undo decrement - str r7, [sp, r5, lsr #14] @ Put register 'Rn' - mov pc, lr - -.data_arm_lateldrpreconst: - tst r8, #1 << 21 @ check writeback bit - moveq pc, lr @ no writeback -> no fixup -.data_arm_lateldrpostconst: - movs r2, r8, lsl #20 @ Get offset - moveq pc, lr @ zero -> no fixup - and r5, r8, #15 << 16 @ Extract 'n' from instruction - ldr r7, [sp, r5, lsr #14] @ Get register 'Rn' - tst r8, #1 << 23 @ Check U bit - subne r7, r7, r2, lsr #20 @ Undo increment - addeq r7, r7, r2, lsr #20 @ Undo decrement - str r7, [sp, r5, lsr #14] @ Put register 'Rn' - mov pc, lr - -.data_arm_lateldrprereg: - tst r8, #1 << 21 @ check writeback bit - moveq pc, lr @ no writeback -> no fixup -.data_arm_lateldrpostreg: - and r7, r8, #15 @ Extract 'm' from instruction - ldr r6, [sp, r7, lsl #2] @ Get register 'Rm' - mov r5, r8, lsr #7 @ get shift count - ands r5, r5, #31 - and r7, r8, #0x70 @ get shift type - orreq r7, r7, #8 @ shift count = 0 - add pc, pc, r7 - nop - - mov r6, r6, lsl r5 @ 0: LSL #!0 - b .data_arm_apply_r6_and_rn - b .data_arm_apply_r6_and_rn @ 1: LSL #0 - nop - b .data_unknown @ 2: MUL? - nop - b .data_unknown @ 3: MUL? - nop - mov r6, r6, lsr r5 @ 4: LSR #!0 - b .data_arm_apply_r6_and_rn - mov r6, r6, lsr #32 @ 5: LSR #32 - b .data_arm_apply_r6_and_rn - b .data_unknown @ 6: MUL? - nop - b .data_unknown @ 7: MUL? - nop - mov r6, r6, asr r5 @ 8: ASR #!0 - b .data_arm_apply_r6_and_rn - mov r6, r6, asr #32 @ 9: ASR #32 - b .data_arm_apply_r6_and_rn - b .data_unknown @ A: MUL? - nop - b .data_unknown @ B: MUL? - nop - mov r6, r6, ror r5 @ C: ROR #!0 - b .data_arm_apply_r6_and_rn - mov r6, r6, rrx @ D: RRX - b .data_arm_apply_r6_and_rn - b .data_unknown @ E: MUL? - nop - b .data_unknown @ F: MUL? - -/* - * Function: arm6_7_proc_init (void) - * : arm6_7_proc_fin (void) - * - * Notes : This processor does not require these - */ -ENTRY(cpu_arm6_proc_init) -ENTRY(cpu_arm7_proc_init) - mov pc, lr - -ENTRY(cpu_arm6_proc_fin) -ENTRY(cpu_arm7_proc_fin) - mov r0, #PSR_F_BIT | PSR_I_BIT | SVC_MODE - msr cpsr_c, r0 - mov r0, #0x31 @ ....S..DP...M - mcr p15, 0, r0, c1, c0, 0 @ disable caches - mov pc, lr - -ENTRY(cpu_arm6_do_idle) -ENTRY(cpu_arm7_do_idle) - mov pc, lr - -/* - * Function: arm6_7_switch_mm(unsigned long pgd_phys) - * Params : pgd_phys Physical address of page table - * Purpose : Perform a task switch, saving the old processes state, and restoring - * the new. 
- */ -ENTRY(cpu_arm6_switch_mm) -ENTRY(cpu_arm7_switch_mm) - mov r1, #0 - mcr p15, 0, r1, c7, c0, 0 @ flush cache - mcr p15, 0, r0, c2, c0, 0 @ update page table ptr - mcr p15, 0, r1, c5, c0, 0 @ flush TLBs - mov pc, lr - -/* - * Function: arm6_7_set_pte(pte_t *ptep, pte_t pte) - * Params : r0 = Address to set - * : r1 = value to set - * Purpose : Set a PTE and flush it out of any WB cache - */ - .align 5 -ENTRY(cpu_arm6_set_pte) -ENTRY(cpu_arm7_set_pte) - str r1, [r0], #-2048 @ linux version - - eor r1, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY - - bic r2, r1, #PTE_SMALL_AP_MASK - bic r2, r2, #PTE_TYPE_MASK - orr r2, r2, #PTE_TYPE_SMALL - - tst r1, #L_PTE_USER @ User? - orrne r2, r2, #PTE_SMALL_AP_URO_SRW - - tst r1, #L_PTE_WRITE | L_PTE_DIRTY @ Write and Dirty? - orreq r2, r2, #PTE_SMALL_AP_UNO_SRW - - tst r1, #L_PTE_PRESENT | L_PTE_YOUNG @ Present and Young - movne r2, #0 - - str r2, [r0] @ hardware version - mov pc, lr - -/* - * Function: _arm6_7_reset - * Params : r0 = address to jump to - * Notes : This sets up everything for a reset - */ -ENTRY(cpu_arm6_reset) -ENTRY(cpu_arm7_reset) - mov r1, #0 - mcr p15, 0, r1, c7, c0, 0 @ flush cache - mcr p15, 0, r1, c5, c0, 0 @ flush TLB - mov r1, #0x30 - mcr p15, 0, r1, c1, c0, 0 @ turn off MMU etc - mov pc, r0 - - __INIT - - .type __arm6_setup, #function -__arm6_setup: mov r0, #0 - mcr p15, 0, r0, c7, c0 @ flush caches on v3 - mcr p15, 0, r0, c5, c0 @ flush TLBs on v3 - mov r0, #0x3d @ . ..RS BLDP WCAM - orr r0, r0, #0x100 @ . ..01 0011 1101 - mov pc, lr - .size __arm6_setup, . - __arm6_setup - - .type __arm7_setup, #function -__arm7_setup: mov r0, #0 - mcr p15, 0, r0, c7, c0 @ flush caches on v3 - mcr p15, 0, r0, c5, c0 @ flush TLBs on v3 - mcr p15, 0, r0, c3, c0 @ load domain access register - mov r0, #0x7d @ . ..RS BLDP WCAM - orr r0, r0, #0x100 @ . ..01 0111 1101 - mov pc, lr - .size __arm7_setup, . - __arm7_setup - - __INITDATA - -/* - * Purpose : Function pointers used to access above functions - all calls - * come through these - */ - .type arm6_processor_functions, #object -ENTRY(arm6_processor_functions) - .word cpu_arm6_data_abort - .word cpu_arm6_proc_init - .word cpu_arm6_proc_fin - .word cpu_arm6_reset - .word cpu_arm6_do_idle - .word cpu_arm6_dcache_clean_area - .word cpu_arm6_switch_mm - .word cpu_arm6_set_pte - .size arm6_processor_functions, . - arm6_processor_functions - -/* - * Purpose : Function pointers used to access above functions - all calls - * come through these - */ - .type arm7_processor_functions, #object -ENTRY(arm7_processor_functions) - .word cpu_arm7_data_abort - .word cpu_arm7_proc_init - .word cpu_arm7_proc_fin - .word cpu_arm7_reset - .word cpu_arm7_do_idle - .word cpu_arm7_dcache_clean_area - .word cpu_arm7_switch_mm - .word cpu_arm7_set_pte - .size arm7_processor_functions, . - arm7_processor_functions - - .section ".rodata" - - .type cpu_arch_name, #object -cpu_arch_name: .asciz "armv3" - .size cpu_arch_name, . - cpu_arch_name - - .type cpu_elf_name, #object -cpu_elf_name: .asciz "v3" - .size cpu_elf_name, . - cpu_elf_name - - .type cpu_arm6_name, #object -cpu_arm6_name: .asciz "ARM6" - .size cpu_arm6_name, . - cpu_arm6_name - - .type cpu_arm610_name, #object -cpu_arm610_name: - .asciz "ARM610" - .size cpu_arm610_name, . - cpu_arm610_name - - .type cpu_arm7_name, #object -cpu_arm7_name: .asciz "ARM7" - .size cpu_arm7_name, . - cpu_arm7_name - - .type cpu_arm710_name, #object -cpu_arm710_name: - .asciz "ARM710" - .size cpu_arm710_name, . 
- cpu_arm710_name - - .align - - .section ".proc.info.init", #alloc, #execinstr - - .type __arm6_proc_info, #object -__arm6_proc_info: - .long 0x41560600 - .long 0xfffffff0 - .long 0x00000c1e - b __arm6_setup - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP | HWCAP_26BIT - .long cpu_arm6_name - .long arm6_processor_functions - .long v3_tlb_fns - .long v3_user_fns - .long v3_cache_fns - .size __arm6_proc_info, . - __arm6_proc_info - - .type __arm610_proc_info, #object -__arm610_proc_info: - .long 0x41560610 - .long 0xfffffff0 - .long 0x00000c1e - b __arm6_setup - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP | HWCAP_26BIT - .long cpu_arm610_name - .long arm6_processor_functions - .long v3_tlb_fns - .long v3_user_fns - .long v3_cache_fns - .size __arm610_proc_info, . - __arm610_proc_info - - .type __arm7_proc_info, #object -__arm7_proc_info: - .long 0x41007000 - .long 0xffffff00 - .long 0x00000c1e - b __arm7_setup - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP | HWCAP_26BIT - .long cpu_arm7_name - .long arm7_processor_functions - .long v3_tlb_fns - .long v3_user_fns - .long v3_cache_fns - .size __arm7_proc_info, . - __arm7_proc_info - - .type __arm710_proc_info, #object -__arm710_proc_info: - .long 0x41007100 - .long 0xfff8ff00 - .long PMD_TYPE_SECT | \ - PMD_SECT_BUFFERABLE | \ - PMD_SECT_CACHEABLE | \ - PMD_BIT4 | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - b __arm7_setup - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP | HWCAP_26BIT - .long cpu_arm710_name - .long arm7_processor_functions - .long v3_tlb_fns - .long v3_user_fns - .long v3_cache_fns - .size __arm710_proc_info, . - __arm710_proc_info diff --git a/arch/arm/mm/proc-arm720.S b/arch/arm/mm/proc-arm720.S index a13e0184d34..d42c37f9f5b 100644 --- a/arch/arm/mm/proc-arm720.S +++ b/arch/arm/mm/proc-arm720.S @@ -4,6 +4,7 @@ * Copyright (C) 2000 Steve Hill (sjhill@cotw.com) * Rob Scott (rscott@mtrob.fdns.net) * Copyright (C) 2000 ARM Limited, Deep Blue Solutions Ltd. + * hacked for non-paged-MM by Hyok S. Choi, 2004. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -29,15 +30,18 @@ * out of 'proc-arm6,7.S' per RMK discussion * 07-25-2000 SJH Added idle function. * 08-25-2000 DBS Updated for integration of ARM Ltd version. + * 04-20-2004 HSC modified for non-paged memory management mode. */ #include <linux/linkage.h> #include <linux/init.h> #include <asm/assembler.h> #include <asm/asm-offsets.h> +#include <asm/hwcap.h> +#include <asm/pgtable-hwdef.h> #include <asm/pgtable.h> -#include <asm/procinfo.h> #include <asm/ptrace.h> -#include <asm/hardware.h> + +#include "proc-macros.S" /* * Function: arm720_proc_init (void) @@ -50,20 +54,16 @@ ENTRY(cpu_arm720_proc_init) mov pc, lr ENTRY(cpu_arm720_proc_fin) - stmfd sp!, {lr} - mov ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE - msr cpsr_c, ip mrc p15, 0, r0, c1, c0, 0 bic r0, r0, #0x1000 @ ...i............ bic r0, r0, #0x000e @ ............wca. mcr p15, 0, r0, c1, c0, 0 @ disable caches - mcr p15, 0, r1, c7, c7, 0 @ invalidate cache - ldmfd sp!, {pc} + mov pc, lr /* * Function: arm720_proc_do_idle(void) * Params : r0 = unused - * Purpose : put the processer in proper idle mode + * Purpose : put the processor in proper idle mode */ ENTRY(cpu_arm720_do_idle) mov pc, lr @@ -75,62 +75,54 @@ ENTRY(cpu_arm720_do_idle) * the new. 
*/ ENTRY(cpu_arm720_switch_mm) +#ifdef CONFIG_MMU mov r1, #0 mcr p15, 0, r1, c7, c7, 0 @ invalidate cache mcr p15, 0, r0, c2, c0, 0 @ update page table ptr mcr p15, 0, r1, c8, c7, 0 @ flush TLB (v4) +#endif mov pc, lr /* - * Function: arm720_set_pte(pte_t *ptep, pte_t pte) + * Function: arm720_set_pte_ext(pte_t *ptep, pte_t pte, unsigned int ext) * Params : r0 = Address to set * : r1 = value to set * Purpose : Set a PTE and flush it out of any WB cache */ - .align 5 -ENTRY(cpu_arm720_set_pte) - str r1, [r0], #-2048 @ linux version - - eor r1, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY - - bic r2, r1, #PTE_SMALL_AP_MASK - bic r2, r2, #PTE_TYPE_MASK - orr r2, r2, #PTE_TYPE_SMALL - - tst r1, #L_PTE_USER @ User? - orrne r2, r2, #PTE_SMALL_AP_URO_SRW - - tst r1, #L_PTE_WRITE | L_PTE_DIRTY @ Write and Dirty? - orreq r2, r2, #PTE_SMALL_AP_UNO_SRW - - tst r1, #L_PTE_PRESENT | L_PTE_YOUNG @ Present and Young - movne r2, #0 - - str r2, [r0] @ hardware version - mov pc, lr + .align 5 +ENTRY(cpu_arm720_set_pte_ext) +#ifdef CONFIG_MMU + armv3_set_pte_ext wc_disable=0 +#endif + mov pc, lr /* * Function: arm720_reset * Params : r0 = address to jump to * Notes : This sets up everything for a reset */ + .pushsection .idmap.text, "ax" ENTRY(cpu_arm720_reset) mov ip, #0 mcr p15, 0, ip, c7, c7, 0 @ invalidate cache +#ifdef CONFIG_MMU mcr p15, 0, ip, c8, c7, 0 @ flush TLB (v4) +#endif mrc p15, 0, ip, c1, c0, 0 @ get ctrl register bic ip, ip, #0x000f @ ............wcam bic ip, ip, #0x2100 @ ..v....s........ mcr p15, 0, ip, c1, c0, 0 @ ctrl register mov pc, r0 - - __INIT +ENDPROC(cpu_arm720_reset) + .popsection .type __arm710_setup, #function __arm710_setup: mov r0, #0 mcr p15, 0, r0, c7, c7, 0 @ invalidate caches +#ifdef CONFIG_MMU mcr p15, 0, r0, c8, c7, 0 @ flush TLB (v4) +#endif mrc p15, 0, r0, c1, c0 @ get control register ldr r5, arm710_cr1_clear bic r0, r0, r5 @@ -156,12 +148,14 @@ arm710_cr1_set: __arm720_setup: mov r0, #0 mcr p15, 0, r0, c7, c7, 0 @ invalidate caches +#ifdef CONFIG_MMU mcr p15, 0, r0, c8, c7, 0 @ flush TLB (v4) +#endif + adr r5, arm720_crval + ldmia r5, {r5, r6} mrc p15, 0, r0, c1, c0 @ get control register - ldr r5, arm720_cr1_clear bic r0, r0, r5 - ldr r5, arm720_cr1_set - orr r0, r0, r5 + orr r0, r0, r6 mov pc, lr @ __ret (head.S) .size __arm720_setup, . - __arm720_setup @@ -171,97 +165,55 @@ __arm720_setup: * ..1. 1001 ..11 1101 * */ - .type arm720_cr1_clear, #object - .type arm720_cr1_set, #object -arm720_cr1_clear: - .word 0x2f3f -arm720_cr1_set: - .word 0x213d + .type arm720_crval, #object +arm720_crval: + crval clear=0x00002f3f, mmuset=0x0000213d, ucset=0x00000130 __INITDATA - -/* - * Purpose : Function pointers used to access above functions - all calls - * come through these - */ - .type arm720_processor_functions, #object -ENTRY(arm720_processor_functions) - .word v4t_late_abort - .word cpu_arm720_proc_init - .word cpu_arm720_proc_fin - .word cpu_arm720_reset - .word cpu_arm720_do_idle - .word cpu_arm720_dcache_clean_area - .word cpu_arm720_switch_mm - .word cpu_arm720_set_pte - .size arm720_processor_functions, . - arm720_processor_functions + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions arm720, dabort=v4t_late_abort, pabort=legacy_pabort .section ".rodata" - .type cpu_arch_name, #object -cpu_arch_name: .asciz "armv4t" - .size cpu_arch_name, . - cpu_arch_name - - .type cpu_elf_name, #object -cpu_elf_name: .asciz "v4" - .size cpu_elf_name, . 
- cpu_elf_name - - .type cpu_arm710_name, #object -cpu_arm710_name: - .asciz "ARM710T" - .size cpu_arm710_name, . - cpu_arm710_name - - .type cpu_arm720_name, #object -cpu_arm720_name: - .asciz "ARM720T" - .size cpu_arm720_name, . - cpu_arm720_name + string cpu_arch_name, "armv4t" + string cpu_elf_name, "v4" + string cpu_arm710_name, "ARM710T" + string cpu_arm720_name, "ARM720T" .align /* - * See linux/include/asm-arm/procinfo.h for a definition of this structure. + * See <asm/procinfo.h> for a definition of this structure. */ .section ".proc.info.init", #alloc, #execinstr - .type __arm710_proc_info, #object -__arm710_proc_info: - .long 0x41807100 @ cpu_val - .long 0xffffff00 @ cpu_mask +.macro arm720_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cpu_flush:req + .type __\name\()_proc_info,#object +__\name\()_proc_info: + .long \cpu_val + .long \cpu_mask .long PMD_TYPE_SECT | \ PMD_SECT_BUFFERABLE | \ PMD_SECT_CACHEABLE | \ PMD_BIT4 | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ - b __arm710_setup @ cpu_flush - .long cpu_arch_name @ arch_name - .long cpu_elf_name @ elf_name - .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB @ elf_hwcap - .long cpu_arm710_name @ name - .long arm720_processor_functions - .long v4_tlb_fns - .long v4wt_user_fns - .long v4_cache_fns - .size __arm710_proc_info, . - __arm710_proc_info - - .type __arm720_proc_info, #object -__arm720_proc_info: - .long 0x41807200 @ cpu_val - .long 0xffffff00 @ cpu_mask .long PMD_TYPE_SECT | \ - PMD_SECT_BUFFERABLE | \ - PMD_SECT_CACHEABLE | \ PMD_BIT4 | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ - b __arm720_setup @ cpu_flush + b \cpu_flush @ cpu_flush .long cpu_arch_name @ arch_name .long cpu_elf_name @ elf_name .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB @ elf_hwcap - .long cpu_arm720_name @ name + .long \cpu_name .long arm720_processor_functions .long v4_tlb_fns .long v4wt_user_fns .long v4_cache_fns - .size __arm720_proc_info, . - __arm720_proc_info + .size __\name\()_proc_info, . - __\name\()_proc_info +.endm + + arm720_proc_info arm710, 0x41807100, 0xffffff00, cpu_arm710_name, __arm710_setup + arm720_proc_info arm720, 0x41807200, 0xffffff00, cpu_arm720_name, __arm720_setup diff --git a/arch/arm/mm/proc-arm740.S b/arch/arm/mm/proc-arm740.S new file mode 100644 index 00000000000..9b0ae90cbf1 --- /dev/null +++ b/arch/arm/mm/proc-arm740.S @@ -0,0 +1,151 @@ +/* + * linux/arch/arm/mm/arm740.S: utility functions for ARM740 + * + * Copyright (C) 2004-2006 Hyok S. Choi (hyok.choi@samsung.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/assembler.h> +#include <asm/asm-offsets.h> +#include <asm/hwcap.h> +#include <asm/pgtable-hwdef.h> +#include <asm/pgtable.h> +#include <asm/ptrace.h> + +#include "proc-macros.S" + + .text +/* + * cpu_arm740_proc_init() + * cpu_arm740_do_idle() + * cpu_arm740_dcache_clean_area() + * cpu_arm740_switch_mm() + * + * These are not required. 
+ */ +ENTRY(cpu_arm740_proc_init) +ENTRY(cpu_arm740_do_idle) +ENTRY(cpu_arm740_dcache_clean_area) +ENTRY(cpu_arm740_switch_mm) + mov pc, lr + +/* + * cpu_arm740_proc_fin() + */ +ENTRY(cpu_arm740_proc_fin) + mrc p15, 0, r0, c1, c0, 0 + bic r0, r0, #0x3f000000 @ bank/f/lock/s + bic r0, r0, #0x0000000c @ w-buffer/cache + mcr p15, 0, r0, c1, c0, 0 @ disable caches + mov pc, lr + +/* + * cpu_arm740_reset(loc) + * Params : r0 = address to jump to + * Notes : This sets up everything for a reset + */ + .pushsection .idmap.text, "ax" +ENTRY(cpu_arm740_reset) + mov ip, #0 + mcr p15, 0, ip, c7, c0, 0 @ invalidate cache + mrc p15, 0, ip, c1, c0, 0 @ get ctrl register + bic ip, ip, #0x0000000c @ ............wc.. + mcr p15, 0, ip, c1, c0, 0 @ ctrl register + mov pc, r0 +ENDPROC(cpu_arm740_reset) + .popsection + + .type __arm740_setup, #function +__arm740_setup: + mov r0, #0 + mcr p15, 0, r0, c7, c0, 0 @ invalidate caches + + mcr p15, 0, r0, c6, c3 @ disable area 3~7 + mcr p15, 0, r0, c6, c4 + mcr p15, 0, r0, c6, c5 + mcr p15, 0, r0, c6, c6 + mcr p15, 0, r0, c6, c7 + + mov r0, #0x0000003F @ base = 0, size = 4GB + mcr p15, 0, r0, c6, c0 @ set area 0, default + + ldr r0, =(CONFIG_DRAM_BASE & 0xFFFFF000) @ base[31:12] of RAM + ldr r3, =(CONFIG_DRAM_SIZE >> 12) @ size of RAM (must be >= 4KB) + mov r4, #10 @ 11 is the minimum (4KB) +1: add r4, r4, #1 @ area size *= 2 + movs r3, r3, lsr #1 + bne 1b @ count not zero r-shift + orr r0, r0, r4, lsl #1 @ the area register value + orr r0, r0, #1 @ set enable bit + mcr p15, 0, r0, c6, c1 @ set area 1, RAM + + ldr r0, =(CONFIG_FLASH_MEM_BASE & 0xFFFFF000) @ base[31:12] of FLASH + ldr r3, =(CONFIG_FLASH_SIZE >> 12) @ size of FLASH (must be >= 4KB) + cmp r3, #0 + moveq r0, #0 + beq 2f + mov r4, #10 @ 11 is the minimum (4KB) +1: add r4, r4, #1 @ area size *= 2 + movs r3, r3, lsr #1 + bne 1b @ count not zero r-shift + orr r0, r0, r4, lsl #1 @ the area register value + orr r0, r0, #1 @ set enable bit +2: mcr p15, 0, r0, c6, c2 @ set area 2, ROM/FLASH + + mov r0, #0x06 + mcr p15, 0, r0, c2, c0 @ Region 1&2 cacheable +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + mov r0, #0x00 @ disable whole write buffer +#else + mov r0, #0x02 @ Region 1 write bufferred +#endif + mcr p15, 0, r0, c3, c0 + + mov r0, #0x10000 + sub r0, r0, #1 @ r0 = 0xffff + mcr p15, 0, r0, c5, c0 @ all read/write access + + mrc p15, 0, r0, c1, c0 @ get control register + bic r0, r0, #0x3F000000 @ set to standard caching mode + @ need some benchmark + orr r0, r0, #0x0000000d @ MPU/Cache/WB + + mov pc, lr + + .size __arm740_setup, . - __arm740_setup + + __INITDATA + + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions arm740, dabort=v4t_late_abort, pabort=legacy_pabort, nommu=1 + + .section ".rodata" + + string cpu_arch_name, "armv4" + string cpu_elf_name, "v4" + string cpu_arm740_name, "ARM740T" + + .align + + .section ".proc.info.init", #alloc, #execinstr + .type __arm740_proc_info,#object +__arm740_proc_info: + .long 0x41807400 + .long 0xfffffff0 + .long 0 + .long 0 + b __arm740_setup + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB | HWCAP_26BIT + .long cpu_arm740_name + .long arm740_processor_functions + .long 0 + .long 0 + .long v4_cache_fns @ cache model + .size __arm740_proc_info, . 
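
Note on __arm740_setup above: the MPU is programmed with protection regions derived from CONFIG_DRAM_BASE/CONFIG_DRAM_SIZE and CONFIG_FLASH_MEM_BASE/CONFIG_FLASH_SIZE, and the shift loop turns the size in 4KB pages into the power-of-two "area size" field, where 11 is the architectural minimum (4KB). A hedged C sketch of that encoding (the function name is illustrative):

    /* Mirrors the r4/r3 loop in __arm740_setup: the size field grows by one for
     * each halving of the page count, so a power-of-two size of 2^(n+12) bytes
     * yields field n+11 (4KB -> 11, 8KB -> 12, ...).  Non-power-of-two sizes
     * are effectively rounded down, as in the assembly. */
    static unsigned long arm740_area_reg(unsigned long base, unsigned long size)
    {
        unsigned long field = 10;               /* 11 is the minimum (4KB)     */
        unsigned long pages = size >> 12;       /* size in 4KB units, >= 1     */

        do {
            field++;                            /* area size *= 2              */
            pages >>= 1;
        } while (pages);

        return (base & 0xfffff000) | (field << 1) | 1;  /* base, size, enable  */
    }
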
- __arm740_proc_info diff --git a/arch/arm/mm/proc-arm7tdmi.S b/arch/arm/mm/proc-arm7tdmi.S new file mode 100644 index 00000000000..f6cc3f63ce3 --- /dev/null +++ b/arch/arm/mm/proc-arm7tdmi.S @@ -0,0 +1,114 @@ +/* + * linux/arch/arm/mm/proc-arm7tdmi.S: utility functions for ARM7TDMI + * + * Copyright (C) 2003-2006 Hyok S. Choi <hyok.choi@samsung.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/assembler.h> +#include <asm/asm-offsets.h> +#include <asm/hwcap.h> +#include <asm/pgtable-hwdef.h> +#include <asm/pgtable.h> +#include <asm/ptrace.h> + +#include "proc-macros.S" + + .text +/* + * cpu_arm7tdmi_proc_init() + * cpu_arm7tdmi_do_idle() + * cpu_arm7tdmi_dcache_clean_area() + * cpu_arm7tdmi_switch_mm() + * + * These are not required. + */ +ENTRY(cpu_arm7tdmi_proc_init) +ENTRY(cpu_arm7tdmi_do_idle) +ENTRY(cpu_arm7tdmi_dcache_clean_area) +ENTRY(cpu_arm7tdmi_switch_mm) + mov pc, lr + +/* + * cpu_arm7tdmi_proc_fin() + */ +ENTRY(cpu_arm7tdmi_proc_fin) + mov pc, lr + +/* + * Function: cpu_arm7tdmi_reset(loc) + * Params : loc(r0) address to jump to + * Purpose : Sets up everything for a reset and jump to the location for soft reset. + */ + .pushsection .idmap.text, "ax" +ENTRY(cpu_arm7tdmi_reset) + mov pc, r0 +ENDPROC(cpu_arm7tdmi_reset) + .popsection + + .type __arm7tdmi_setup, #function +__arm7tdmi_setup: + mov pc, lr + .size __arm7tdmi_setup, . - __arm7tdmi_setup + + __INITDATA + + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions arm7tdmi, dabort=v4t_late_abort, pabort=legacy_pabort, nommu=1 + + .section ".rodata" + + string cpu_arch_name, "armv4t" + string cpu_elf_name, "v4" + string cpu_arm7tdmi_name, "ARM7TDMI" + string cpu_triscenda7_name, "Triscend-A7x" + string cpu_at91_name, "Atmel-AT91M40xxx" + string cpu_s3c3410_name, "Samsung-S3C3410" + string cpu_s3c44b0x_name, "Samsung-S3C44B0x" + string cpu_s3c4510b_name, "Samsung-S3C4510B" + string cpu_s3c4530_name, "Samsung-S3C4530" + string cpu_netarm_name, "NETARM" + + .align + + .section ".proc.info.init", #alloc, #execinstr + +.macro arm7tdmi_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, \ + extra_hwcaps=0 + .type __\name\()_proc_info, #object +__\name\()_proc_info: + .long \cpu_val + .long \cpu_mask + .long 0 + .long 0 + b __arm7tdmi_setup + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP | HWCAP_26BIT | ( \extra_hwcaps ) + .long \cpu_name + .long arm7tdmi_processor_functions + .long 0 + .long 0 + .long v4_cache_fns + .size __\name\()_proc_info, . 
- __\name\()_proc_info +.endm + + arm7tdmi_proc_info arm7tdmi, 0x41007700, 0xfff8ff00, \ + cpu_arm7tdmi_name + arm7tdmi_proc_info triscenda7, 0x0001d2ff, 0x0001ffff, \ + cpu_triscenda7_name, extra_hwcaps=HWCAP_THUMB + arm7tdmi_proc_info at91, 0x14000040, 0xfff000e0, \ + cpu_at91_name, extra_hwcaps=HWCAP_THUMB + arm7tdmi_proc_info s3c4510b, 0x36365000, 0xfffff000, \ + cpu_s3c4510b_name, extra_hwcaps=HWCAP_THUMB + arm7tdmi_proc_info s3c4530, 0x4c000000, 0xfff000e0, \ + cpu_s3c4530_name, extra_hwcaps=HWCAP_THUMB + arm7tdmi_proc_info s3c3410, 0x34100000, 0xffff0000, \ + cpu_s3c3410_name, extra_hwcaps=HWCAP_THUMB + arm7tdmi_proc_info s3c44b0x, 0x44b00000, 0xffff0000, \ + cpu_s3c44b0x_name, extra_hwcaps=HWCAP_THUMB diff --git a/arch/arm/mm/proc-arm920.S b/arch/arm/mm/proc-arm920.S index d1651389999..549557df6d5 100644 --- a/arch/arm/mm/proc-arm920.S +++ b/arch/arm/mm/proc-arm920.S @@ -3,6 +3,7 @@ * * Copyright (C) 1999,2000 ARM Limited * Copyright (C) 2000 Deep Blue Solutions Ltd. + * hacked for non-paged-MM by Hyok S. Choi, 2003. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -25,12 +26,11 @@ * CONFIG_CPU_ARM920_CPU_IDLE -> nohlt */ #include <linux/linkage.h> -#include <linux/config.h> #include <linux/init.h> #include <asm/assembler.h> +#include <asm/hwcap.h> +#include <asm/pgtable-hwdef.h> #include <asm/pgtable.h> -#include <asm/procinfo.h> -#include <asm/hardware.h> #include <asm/page.h> #include <asm/ptrace.h> #include "proc-macros.S" @@ -53,7 +53,7 @@ /* * This is the size at which it becomes more efficient to * clean the whole cache, rather than using the individual - * cache line maintainence instructions. + * cache line maintenance instructions. */ #define CACHE_DLIMIT 65536 @@ -69,19 +69,11 @@ ENTRY(cpu_arm920_proc_init) * cpu_arm920_proc_fin() */ ENTRY(cpu_arm920_proc_fin) - stmfd sp!, {lr} - mov ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE - msr cpsr_c, ip -#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH - bl arm920_flush_kern_cache_all -#else - bl v4wt_flush_kern_cache_all -#endif mrc p15, 0, r0, c1, c0, 0 @ ctrl register bic r0, r0, #0x1000 @ ...i............ bic r0, r0, #0x000e @ ............wca. mcr p15, 0, r0, c1, c0, 0 @ disable caches - ldmfd sp!, {pc} + mov pc, lr /* * cpu_arm920_reset(loc) @@ -93,16 +85,21 @@ ENTRY(cpu_arm920_proc_fin) * loc: location to jump to for soft reset */ .align 5 + .pushsection .idmap.text, "ax" ENTRY(cpu_arm920_reset) mov ip, #0 mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches mcr p15, 0, ip, c7, c10, 4 @ drain WB +#ifdef CONFIG_MMU mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs +#endif mrc p15, 0, ip, c1, c0, 0 @ ctrl register bic ip, ip, #0x000f @ ............wcam bic ip, ip, #0x1100 @ ...i...s........ mcr p15, 0, ip, c1, c0, 0 @ ctrl register mov pc, r0 +ENDPROC(cpu_arm920_reset) + .popsection /* * cpu_arm920_do_idle() @@ -116,6 +113,17 @@ ENTRY(cpu_arm920_do_idle) #ifndef CONFIG_CPU_DCACHE_WRITETHROUGH /* + * flush_icache_all() + * + * Unconditionally clean and invalidate the entire icache. 
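
Note on the arm7tdmi_proc_info macro above: it stamps out one .proc.info.init record per supported derivative (Triscend A7x, Atmel AT91M40xxx, the Samsung S3C parts, NETARM), all sharing arm7tdmi_processor_functions and differing only in ID value/mask, name string and extra hwcaps. Each record has the layout of struct proc_info_list; an approximate C view, shown only for orientation (the authoritative definition is <asm/procinfo.h> in the matching tree):

    struct processor; struct cpu_tlb_fns; struct cpu_user_fns; struct cpu_cache_fns;

    struct proc_info_list {
        unsigned int          cpu_val;            /* .long \cpu_val                  */
        unsigned int          cpu_mask;           /* .long \cpu_mask                 */
        unsigned long         __cpu_mm_mmu_flags; /* 0 here: no MMU                  */
        unsigned long         __cpu_io_mmu_flags; /* 0 here: no MMU                  */
        unsigned long         __cpu_flush;        /* b __arm7tdmi_setup              */
        const char           *arch_name;          /* "armv4t"                        */
        const char           *elf_name;           /* "v4"                            */
        unsigned int          elf_hwcap;          /* HWCAP_SWP | HWCAP_26BIT | ...   */
        const char           *cpu_name;
        struct processor     *proc;               /* arm7tdmi_processor_functions    */
        struct cpu_tlb_fns   *tlb;                /* 0: no TLB                       */
        struct cpu_user_fns  *user;               /* 0                               */
        struct cpu_cache_fns *cache;              /* v4_cache_fns                    */
    };
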
+ */ +ENTRY(arm920_flush_icache_all) + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mov pc, lr +ENDPROC(arm920_flush_icache_all) + +/* * flush_user_cache_all() * * Invalidate all cache entries in a particular address @@ -202,18 +210,20 @@ ENTRY(arm920_coherent_user_range) cmp r0, r1 blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov r0, #0 mov pc, lr /* - * flush_kern_dcache_page(void *page) + * flush_kern_dcache_area(void *addr, size_t size) * * Ensure no D cache aliasing occurs, either with itself or * the I cache * - * - addr - page aligned address + * - addr - kernel address + * - size - region size */ -ENTRY(arm920_flush_kern_dcache_page) - add r1, r0, #PAGE_SZ +ENTRY(arm920_flush_kern_dcache_area) + add r1, r0, r1 1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry add r0, r0, #CACHE_DLINESIZE cmp r0, r1 @@ -236,7 +246,7 @@ ENTRY(arm920_flush_kern_dcache_page) * * (same as v4wb) */ -ENTRY(arm920_dma_inv_range) +arm920_dma_inv_range: tst r0, #CACHE_DLINESIZE - 1 bic r0, r0, #CACHE_DLINESIZE - 1 mcrne p15, 0, r0, c7, c10, 1 @ clean D entry @@ -259,7 +269,7 @@ ENTRY(arm920_dma_inv_range) * * (same as v4wb) */ -ENTRY(arm920_dma_clean_range) +arm920_dma_clean_range: bic r0, r0, #CACHE_DLINESIZE - 1 1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry add r0, r0, #CACHE_DLINESIZE @@ -285,17 +295,35 @@ ENTRY(arm920_dma_flush_range) mcr p15, 0, r0, c7, c10, 4 @ drain WB mov pc, lr -ENTRY(arm920_cache_fns) - .long arm920_flush_kern_cache_all - .long arm920_flush_user_cache_all - .long arm920_flush_user_cache_range - .long arm920_coherent_kern_range - .long arm920_coherent_user_range - .long arm920_flush_kern_dcache_page - .long arm920_dma_inv_range - .long arm920_dma_clean_range - .long arm920_dma_flush_range +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm920_dma_map_area) + add r1, r1, r0 + cmp r2, #DMA_TO_DEVICE + beq arm920_dma_clean_range + bcs arm920_dma_inv_range + b arm920_dma_flush_range +ENDPROC(arm920_dma_map_area) +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm920_dma_unmap_area) + mov pc, lr +ENDPROC(arm920_dma_unmap_area) + + .globl arm920_flush_kern_cache_louis + .equ arm920_flush_kern_cache_louis, arm920_flush_kern_cache_all + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions arm920 #endif @@ -317,6 +345,7 @@ ENTRY(cpu_arm920_dcache_clean_area) */ .align 5 ENTRY(cpu_arm920_switch_mm) +#ifdef CONFIG_MMU mov ip, #0 #ifdef CONFIG_CPU_DCACHE_WRITETHROUGH mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache @@ -337,56 +366,63 @@ ENTRY(cpu_arm920_switch_mm) mcr p15, 0, ip, c7, c10, 4 @ drain WB mcr p15, 0, r0, c2, c0, 0 @ load page table pointer mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs +#endif mov pc, lr /* - * cpu_arm920_set_pte(ptep, pte) + * cpu_arm920_set_pte(ptep, pte, ext) * * Set a PTE and flush it out */ .align 5 -ENTRY(cpu_arm920_set_pte) - str r1, [r0], #-2048 @ linux version - - eor r1, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY - - bic r2, r1, #PTE_SMALL_AP_MASK - bic r2, r2, #PTE_TYPE_MASK - orr r2, r2, #PTE_TYPE_SMALL - - tst r1, #L_PTE_USER @ User? - orrne r2, r2, #PTE_SMALL_AP_URO_SRW - - tst r1, #L_PTE_WRITE | L_PTE_DIRTY @ Write and Dirty? - orreq r2, r2, #PTE_SMALL_AP_UNO_SRW - - tst r1, #L_PTE_PRESENT | L_PTE_YOUNG @ Present and Young? 
- movne r2, #0 - -#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH - eor r3, r2, #0x0a @ C & small page? - tst r3, #0x0b - biceq r2, r2, #4 -#endif - str r2, [r0] @ hardware version +ENTRY(cpu_arm920_set_pte_ext) +#ifdef CONFIG_MMU + armv3_set_pte_ext mov r0, r0 mcr p15, 0, r0, c7, c10, 1 @ clean D entry mcr p15, 0, r0, c7, c10, 4 @ drain WB +#endif mov pc, lr - __INIT +/* Suspend/resume support: taken from arch/arm/plat-s3c24xx/sleep.S */ +.globl cpu_arm920_suspend_size +.equ cpu_arm920_suspend_size, 4 * 3 +#ifdef CONFIG_ARM_CPU_SUSPEND +ENTRY(cpu_arm920_do_suspend) + stmfd sp!, {r4 - r6, lr} + mrc p15, 0, r4, c13, c0, 0 @ PID + mrc p15, 0, r5, c3, c0, 0 @ Domain ID + mrc p15, 0, r6, c1, c0, 0 @ Control register + stmia r0, {r4 - r6} + ldmfd sp!, {r4 - r6, pc} +ENDPROC(cpu_arm920_do_suspend) + +ENTRY(cpu_arm920_do_resume) + mov ip, #0 + mcr p15, 0, ip, c8, c7, 0 @ invalidate I+D TLBs + mcr p15, 0, ip, c7, c7, 0 @ invalidate I+D caches + ldmia r0, {r4 - r6} + mcr p15, 0, r4, c13, c0, 0 @ PID + mcr p15, 0, r5, c3, c0, 0 @ Domain ID + mcr p15, 0, r1, c2, c0, 0 @ TTB address + mov r0, r6 @ control register + b cpu_resume_mmu +ENDPROC(cpu_arm920_do_resume) +#endif .type __arm920_setup, #function __arm920_setup: mov r0, #0 mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4 mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4 +#ifdef CONFIG_MMU mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4 +#endif + adr r5, arm920_crval + ldmia r5, {r5, r6} mrc p15, 0, r0, c1, c0 @ get control register v4 - ldr r5, arm920_cr1_clear bic r0, r0, r5 - ldr r5, arm920_cr1_set - orr r0, r0, r5 + orr r0, r0, r6 mov pc, lr .size __arm920_setup, . - __arm920_setup @@ -396,59 +432,19 @@ __arm920_setup: * ..11 0001 ..11 0101 * */ - .type arm920_cr1_clear, #object - .type arm920_cr1_set, #object -arm920_cr1_clear: - .word 0x3f3f -arm920_cr1_set: - .word 0x3135 + .type arm920_crval, #object +arm920_crval: + crval clear=0x00003f3f, mmuset=0x00003135, ucset=0x00001130 __INITDATA - -/* - * Purpose : Function pointers used to access above functions - all calls - * come through these - */ - .type arm920_processor_functions, #object -arm920_processor_functions: - .word v4t_early_abort - .word cpu_arm920_proc_init - .word cpu_arm920_proc_fin - .word cpu_arm920_reset - .word cpu_arm920_do_idle - .word cpu_arm920_dcache_clean_area - .word cpu_arm920_switch_mm - .word cpu_arm920_set_pte - .size arm920_processor_functions, . - arm920_processor_functions + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions arm920, dabort=v4t_early_abort, pabort=legacy_pabort, suspend=1 .section ".rodata" - .type cpu_arch_name, #object -cpu_arch_name: - .asciz "armv4t" - .size cpu_arch_name, . - cpu_arch_name - - .type cpu_elf_name, #object -cpu_elf_name: - .asciz "v4" - .size cpu_elf_name, . - cpu_elf_name - - .type cpu_arm920_name, #object -cpu_arm920_name: - .ascii "ARM920T" -#ifndef CONFIG_CPU_ICACHE_DISABLE - .ascii "i" -#endif -#ifndef CONFIG_CPU_DCACHE_DISABLE - .ascii "d" -#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH - .ascii "(wt)" -#else - .ascii "(wb)" -#endif -#endif - .ascii "\0" - .size cpu_arm920_name, . 
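
Note on the new ARM920 suspend/resume pair above: cpu_arm920_do_suspend stores exactly three CP15 words, which is why cpu_arm920_suspend_size is 4 * 3; cpu_arm920_do_resume invalidates the TLBs and caches, reloads that state plus the caller-supplied TTB, and hands the control register value to cpu_resume_mmu. Viewed from C, the saved blob is simply (a sketch, not a kernel structure):

    /* Hypothetical view of the 12-byte area written by "stmia r0, {r4 - r6}";
     * the field order matches the mrc sequence in cpu_arm920_do_suspend. */
    struct arm920_suspend_ctx {
        unsigned int pid;    /* CP15 c13, c0, 0 - process/context ID     */
        unsigned int dacr;   /* CP15 c3,  c0, 0 - domain access control  */
        unsigned int cr;     /* CP15 c1,  c0, 0 - control register       */
    };
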
- cpu_arm920_name + string cpu_arch_name, "armv4t" + string cpu_elf_name, "v4" + string cpu_arm920_name, "ARM920T" .align @@ -464,6 +460,10 @@ __arm920_proc_info: PMD_BIT4 | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ + .long PMD_TYPE_SECT | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ b __arm920_setup .long cpu_arch_name .long cpu_elf_name diff --git a/arch/arm/mm/proc-arm922.S b/arch/arm/mm/proc-arm922.S index 23b8ed97f4e..2a758b06c6f 100644 --- a/arch/arm/mm/proc-arm922.S +++ b/arch/arm/mm/proc-arm922.S @@ -4,6 +4,7 @@ * Copyright (C) 1999,2000 ARM Limited * Copyright (C) 2000 Deep Blue Solutions Ltd. * Copyright (C) 2001 Altera Corporation + * hacked for non-paged-MM by Hyok S. Choi, 2003. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -26,12 +27,11 @@ * CONFIG_CPU_ARM922_CPU_IDLE -> nohlt */ #include <linux/linkage.h> -#include <linux/config.h> #include <linux/init.h> #include <asm/assembler.h> +#include <asm/hwcap.h> +#include <asm/pgtable-hwdef.h> #include <asm/pgtable.h> -#include <asm/procinfo.h> -#include <asm/hardware.h> #include <asm/page.h> #include <asm/ptrace.h> #include "proc-macros.S" @@ -54,7 +54,7 @@ /* * This is the size at which it becomes more efficient to * clean the whole cache, rather than using the individual - * cache line maintainence instructions. (I think this should + * cache line maintenance instructions. (I think this should * be 32768). */ #define CACHE_DLIMIT 8192 @@ -71,19 +71,11 @@ ENTRY(cpu_arm922_proc_init) * cpu_arm922_proc_fin() */ ENTRY(cpu_arm922_proc_fin) - stmfd sp!, {lr} - mov ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE - msr cpsr_c, ip -#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH - bl arm922_flush_kern_cache_all -#else - bl v4wt_flush_kern_cache_all -#endif mrc p15, 0, r0, c1, c0, 0 @ ctrl register bic r0, r0, #0x1000 @ ...i............ bic r0, r0, #0x000e @ ............wca. mcr p15, 0, r0, c1, c0, 0 @ disable caches - ldmfd sp!, {pc} + mov pc, lr /* * cpu_arm922_reset(loc) @@ -95,16 +87,21 @@ ENTRY(cpu_arm922_proc_fin) * loc: location to jump to for soft reset */ .align 5 + .pushsection .idmap.text, "ax" ENTRY(cpu_arm922_reset) mov ip, #0 mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches mcr p15, 0, ip, c7, c10, 4 @ drain WB +#ifdef CONFIG_MMU mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs +#endif mrc p15, 0, ip, c1, c0, 0 @ ctrl register bic ip, ip, #0x000f @ ............wcam bic ip, ip, #0x1100 @ ...i...s........ mcr p15, 0, ip, c1, c0, 0 @ ctrl register mov pc, r0 +ENDPROC(cpu_arm922_reset) + .popsection /* * cpu_arm922_do_idle() @@ -118,6 +115,17 @@ ENTRY(cpu_arm922_do_idle) #ifndef CONFIG_CPU_DCACHE_WRITETHROUGH /* + * flush_icache_all() + * + * Unconditionally clean and invalidate the entire icache. 
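
Note on define_cache_functions: here and for the other cores in this series, the hand-written arm9xx_cache_fns tables of .long entries are replaced by the define_cache_functions macro from proc-macros.S, which emits a struct cpu_cache_fns in a fixed order and keeps every proc-*.S file in step with <asm/cacheflush.h>. The ordering it relies on is roughly the following sketch (types abbreviated; the header in the matching kernel version is authoritative):

    struct cpu_cache_fns {
        void (*flush_icache_all)(void);
        void (*flush_kern_all)(void);
        void (*flush_kern_louis)(void);           /* aliased to flush_kern_all here */
        void (*flush_user_all)(void);
        void (*flush_user_range)(unsigned long, unsigned long, unsigned int);
        void (*coherent_kern_range)(unsigned long, unsigned long);
        int  (*coherent_user_range)(unsigned long, unsigned long); /* hence the new
                                                                       "mov r0, #0"  */
        void (*flush_kern_dcache_area)(void *, unsigned long);     /* was ..._page   */
        void (*dma_map_area)(const void *, unsigned long, int);    /* new            */
        void (*dma_unmap_area)(const void *, unsigned long, int);  /* new            */
        void (*dma_flush_range)(const void *, const void *);
    };
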
+ */ +ENTRY(arm922_flush_icache_all) + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mov pc, lr +ENDPROC(arm922_flush_icache_all) + +/* * flush_user_cache_all() * * Clean and invalidate all cache entries in a particular @@ -204,18 +212,20 @@ ENTRY(arm922_coherent_user_range) cmp r0, r1 blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov r0, #0 mov pc, lr /* - * flush_kern_dcache_page(void *page) + * flush_kern_dcache_area(void *addr, size_t size) * * Ensure no D cache aliasing occurs, either with itself or * the I cache * - * - addr - page aligned address + * - addr - kernel address + * - size - region size */ -ENTRY(arm922_flush_kern_dcache_page) - add r1, r0, #PAGE_SZ +ENTRY(arm922_flush_kern_dcache_area) + add r1, r0, r1 1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry add r0, r0, #CACHE_DLINESIZE cmp r0, r1 @@ -238,7 +248,7 @@ ENTRY(arm922_flush_kern_dcache_page) * * (same as v4wb) */ -ENTRY(arm922_dma_inv_range) +arm922_dma_inv_range: tst r0, #CACHE_DLINESIZE - 1 bic r0, r0, #CACHE_DLINESIZE - 1 mcrne p15, 0, r0, c7, c10, 1 @ clean D entry @@ -261,7 +271,7 @@ ENTRY(arm922_dma_inv_range) * * (same as v4wb) */ -ENTRY(arm922_dma_clean_range) +arm922_dma_clean_range: bic r0, r0, #CACHE_DLINESIZE - 1 1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry add r0, r0, #CACHE_DLINESIZE @@ -287,17 +297,35 @@ ENTRY(arm922_dma_flush_range) mcr p15, 0, r0, c7, c10, 4 @ drain WB mov pc, lr -ENTRY(arm922_cache_fns) - .long arm922_flush_kern_cache_all - .long arm922_flush_user_cache_all - .long arm922_flush_user_cache_range - .long arm922_coherent_kern_range - .long arm922_coherent_user_range - .long arm922_flush_kern_dcache_page - .long arm922_dma_inv_range - .long arm922_dma_clean_range - .long arm922_dma_flush_range +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm922_dma_map_area) + add r1, r1, r0 + cmp r2, #DMA_TO_DEVICE + beq arm922_dma_clean_range + bcs arm922_dma_inv_range + b arm922_dma_flush_range +ENDPROC(arm922_dma_map_area) +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm922_dma_unmap_area) + mov pc, lr +ENDPROC(arm922_dma_unmap_area) + + .globl arm922_flush_kern_cache_louis + .equ arm922_flush_kern_cache_louis, arm922_flush_kern_cache_all + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions arm922 #endif @@ -321,6 +349,7 @@ ENTRY(cpu_arm922_dcache_clean_area) */ .align 5 ENTRY(cpu_arm922_switch_mm) +#ifdef CONFIG_MMU mov ip, #0 #ifdef CONFIG_CPU_DCACHE_WRITETHROUGH mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache @@ -341,56 +370,37 @@ ENTRY(cpu_arm922_switch_mm) mcr p15, 0, ip, c7, c10, 4 @ drain WB mcr p15, 0, r0, c2, c0, 0 @ load page table pointer mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs +#endif mov pc, lr /* - * cpu_arm922_set_pte(ptep, pte) + * cpu_arm922_set_pte_ext(ptep, pte, ext) * * Set a PTE and flush it out */ .align 5 -ENTRY(cpu_arm922_set_pte) - str r1, [r0], #-2048 @ linux version - - eor r1, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY - - bic r2, r1, #PTE_SMALL_AP_MASK - bic r2, r2, #PTE_TYPE_MASK - orr r2, r2, #PTE_TYPE_SMALL - - tst r1, #L_PTE_USER @ User? - orrne r2, r2, #PTE_SMALL_AP_URO_SRW - - tst r1, #L_PTE_WRITE | L_PTE_DIRTY @ Write and Dirty? - orreq r2, r2, #PTE_SMALL_AP_UNO_SRW - - tst r1, #L_PTE_PRESENT | L_PTE_YOUNG @ Present and Young? 
- movne r2, #0 - -#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH - eor r3, r2, #0x0a @ C & small page? - tst r3, #0x0b - biceq r2, r2, #4 -#endif - str r2, [r0] @ hardware version +ENTRY(cpu_arm922_set_pte_ext) +#ifdef CONFIG_MMU + armv3_set_pte_ext mov r0, r0 mcr p15, 0, r0, c7, c10, 1 @ clean D entry mcr p15, 0, r0, c7, c10, 4 @ drain WB +#endif /* CONFIG_MMU */ mov pc, lr - __INIT - .type __arm922_setup, #function __arm922_setup: mov r0, #0 mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4 mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4 +#ifdef CONFIG_MMU mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4 +#endif + adr r5, arm922_crval + ldmia r5, {r5, r6} mrc p15, 0, r0, c1, c0 @ get control register v4 - ldr r5, arm922_cr1_clear bic r0, r0, r5 - ldr r5, arm922_cr1_set - orr r0, r0, r5 + orr r0, r0, r6 mov pc, lr .size __arm922_setup, . - __arm922_setup @@ -400,59 +410,19 @@ __arm922_setup: * ..11 0001 ..11 0101 * */ - .type arm922_cr1_clear, #object - .type arm922_cr1_set, #object -arm922_cr1_clear: - .word 0x3f3f -arm922_cr1_set: - .word 0x3135 + .type arm922_crval, #object +arm922_crval: + crval clear=0x00003f3f, mmuset=0x00003135, ucset=0x00001130 __INITDATA - -/* - * Purpose : Function pointers used to access above functions - all calls - * come through these - */ - .type arm922_processor_functions, #object -arm922_processor_functions: - .word v4t_early_abort - .word cpu_arm922_proc_init - .word cpu_arm922_proc_fin - .word cpu_arm922_reset - .word cpu_arm922_do_idle - .word cpu_arm922_dcache_clean_area - .word cpu_arm922_switch_mm - .word cpu_arm922_set_pte - .size arm922_processor_functions, . - arm922_processor_functions + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions arm922, dabort=v4t_early_abort, pabort=legacy_pabort .section ".rodata" - .type cpu_arch_name, #object -cpu_arch_name: - .asciz "armv4t" - .size cpu_arch_name, . - cpu_arch_name - - .type cpu_elf_name, #object -cpu_elf_name: - .asciz "v4" - .size cpu_elf_name, . - cpu_elf_name - - .type cpu_arm922_name, #object -cpu_arm922_name: - .ascii "ARM922T" -#ifndef CONFIG_CPU_ICACHE_DISABLE - .ascii "i" -#endif -#ifndef CONFIG_CPU_DCACHE_DISABLE - .ascii "d" -#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH - .ascii "(wt)" -#else - .ascii "(wb)" -#endif -#endif - .ascii "\0" - .size cpu_arm922_name, . - cpu_arm922_name + string cpu_arch_name, "armv4t" + string cpu_elf_name, "v4" + string cpu_arm922_name, "ARM922T" .align @@ -468,6 +438,10 @@ __arm922_proc_info: PMD_BIT4 | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ + .long PMD_TYPE_SECT | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ b __arm922_setup .long cpu_arch_name .long cpu_elf_name diff --git a/arch/arm/mm/proc-arm925.S b/arch/arm/mm/proc-arm925.S index ee95c52db51..ba0d58e1a2a 100644 --- a/arch/arm/mm/proc-arm925.S +++ b/arch/arm/mm/proc-arm925.S @@ -9,6 +9,8 @@ * Update for Linux-2.6 and cache flush improvements * Copyright (C) 2004 Nokia Corporation by Tony Lindgren <tony@atomide.com> * + * hacked for non-paged-MM by Hyok S. Choi, 2004. 
+ * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -48,12 +50,11 @@ */ #include <linux/linkage.h> -#include <linux/config.h> #include <linux/init.h> #include <asm/assembler.h> +#include <asm/hwcap.h> +#include <asm/pgtable-hwdef.h> #include <asm/pgtable.h> -#include <asm/procinfo.h> -#include <asm/hardware.h> #include <asm/page.h> #include <asm/ptrace.h> #include "proc-macros.S" @@ -76,7 +77,7 @@ /* * This is the size at which it becomes more efficient to * clean the whole cache, rather than using the individual - * cache line maintainence instructions. + * cache line maintenance instructions. */ #define CACHE_DLIMIT 8192 @@ -91,15 +92,11 @@ ENTRY(cpu_arm925_proc_init) * cpu_arm925_proc_fin() */ ENTRY(cpu_arm925_proc_fin) - stmfd sp!, {lr} - mov ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE - msr cpsr_c, ip - bl arm925_flush_kern_cache_all mrc p15, 0, r0, c1, c0, 0 @ ctrl register bic r0, r0, #0x1000 @ ...i............ bic r0, r0, #0x000e @ ............wca. mcr p15, 0, r0, c1, c0, 0 @ disable caches - ldmfd sp!, {pc} + mov pc, lr /* * cpu_arm925_reset(loc) @@ -111,6 +108,7 @@ ENTRY(cpu_arm925_proc_fin) * loc: location to jump to for soft reset */ .align 5 + .pushsection .idmap.text, "ax" ENTRY(cpu_arm925_reset) /* Send software reset to MPU and DSP */ mov ip, #0xff000000 @@ -118,11 +116,15 @@ ENTRY(cpu_arm925_reset) orr ip, ip, #0x0000ce00 mov r4, #1 strh r4, [ip, #0x10] +ENDPROC(cpu_arm925_reset) + .popsection mov ip, #0 mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches mcr p15, 0, ip, c7, c10, 4 @ drain WB +#ifdef CONFIG_MMU mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs +#endif mrc p15, 0, ip, c1, c0, 0 @ ctrl register bic ip, ip, #0x000f @ ............wcam bic ip, ip, #0x1100 @ ...i...s........ @@ -146,6 +148,17 @@ ENTRY(cpu_arm925_do_idle) mov pc, lr /* + * flush_icache_all() + * + * Unconditionally clean and invalidate the entire icache. 
+ */ +ENTRY(arm925_flush_icache_all) + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mov pc, lr +ENDPROC(arm925_flush_icache_all) + +/* * flush_user_cache_all() * * Clean and invalidate all cache entries in a particular @@ -245,18 +258,20 @@ ENTRY(arm925_coherent_user_range) cmp r0, r1 blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov r0, #0 mov pc, lr /* - * flush_kern_dcache_page(void *page) + * flush_kern_dcache_area(void *addr, size_t size) * * Ensure no D cache aliasing occurs, either with itself or * the I cache * - * - addr - page aligned address + * - addr - kernel address + * - size - region size */ -ENTRY(arm925_flush_kern_dcache_page) - add r1, r0, #PAGE_SZ +ENTRY(arm925_flush_kern_dcache_area) + add r1, r0, r1 1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry add r0, r0, #CACHE_DLINESIZE cmp r0, r1 @@ -279,7 +294,7 @@ ENTRY(arm925_flush_kern_dcache_page) * * (same as v4wb) */ -ENTRY(arm925_dma_inv_range) +arm925_dma_inv_range: #ifndef CONFIG_CPU_DCACHE_WRITETHROUGH tst r0, #CACHE_DLINESIZE - 1 mcrne p15, 0, r0, c7, c10, 1 @ clean D entry @@ -304,7 +319,7 @@ ENTRY(arm925_dma_inv_range) * * (same as v4wb) */ -ENTRY(arm925_dma_clean_range) +arm925_dma_clean_range: #ifndef CONFIG_CPU_DCACHE_WRITETHROUGH bic r0, r0, #CACHE_DLINESIZE - 1 1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry @@ -329,7 +344,7 @@ ENTRY(arm925_dma_flush_range) #ifndef CONFIG_CPU_DCACHE_WRITETHROUGH mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry #else - mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry #endif add r0, r0, #CACHE_DLINESIZE cmp r0, r1 @@ -337,16 +352,35 @@ ENTRY(arm925_dma_flush_range) mcr p15, 0, r0, c7, c10, 4 @ drain WB mov pc, lr -ENTRY(arm925_cache_fns) - .long arm925_flush_kern_cache_all - .long arm925_flush_user_cache_all - .long arm925_flush_user_cache_range - .long arm925_coherent_kern_range - .long arm925_coherent_user_range - .long arm925_flush_kern_dcache_page - .long arm925_dma_inv_range - .long arm925_dma_clean_range - .long arm925_dma_flush_range +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm925_dma_map_area) + add r1, r1, r0 + cmp r2, #DMA_TO_DEVICE + beq arm925_dma_clean_range + bcs arm925_dma_inv_range + b arm925_dma_flush_range +ENDPROC(arm925_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm925_dma_unmap_area) + mov pc, lr +ENDPROC(arm925_dma_unmap_area) + + .globl arm925_flush_kern_cache_louis + .equ arm925_flush_kern_cache_louis, arm925_flush_kern_cache_all + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions arm925 ENTRY(cpu_arm925_dcache_clean_area) #ifndef CONFIG_CPU_DCACHE_WRITETHROUGH @@ -369,6 +403,7 @@ ENTRY(cpu_arm925_dcache_clean_area) */ .align 5 ENTRY(cpu_arm925_switch_mm) +#ifdef CONFIG_MMU mov ip, #0 #ifdef CONFIG_CPU_DCACHE_WRITETHROUGH mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache @@ -383,47 +418,26 @@ ENTRY(cpu_arm925_switch_mm) mcr p15, 0, ip, c7, c10, 4 @ drain WB mcr p15, 0, r0, c2, c0, 0 @ load page table pointer mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs +#endif mov pc, lr /* - * cpu_arm925_set_pte(ptep, pte) + * cpu_arm925_set_pte_ext(ptep, pte, ext) * * Set a PTE and flush it out */ .align 5 -ENTRY(cpu_arm925_set_pte) - str r1, [r0], #-2048 @ linux version - - eor r1, r1, #L_PTE_PRESENT | L_PTE_YOUNG | 
L_PTE_WRITE | L_PTE_DIRTY - - bic r2, r1, #PTE_SMALL_AP_MASK - bic r2, r2, #PTE_TYPE_MASK - orr r2, r2, #PTE_TYPE_SMALL - - tst r1, #L_PTE_USER @ User? - orrne r2, r2, #PTE_SMALL_AP_URO_SRW - - tst r1, #L_PTE_WRITE | L_PTE_DIRTY @ Write and Dirty? - orreq r2, r2, #PTE_SMALL_AP_UNO_SRW - - tst r1, #L_PTE_PRESENT | L_PTE_YOUNG @ Present and Young? - movne r2, #0 - -#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH - eor r3, r2, #0x0a @ C & small page? - tst r3, #0x0b - biceq r2, r2, #4 -#endif - str r2, [r0] @ hardware version +ENTRY(cpu_arm925_set_pte_ext) +#ifdef CONFIG_MMU + armv3_set_pte_ext mov r0, r0 #ifndef CONFIG_CPU_DCACHE_WRITETHROUGH mcr p15, 0, r0, c7, c10, 1 @ clean D entry #endif mcr p15, 0, r0, c7, c10, 4 @ drain WB +#endif /* CONFIG_MMU */ mov pc, lr - __INIT - .type __arm925_setup, #function __arm925_setup: mov r0, #0 @@ -438,18 +452,20 @@ __arm925_setup: mov r0, #0 mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4 mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4 +#ifdef CONFIG_MMU mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4 +#endif #ifdef CONFIG_CPU_DCACHE_WRITETHROUGH mov r0, #4 @ disable write-back on caches explicitly mcr p15, 7, r0, c15, c0, 0 #endif + adr r5, arm925_crval + ldmia r5, {r5, r6} mrc p15, 0, r0, c1, c0 @ get control register v4 - ldr r5, arm925_cr1_clear bic r0, r0, r5 - ldr r5, arm925_cr1_set - orr r0, r0, r5 + orr r0, r0, r6 #ifdef CONFIG_CPU_CACHE_ROUND_ROBIN orr r0, r0, #0x4000 @ .1.. .... .... .... #endif @@ -462,90 +478,34 @@ __arm925_setup: * .011 0001 ..11 1101 * */ - .type arm925_cr1_clear, #object - .type arm925_cr1_set, #object -arm925_cr1_clear: - .word 0x7f3f -arm925_cr1_set: - .word 0x313d + .type arm925_crval, #object +arm925_crval: + crval clear=0x00007f3f, mmuset=0x0000313d, ucset=0x00001130 __INITDATA - -/* - * Purpose : Function pointers used to access above functions - all calls - * come through these - */ - .type arm925_processor_functions, #object -arm925_processor_functions: - .word v4t_early_abort - .word cpu_arm925_proc_init - .word cpu_arm925_proc_fin - .word cpu_arm925_reset - .word cpu_arm925_do_idle - .word cpu_arm925_dcache_clean_area - .word cpu_arm925_switch_mm - .word cpu_arm925_set_pte - .size arm925_processor_functions, . - arm925_processor_functions + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions arm925, dabort=v4t_early_abort, pabort=legacy_pabort .section ".rodata" - .type cpu_arch_name, #object -cpu_arch_name: - .asciz "armv4t" - .size cpu_arch_name, . - cpu_arch_name - - .type cpu_elf_name, #object -cpu_elf_name: - .asciz "v4" - .size cpu_elf_name, . - cpu_elf_name - - .type cpu_arm925_name, #object -cpu_arm925_name: - .ascii "ARM925T" -#ifndef CONFIG_CPU_ICACHE_DISABLE - .ascii "i" -#endif -#ifndef CONFIG_CPU_DCACHE_DISABLE - .ascii "d" -#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH - .ascii "(wt)" -#else - .ascii "(wb)" -#endif -#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN - .ascii "RR" -#endif -#endif - .ascii "\0" - .size cpu_arm925_name, . 
- cpu_arm925_name + string cpu_arch_name, "armv4t" + string cpu_elf_name, "v4" + string cpu_arm925_name, "ARM925T" .align .section ".proc.info.init", #alloc, #execinstr - .type __arm925_proc_info,#object -__arm925_proc_info: - .long 0x54029250 - .long 0xfffffff0 +.macro arm925_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cache + .type __\name\()_proc_info,#object +__\name\()_proc_info: + .long \cpu_val + .long \cpu_mask .long PMD_TYPE_SECT | \ + PMD_SECT_CACHEABLE | \ PMD_BIT4 | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ - b __arm925_setup - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB - .long cpu_arm925_name - .long arm925_processor_functions - .long v4wbi_tlb_fns - .long v4wb_user_fns - .long arm925_cache_fns - .size __arm925_proc_info, . - __arm925_proc_info - - .type __arm915_proc_info,#object -__arm915_proc_info: - .long 0x54029150 - .long 0xfffffff0 .long PMD_TYPE_SECT | \ PMD_BIT4 | \ PMD_SECT_AP_WRITE | \ @@ -559,4 +519,8 @@ __arm915_proc_info: .long v4wbi_tlb_fns .long v4wb_user_fns .long arm925_cache_fns - .size __arm925_proc_info, . - __arm925_proc_info + .size __\name\()_proc_info, . - __\name\()_proc_info +.endm + + arm925_proc_info arm925, 0x54029250, 0xfffffff0, cpu_arm925_name + arm925_proc_info arm915, 0x54029150, 0xfffffff0, cpu_arm925_name diff --git a/arch/arm/mm/proc-arm926.S b/arch/arm/mm/proc-arm926.S index 7d042dc20c4..0f098f407c9 100644 --- a/arch/arm/mm/proc-arm926.S +++ b/arch/arm/mm/proc-arm926.S @@ -3,6 +3,7 @@ * * Copyright (C) 1999-2001 ARM Limited * Copyright (C) 2000 Deep Blue Solutions Ltd. + * hacked for non-paged-MM by Hyok S. Choi, 2003. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -25,12 +26,11 @@ * CONFIG_CPU_ARM926_CPU_IDLE -> nohlt */ #include <linux/linkage.h> -#include <linux/config.h> #include <linux/init.h> #include <asm/assembler.h> +#include <asm/hwcap.h> +#include <asm/pgtable-hwdef.h> #include <asm/pgtable.h> -#include <asm/procinfo.h> -#include <asm/hardware.h> #include <asm/page.h> #include <asm/ptrace.h> #include "proc-macros.S" @@ -61,15 +61,11 @@ ENTRY(cpu_arm926_proc_init) * cpu_arm926_proc_fin() */ ENTRY(cpu_arm926_proc_fin) - stmfd sp!, {lr} - mov ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE - msr cpsr_c, ip - bl arm926_flush_kern_cache_all mrc p15, 0, r0, c1, c0, 0 @ ctrl register bic r0, r0, #0x1000 @ ...i............ bic r0, r0, #0x000e @ ............wca. mcr p15, 0, r0, c1, c0, 0 @ disable caches - ldmfd sp!, {pc} + mov pc, lr /* * cpu_arm926_reset(loc) @@ -81,16 +77,21 @@ ENTRY(cpu_arm926_proc_fin) * loc: location to jump to for soft reset */ .align 5 + .pushsection .idmap.text, "ax" ENTRY(cpu_arm926_reset) mov ip, #0 mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches mcr p15, 0, ip, c7, c10, 4 @ drain WB +#ifdef CONFIG_MMU mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs +#endif mrc p15, 0, ip, c1, c0, 0 @ ctrl register bic ip, ip, #0x000f @ ............wcam bic ip, ip, #0x1100 @ ...i...s........ 
mcr p15, 0, ip, c1, c0, 0 @ ctrl register mov pc, r0 +ENDPROC(cpu_arm926_reset) + .popsection /* * cpu_arm926_do_idle() @@ -103,10 +104,25 @@ ENTRY(cpu_arm926_do_idle) mrc p15, 0, r1, c1, c0, 0 @ Read control register mcr p15, 0, r0, c7, c10, 4 @ Drain write buffer bic r2, r1, #1 << 12 + mrs r3, cpsr @ Disable FIQs while Icache + orr ip, r3, #PSR_F_BIT @ is disabled + msr cpsr_c, ip mcr p15, 0, r2, c1, c0, 0 @ Disable I cache mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt mcr p15, 0, r1, c1, c0, 0 @ Restore ICache enable + msr cpsr_c, r3 @ Restore FIQ state + mov pc, lr + +/* + * flush_icache_all() + * + * Unconditionally clean and invalidate the entire icache. + */ +ENTRY(arm926_flush_icache_all) + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache mov pc, lr +ENDPROC(arm926_flush_icache_all) /* * flush_user_cache_all() @@ -205,18 +221,20 @@ ENTRY(arm926_coherent_user_range) cmp r0, r1 blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov r0, #0 mov pc, lr /* - * flush_kern_dcache_page(void *page) + * flush_kern_dcache_area(void *addr, size_t size) * * Ensure no D cache aliasing occurs, either with itself or * the I cache * - * - addr - page aligned address + * - addr - kernel address + * - size - region size */ -ENTRY(arm926_flush_kern_dcache_page) - add r1, r0, #PAGE_SZ +ENTRY(arm926_flush_kern_dcache_area) + add r1, r0, r1 1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry add r0, r0, #CACHE_DLINESIZE cmp r0, r1 @@ -239,7 +257,7 @@ ENTRY(arm926_flush_kern_dcache_page) * * (same as v4wb) */ -ENTRY(arm926_dma_inv_range) +arm926_dma_inv_range: #ifndef CONFIG_CPU_DCACHE_WRITETHROUGH tst r0, #CACHE_DLINESIZE - 1 mcrne p15, 0, r0, c7, c10, 1 @ clean D entry @@ -264,7 +282,7 @@ ENTRY(arm926_dma_inv_range) * * (same as v4wb) */ -ENTRY(arm926_dma_clean_range) +arm926_dma_clean_range: #ifndef CONFIG_CPU_DCACHE_WRITETHROUGH bic r0, r0, #CACHE_DLINESIZE - 1 1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry @@ -289,7 +307,7 @@ ENTRY(arm926_dma_flush_range) #ifndef CONFIG_CPU_DCACHE_WRITETHROUGH mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry #else - mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry #endif add r0, r0, #CACHE_DLINESIZE cmp r0, r1 @@ -297,16 +315,35 @@ ENTRY(arm926_dma_flush_range) mcr p15, 0, r0, c7, c10, 4 @ drain WB mov pc, lr -ENTRY(arm926_cache_fns) - .long arm926_flush_kern_cache_all - .long arm926_flush_user_cache_all - .long arm926_flush_user_cache_range - .long arm926_coherent_kern_range - .long arm926_coherent_user_range - .long arm926_flush_kern_dcache_page - .long arm926_dma_inv_range - .long arm926_dma_clean_range - .long arm926_dma_flush_range +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm926_dma_map_area) + add r1, r1, r0 + cmp r2, #DMA_TO_DEVICE + beq arm926_dma_clean_range + bcs arm926_dma_inv_range + b arm926_dma_flush_range +ENDPROC(arm926_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm926_dma_unmap_area) + mov pc, lr +ENDPROC(arm926_dma_unmap_area) + + .globl arm926_flush_kern_cache_louis + .equ arm926_flush_kern_cache_louis, arm926_flush_kern_cache_all + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions arm926 ENTRY(cpu_arm926_dcache_clean_area) #ifndef CONFIG_CPU_DCACHE_WRITETHROUGH @@ -329,6 +366,7 @@ 
ENTRY(cpu_arm926_dcache_clean_area) */ .align 5 ENTRY(cpu_arm926_switch_mm) +#ifdef CONFIG_MMU mov ip, #0 #ifdef CONFIG_CPU_DCACHE_WRITETHROUGH mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache @@ -341,53 +379,60 @@ ENTRY(cpu_arm926_switch_mm) mcr p15, 0, ip, c7, c10, 4 @ drain WB mcr p15, 0, r0, c2, c0, 0 @ load page table pointer mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs +#endif mov pc, lr /* - * cpu_arm926_set_pte(ptep, pte) + * cpu_arm926_set_pte_ext(ptep, pte, ext) * * Set a PTE and flush it out */ .align 5 -ENTRY(cpu_arm926_set_pte) - str r1, [r0], #-2048 @ linux version - - eor r1, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY - - bic r2, r1, #PTE_SMALL_AP_MASK - bic r2, r2, #PTE_TYPE_MASK - orr r2, r2, #PTE_TYPE_SMALL - - tst r1, #L_PTE_USER @ User? - orrne r2, r2, #PTE_SMALL_AP_URO_SRW - - tst r1, #L_PTE_WRITE | L_PTE_DIRTY @ Write and Dirty? - orreq r2, r2, #PTE_SMALL_AP_UNO_SRW - - tst r1, #L_PTE_PRESENT | L_PTE_YOUNG @ Present and Young? - movne r2, #0 - -#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH - eor r3, r2, #0x0a @ C & small page? - tst r3, #0x0b - biceq r2, r2, #4 -#endif - str r2, [r0] @ hardware version +ENTRY(cpu_arm926_set_pte_ext) +#ifdef CONFIG_MMU + armv3_set_pte_ext mov r0, r0 #ifndef CONFIG_CPU_DCACHE_WRITETHROUGH mcr p15, 0, r0, c7, c10, 1 @ clean D entry #endif mcr p15, 0, r0, c7, c10, 4 @ drain WB +#endif mov pc, lr - __INIT +/* Suspend/resume support: taken from arch/arm/plat-s3c24xx/sleep.S */ +.globl cpu_arm926_suspend_size +.equ cpu_arm926_suspend_size, 4 * 3 +#ifdef CONFIG_ARM_CPU_SUSPEND +ENTRY(cpu_arm926_do_suspend) + stmfd sp!, {r4 - r6, lr} + mrc p15, 0, r4, c13, c0, 0 @ PID + mrc p15, 0, r5, c3, c0, 0 @ Domain ID + mrc p15, 0, r6, c1, c0, 0 @ Control register + stmia r0, {r4 - r6} + ldmfd sp!, {r4 - r6, pc} +ENDPROC(cpu_arm926_do_suspend) + +ENTRY(cpu_arm926_do_resume) + mov ip, #0 + mcr p15, 0, ip, c8, c7, 0 @ invalidate I+D TLBs + mcr p15, 0, ip, c7, c7, 0 @ invalidate I+D caches + ldmia r0, {r4 - r6} + mcr p15, 0, r4, c13, c0, 0 @ PID + mcr p15, 0, r5, c3, c0, 0 @ Domain ID + mcr p15, 0, r1, c2, c0, 0 @ TTB address + mov r0, r6 @ control register + b cpu_resume_mmu +ENDPROC(cpu_arm926_do_resume) +#endif .type __arm926_setup, #function __arm926_setup: mov r0, #0 mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4 mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4 +#ifdef CONFIG_MMU mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4 +#endif #ifdef CONFIG_CPU_DCACHE_WRITETHROUGH @@ -395,11 +440,11 @@ __arm926_setup: mcr p15, 7, r0, c15, c0, 0 #endif + adr r5, arm926_crval + ldmia r5, {r5, r6} mrc p15, 0, r0, c1, c0 @ get control register v4 - ldr r5, arm926_cr1_clear bic r0, r0, r5 - ldr r5, arm926_cr1_set - orr r0, r0, r5 + orr r0, r0, r6 #ifdef CONFIG_CPU_CACHE_ROUND_ROBIN orr r0, r0, #0x4000 @ .1.. .... .... .... 
#endif @@ -412,62 +457,20 @@ __arm926_setup: * .011 0001 ..11 0101 * */ - .type arm926_cr1_clear, #object - .type arm926_cr1_set, #object -arm926_cr1_clear: - .word 0x7f3f -arm926_cr1_set: - .word 0x3135 + .type arm926_crval, #object +arm926_crval: + crval clear=0x00007f3f, mmuset=0x00003135, ucset=0x00001134 __INITDATA -/* - * Purpose : Function pointers used to access above functions - all calls - * come through these - */ - .type arm926_processor_functions, #object -arm926_processor_functions: - .word v5tj_early_abort - .word cpu_arm926_proc_init - .word cpu_arm926_proc_fin - .word cpu_arm926_reset - .word cpu_arm926_do_idle - .word cpu_arm926_dcache_clean_area - .word cpu_arm926_switch_mm - .word cpu_arm926_set_pte - .size arm926_processor_functions, . - arm926_processor_functions + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions arm926, dabort=v5tj_early_abort, pabort=legacy_pabort, suspend=1 .section ".rodata" - .type cpu_arch_name, #object -cpu_arch_name: - .asciz "armv5tej" - .size cpu_arch_name, . - cpu_arch_name - - .type cpu_elf_name, #object -cpu_elf_name: - .asciz "v5" - .size cpu_elf_name, . - cpu_elf_name - - .type cpu_arm926_name, #object -cpu_arm926_name: - .ascii "ARM926EJ-S" -#ifndef CONFIG_CPU_ICACHE_DISABLE - .ascii "i" -#endif -#ifndef CONFIG_CPU_DCACHE_DISABLE - .ascii "d" -#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH - .ascii "(wt)" -#else - .ascii "(wb)" -#endif -#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN - .ascii "RR" -#endif -#endif - .ascii "\0" - .size cpu_arm926_name, . - cpu_arm926_name + string cpu_arch_name, "armv5tej" + string cpu_elf_name, "v5" + string cpu_arm926_name, "ARM926EJ-S" .align @@ -483,6 +486,10 @@ __arm926_proc_info: PMD_BIT4 | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ + .long PMD_TYPE_SECT | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ b __arm926_setup .long cpu_arch_name .long cpu_elf_name diff --git a/arch/arm/mm/proc-arm940.S b/arch/arm/mm/proc-arm940.S new file mode 100644 index 00000000000..1c39a704ff6 --- /dev/null +++ b/arch/arm/mm/proc-arm940.S @@ -0,0 +1,374 @@ +/* + * linux/arch/arm/mm/arm940.S: utility functions for ARM940T + * + * Copyright (C) 2004-2006 Hyok S. Choi (hyok.choi@samsung.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/assembler.h> +#include <asm/hwcap.h> +#include <asm/pgtable-hwdef.h> +#include <asm/pgtable.h> +#include <asm/ptrace.h> +#include "proc-macros.S" + +/* ARM940T has a 4KB DCache comprising 256 lines of 4 words */ +#define CACHE_DLINESIZE 16 +#define CACHE_DSEGMENTS 4 +#define CACHE_DENTRIES 64 + + .text +/* + * cpu_arm940_proc_init() + * cpu_arm940_switch_mm() + * + * These are not required. 
+ */ +ENTRY(cpu_arm940_proc_init) +ENTRY(cpu_arm940_switch_mm) + mov pc, lr + +/* + * cpu_arm940_proc_fin() + */ +ENTRY(cpu_arm940_proc_fin) + mrc p15, 0, r0, c1, c0, 0 @ ctrl register + bic r0, r0, #0x00001000 @ i-cache + bic r0, r0, #0x00000004 @ d-cache + mcr p15, 0, r0, c1, c0, 0 @ disable caches + mov pc, lr + +/* + * cpu_arm940_reset(loc) + * Params : r0 = address to jump to + * Notes : This sets up everything for a reset + */ + .pushsection .idmap.text, "ax" +ENTRY(cpu_arm940_reset) + mov ip, #0 + mcr p15, 0, ip, c7, c5, 0 @ flush I cache + mcr p15, 0, ip, c7, c6, 0 @ flush D cache + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mrc p15, 0, ip, c1, c0, 0 @ ctrl register + bic ip, ip, #0x00000005 @ .............c.p + bic ip, ip, #0x00001000 @ i-cache + mcr p15, 0, ip, c1, c0, 0 @ ctrl register + mov pc, r0 +ENDPROC(cpu_arm940_reset) + .popsection + +/* + * cpu_arm940_do_idle() + */ + .align 5 +ENTRY(cpu_arm940_do_idle) + mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt + mov pc, lr + +/* + * flush_icache_all() + * + * Unconditionally clean and invalidate the entire icache. + */ +ENTRY(arm940_flush_icache_all) + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mov pc, lr +ENDPROC(arm940_flush_icache_all) + +/* + * flush_user_cache_all() + */ +ENTRY(arm940_flush_user_cache_all) + /* FALLTHROUGH */ + +/* + * flush_kern_cache_all() + * + * Clean and invalidate the entire cache. + */ +ENTRY(arm940_flush_kern_cache_all) + mov r2, #VM_EXEC + /* FALLTHROUGH */ + +/* + * flush_user_cache_range(start, end, flags) + * + * There is no efficient way to flush a range of cache entries + * in the specified address range. Thus, flushes all. + * + * - start - start address (inclusive) + * - end - end address (exclusive) + * - flags - vm_flags describing address space + */ +ENTRY(arm940_flush_user_cache_range) + mov ip, #0 +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + mcr p15, 0, ip, c7, c6, 0 @ flush D cache +#else + mov r1, #(CACHE_DSEGMENTS - 1) << 4 @ 4 segments +1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries +2: mcr p15, 0, r3, c7, c14, 2 @ clean/flush D index + subs r3, r3, #1 << 26 + bcs 2b @ entries 63 to 0 + subs r1, r1, #1 << 4 + bcs 1b @ segments 3 to 0 +#endif + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcrne p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * coherent_kern_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start, end. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm940_coherent_kern_range) + /* FALLTHROUGH */ + +/* + * coherent_user_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start, end. If you have non-snooping + * Harvard caches, you need to implement this function. 
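The set/way loops that recur throughout proc-arm940.S all feed the same index encoding to the clean/flush-by-index operation: the cache entry number sits in bits [31:26] and the segment number in bits [5:4], both counted down to zero, which with CACHE_DSEGMENTS = 4, CACHE_DENTRIES = 64 and 16-byte lines covers exactly the 4KB D-cache described at the top of the file. A rough user-space C model of the values the loop hands to "mcr p15, 0, r3, c7, c14, 2" (mcr_op() is a hypothetical stand-in for the coprocessor write):

	#include <stdint.h>

	#define DSEGMENTS	4	/* CACHE_DSEGMENTS in proc-arm940.S */
	#define DENTRIES	64	/* CACHE_DENTRIES */

	/* Enumerate the index words exactly as the assembly loop does:
	 * entry in bits [31:26], segment in bits [5:4], both counting down. */
	static void arm940_clean_flush_all(void (*mcr_op)(uint32_t index))
	{
		for (int seg = DSEGMENTS - 1; seg >= 0; seg--)
			for (int entry = DENTRIES - 1; entry >= 0; entry--)
				mcr_op(((uint32_t)entry << 26) | ((uint32_t)seg << 4));
	}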
+ * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm940_coherent_user_range) + /* FALLTHROUGH */ + +/* + * flush_kern_dcache_area(void *addr, size_t size) + * + * Ensure no D cache aliasing occurs, either with itself or + * the I cache + * + * - addr - kernel address + * - size - region size + */ +ENTRY(arm940_flush_kern_dcache_area) + mov r0, #0 + mov r1, #(CACHE_DSEGMENTS - 1) << 4 @ 4 segments +1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries +2: mcr p15, 0, r3, c7, c14, 2 @ clean/flush D index + subs r3, r3, #1 << 26 + bcs 2b @ entries 63 to 0 + subs r1, r1, #1 << 4 + bcs 1b @ segments 7 to 0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_inv_range(start, end) + * + * There is no efficient way to invalidate a specifid virtual + * address range. Thus, invalidates all. + * + * - start - virtual start address + * - end - virtual end address + */ +arm940_dma_inv_range: + mov ip, #0 + mov r1, #(CACHE_DSEGMENTS - 1) << 4 @ 4 segments +1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries +2: mcr p15, 0, r3, c7, c6, 2 @ flush D entry + subs r3, r3, #1 << 26 + bcs 2b @ entries 63 to 0 + subs r1, r1, #1 << 4 + bcs 1b @ segments 7 to 0 + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_clean_range(start, end) + * + * There is no efficient way to clean a specifid virtual + * address range. Thus, cleans all. + * + * - start - virtual start address + * - end - virtual end address + */ +arm940_dma_clean_range: +ENTRY(cpu_arm940_dcache_clean_area) + mov ip, #0 +#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH + mov r1, #(CACHE_DSEGMENTS - 1) << 4 @ 4 segments +1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries +2: mcr p15, 0, r3, c7, c10, 2 @ clean D entry + subs r3, r3, #1 << 26 + bcs 2b @ entries 63 to 0 + subs r1, r1, #1 << 4 + bcs 1b @ segments 7 to 0 +#endif + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_flush_range(start, end) + * + * There is no efficient way to clean and invalidate a specifid + * virtual address range. 
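Each of the per-CPU dma_map_area entry points (arm926_dma_map_area earlier, and the arm940/arm946/feroceon versions that follow) selects the line-by-line helper from the DMA direction with the same cmp/beq/bcs sequence. Assuming the usual Linux enum dma_data_direction values (DMA_BIDIRECTIONAL = 0, DMA_TO_DEVICE = 1, DMA_FROM_DEVICE = 2), the dispatch reads, in C:

	enum dma_data_direction { DMA_BIDIRECTIONAL = 0, DMA_TO_DEVICE = 1,
				  DMA_FROM_DEVICE = 2, DMA_NONE = 3 };

	/* hypothetical stand-ins for the per-CPU helpers, e.g. arm926_dma_clean_range */
	extern void dma_clean_range(unsigned long start, unsigned long end);
	extern void dma_inv_range(unsigned long start, unsigned long end);
	extern void dma_flush_range(unsigned long start, unsigned long end);

	static void dma_map_area_model(unsigned long start, unsigned long size,
				       enum dma_data_direction dir)
	{
		unsigned long end = start + size;	/* "add r1, r1, r0" */

		if (dir == DMA_TO_DEVICE)
			dma_clean_range(start, end);	/* write back only     */
		else if (dir >= DMA_FROM_DEVICE)
			dma_inv_range(start, end);	/* discard stale lines */
		else
			dma_flush_range(start, end);	/* clean + invalidate  */
	}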
+ * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm940_dma_flush_range) + mov ip, #0 + mov r1, #(CACHE_DSEGMENTS - 1) << 4 @ 4 segments +1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries +2: +#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH + mcr p15, 0, r3, c7, c14, 2 @ clean/flush D entry +#else + mcr p15, 0, r3, c7, c6, 2 @ invalidate D entry +#endif + subs r3, r3, #1 << 26 + bcs 2b @ entries 63 to 0 + subs r1, r1, #1 << 4 + bcs 1b @ segments 7 to 0 + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm940_dma_map_area) + add r1, r1, r0 + cmp r2, #DMA_TO_DEVICE + beq arm940_dma_clean_range + bcs arm940_dma_inv_range + b arm940_dma_flush_range +ENDPROC(arm940_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm940_dma_unmap_area) + mov pc, lr +ENDPROC(arm940_dma_unmap_area) + + .globl arm940_flush_kern_cache_louis + .equ arm940_flush_kern_cache_louis, arm940_flush_kern_cache_all + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions arm940 + + .type __arm940_setup, #function +__arm940_setup: + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mcr p15, 0, r0, c7, c6, 0 @ invalidate D cache + mcr p15, 0, r0, c7, c10, 4 @ drain WB + + mcr p15, 0, r0, c6, c3, 0 @ disable data area 3~7 + mcr p15, 0, r0, c6, c4, 0 + mcr p15, 0, r0, c6, c5, 0 + mcr p15, 0, r0, c6, c6, 0 + mcr p15, 0, r0, c6, c7, 0 + + mcr p15, 0, r0, c6, c3, 1 @ disable instruction area 3~7 + mcr p15, 0, r0, c6, c4, 1 + mcr p15, 0, r0, c6, c5, 1 + mcr p15, 0, r0, c6, c6, 1 + mcr p15, 0, r0, c6, c7, 1 + + mov r0, #0x0000003F @ base = 0, size = 4GB + mcr p15, 0, r0, c6, c0, 0 @ set area 0, default + mcr p15, 0, r0, c6, c0, 1 + + ldr r0, =(CONFIG_DRAM_BASE & 0xFFFFF000) @ base[31:12] of RAM + ldr r1, =(CONFIG_DRAM_SIZE >> 12) @ size of RAM (must be >= 4KB) + mov r2, #10 @ 11 is the minimum (4KB) +1: add r2, r2, #1 @ area size *= 2 + mov r1, r1, lsr #1 + bne 1b @ count not zero r-shift + orr r0, r0, r2, lsl #1 @ the area register value + orr r0, r0, #1 @ set enable bit + mcr p15, 0, r0, c6, c1, 0 @ set area 1, RAM + mcr p15, 0, r0, c6, c1, 1 + + ldr r0, =(CONFIG_FLASH_MEM_BASE & 0xFFFFF000) @ base[31:12] of FLASH + ldr r1, =(CONFIG_FLASH_SIZE >> 12) @ size of FLASH (must be >= 4KB) + mov r2, #10 @ 11 is the minimum (4KB) +1: add r2, r2, #1 @ area size *= 2 + mov r1, r1, lsr #1 + bne 1b @ count not zero r-shift + orr r0, r0, r2, lsl #1 @ the area register value + orr r0, r0, #1 @ set enable bit + mcr p15, 0, r0, c6, c2, 0 @ set area 2, ROM/FLASH + mcr p15, 0, r0, c6, c2, 1 + + mov r0, #0x06 + mcr p15, 0, r0, c2, c0, 0 @ Region 1&2 cacheable + mcr p15, 0, r0, c2, c0, 1 +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + mov r0, #0x00 @ disable whole write buffer +#else + mov r0, #0x02 @ Region 1 write bufferred +#endif + mcr p15, 0, r0, c3, c0, 0 + + mov r0, #0x10000 + sub r0, r0, #1 @ r0 = 0xffff + mcr p15, 0, r0, c5, c0, 0 @ all read/write access + mcr p15, 0, r0, c5, c0, 1 + + mrc p15, 0, r0, c1, c0 @ get control register + orr r0, r0, #0x00001000 @ I-cache + orr r0, r0, #0x00000005 @ MPU/D-cache + + mov pc, lr + + .size __arm940_setup, . 
- __arm940_setup + + __INITDATA + + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions arm940, dabort=nommu_early_abort, pabort=legacy_pabort, nommu=1 + + .section ".rodata" + + string cpu_arch_name, "armv4t" + string cpu_elf_name, "v4" + string cpu_arm940_name, "ARM940T" + + .align + + .section ".proc.info.init", #alloc, #execinstr + + .type __arm940_proc_info,#object +__arm940_proc_info: + .long 0x41009400 + .long 0xff00fff0 + .long 0 + b __arm940_setup + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB + .long cpu_arm940_name + .long arm940_processor_functions + .long 0 + .long 0 + .long arm940_cache_fns + .size __arm940_proc_info, . - __arm940_proc_info + diff --git a/arch/arm/mm/proc-arm946.S b/arch/arm/mm/proc-arm946.S new file mode 100644 index 00000000000..0289cd905e7 --- /dev/null +++ b/arch/arm/mm/proc-arm946.S @@ -0,0 +1,429 @@ +/* + * linux/arch/arm/mm/arm946.S: utility functions for ARM946E-S + * + * Copyright (C) 2004-2006 Hyok S. Choi (hyok.choi@samsung.com) + * + * (Many of cache codes are from proc-arm926.S) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/assembler.h> +#include <asm/hwcap.h> +#include <asm/pgtable-hwdef.h> +#include <asm/pgtable.h> +#include <asm/ptrace.h> +#include "proc-macros.S" + +/* + * ARM946E-S is synthesizable to have 0KB to 1MB sized D-Cache, + * comprising 256 lines of 32 bytes (8 words). + */ +#define CACHE_DSIZE (CONFIG_CPU_DCACHE_SIZE) /* typically 8KB. */ +#define CACHE_DLINESIZE 32 /* fixed */ +#define CACHE_DSEGMENTS 4 /* fixed */ +#define CACHE_DENTRIES (CACHE_DSIZE / CACHE_DSEGMENTS / CACHE_DLINESIZE) +#define CACHE_DLIMIT (CACHE_DSIZE * 4) /* benchmark needed */ + + .text +/* + * cpu_arm946_proc_init() + * cpu_arm946_switch_mm() + * + * These are not required. + */ +ENTRY(cpu_arm946_proc_init) +ENTRY(cpu_arm946_switch_mm) + mov pc, lr + +/* + * cpu_arm946_proc_fin() + */ +ENTRY(cpu_arm946_proc_fin) + mrc p15, 0, r0, c1, c0, 0 @ ctrl register + bic r0, r0, #0x00001000 @ i-cache + bic r0, r0, #0x00000004 @ d-cache + mcr p15, 0, r0, c1, c0, 0 @ disable caches + mov pc, lr + +/* + * cpu_arm946_reset(loc) + * Params : r0 = address to jump to + * Notes : This sets up everything for a reset + */ + .pushsection .idmap.text, "ax" +ENTRY(cpu_arm946_reset) + mov ip, #0 + mcr p15, 0, ip, c7, c5, 0 @ flush I cache + mcr p15, 0, ip, c7, c6, 0 @ flush D cache + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mrc p15, 0, ip, c1, c0, 0 @ ctrl register + bic ip, ip, #0x00000005 @ .............c.p + bic ip, ip, #0x00001000 @ i-cache + mcr p15, 0, ip, c1, c0, 0 @ ctrl register + mov pc, r0 +ENDPROC(cpu_arm946_reset) + .popsection + +/* + * cpu_arm946_do_idle() + */ + .align 5 +ENTRY(cpu_arm946_do_idle) + mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt + mov pc, lr + +/* + * flush_icache_all() + * + * Unconditionally clean and invalidate the entire icache. + */ +ENTRY(arm946_flush_icache_all) + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mov pc, lr +ENDPROC(arm946_flush_icache_all) + +/* + * flush_user_cache_all() + */ +ENTRY(arm946_flush_user_cache_all) + /* FALLTHROUGH */ + +/* + * flush_kern_cache_all() + * + * Clean and invalidate the entire cache. 
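proc-arm946.S is the first of these files whose cache geometry is not fixed: the ARM946E-S D-cache size is a synthesis option, so CACHE_DENTRIES and the whole-cache threshold are derived from CONFIG_CPU_DCACHE_SIZE at build time. A worked example for the typical 8KB configuration mentioned in the comments (the value used here is only that example, not additional configuration):

	/* Worked example of the macros above with CONFIG_CPU_DCACHE_SIZE = 8192 */
	#define CONFIG_CPU_DCACHE_SIZE	8192
	#define CACHE_DSIZE	CONFIG_CPU_DCACHE_SIZE		/* 8192 bytes          */
	#define CACHE_DLINESIZE	32				/* fixed               */
	#define CACHE_DSEGMENTS	4				/* fixed               */
	#define CACHE_DENTRIES	(CACHE_DSIZE / CACHE_DSEGMENTS / CACHE_DLINESIZE)
								/* = 64 lines/segment  */
	#define CACHE_DLIMIT	(CACHE_DSIZE * 4)		/* = 32768; larger     */
								/* ranges take the     */
								/* whole-cache path    */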
+ */ +ENTRY(arm946_flush_kern_cache_all) + mov r2, #VM_EXEC + mov ip, #0 +__flush_whole_cache: +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + mcr p15, 0, ip, c7, c6, 0 @ flush D cache +#else + mov r1, #(CACHE_DSEGMENTS - 1) << 29 @ 4 segments +1: orr r3, r1, #(CACHE_DENTRIES - 1) << 4 @ n entries +2: mcr p15, 0, r3, c7, c14, 2 @ clean/flush D index + subs r3, r3, #1 << 4 + bcs 2b @ entries n to 0 + subs r1, r1, #1 << 29 + bcs 1b @ segments 3 to 0 +#endif + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c5, 0 @ flush I cache + mcrne p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * flush_user_cache_range(start, end, flags) + * + * Clean and invalidate a range of cache entries in the + * specified address range. + * + * - start - start address (inclusive) + * - end - end address (exclusive) + * - flags - vm_flags describing address space + * (same as arm926) + */ +ENTRY(arm946_flush_user_cache_range) + mov ip, #0 + sub r3, r1, r0 @ calculate total size + cmp r3, #CACHE_DLIMIT + bhs __flush_whole_cache + +1: tst r2, #VM_EXEC +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE +#else + mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry + mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry + mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE +#endif + cmp r0, r1 + blo 1b + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * coherent_kern_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start, end. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm946_coherent_kern_range) + /* FALLTHROUGH */ + +/* + * coherent_user_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start, end. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + * (same as arm926) + */ +ENTRY(arm946_coherent_user_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov r0, #0 + mov pc, lr + +/* + * flush_kern_dcache_area(void *addr, size_t size) + * + * Ensure no D cache aliasing occurs, either with itself or + * the I cache + * + * - addr - kernel address + * - size - region size + * (same as arm926) + */ +ENTRY(arm946_flush_kern_dcache_area) + add r1, r0, r1 +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_inv_range(start, end) + * + * Invalidate (discard) the specified virtual address range. + * May not write back any entries. If 'start' or 'end' + * are not cache line aligned, those lines must be written + * back. 
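The invalidate ("discard") paths have to be careful at the edges of the range: a partially covered line at either end may still hold unrelated dirty data, so it is cleaned before the invalidation, which is what the leading tst/mcrne pairs in arm926_dma_inv_range earlier and arm946_dma_inv_range below do. The same logic as a C sketch, with clean_dline()/inv_dline()/drain_wb() as hypothetical stand-ins for the cp15 operations:

	#include <stdint.h>

	#define DLINE	32	/* CACHE_DLINESIZE here; 16 on ARM940/FA526 */

	/* hypothetical stand-ins for "clean D entry", "invalidate D entry"
	 * and "drain WB" */
	extern void clean_dline(uintptr_t addr);
	extern void inv_dline(uintptr_t addr);
	extern void drain_wb(void);

	static void dma_inv_range_model(uintptr_t start, uintptr_t end)
	{
		if (start & (DLINE - 1))
			clean_dline(start);	/* write back the partial first line */
		if (end & (DLINE - 1))
			clean_dline(end);	/* ...and the partial last line      */
		for (uintptr_t p = start & ~(uintptr_t)(DLINE - 1); p < end; p += DLINE)
			inv_dline(p);		/* discard whole lines in between    */
		drain_wb();
	}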
+ * + * - start - virtual start address + * - end - virtual end address + * (same as arm926) + */ +arm946_dma_inv_range: +#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH + tst r0, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r0, c7, c10, 1 @ clean D entry + tst r1, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r1, c7, c10, 1 @ clean D entry +#endif + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_clean_range(start, end) + * + * Clean the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + * + * (same as arm926) + */ +arm946_dma_clean_range: +#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b +#endif + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_flush_range(start, end) + * + * Clean and invalidate the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + * + * (same as arm926) + */ +ENTRY(arm946_dma_flush_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: +#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH + mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry +#else + mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry +#endif + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm946_dma_map_area) + add r1, r1, r0 + cmp r2, #DMA_TO_DEVICE + beq arm946_dma_clean_range + bcs arm946_dma_inv_range + b arm946_dma_flush_range +ENDPROC(arm946_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm946_dma_unmap_area) + mov pc, lr +ENDPROC(arm946_dma_unmap_area) + + .globl arm946_flush_kern_cache_louis + .equ arm946_flush_kern_cache_louis, arm946_flush_kern_cache_all + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions arm946 + +ENTRY(cpu_arm946_dcache_clean_area) +#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + subs r1, r1, #CACHE_DLINESIZE + bhi 1b +#endif + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + + .type __arm946_setup, #function +__arm946_setup: + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mcr p15, 0, r0, c7, c6, 0 @ invalidate D cache + mcr p15, 0, r0, c7, c10, 4 @ drain WB + + mcr p15, 0, r0, c6, c3, 0 @ disable memory region 3~7 + mcr p15, 0, r0, c6, c4, 0 + mcr p15, 0, r0, c6, c5, 0 + mcr p15, 0, r0, c6, c6, 0 + mcr p15, 0, r0, c6, c7, 0 + + mov r0, #0x0000003F @ base = 0, size = 4GB + mcr p15, 0, r0, c6, c0, 0 @ set region 0, default + + ldr r0, =(CONFIG_DRAM_BASE & 0xFFFFF000) @ base[31:12] of RAM + ldr r1, =(CONFIG_DRAM_SIZE >> 12) @ size of RAM (must be >= 4KB) + mov r2, #10 @ 11 is the minimum (4KB) +1: add r2, r2, #1 @ area size *= 2 + mov r1, r1, lsr #1 + bne 1b @ count not zero r-shift + orr r0, r0, r2, lsl #1 @ the region register value + orr r0, r0, #1 @ set enable bit + mcr p15, 0, r0, c6, c1, 0 @ set region 1, RAM + + ldr r0, =(CONFIG_FLASH_MEM_BASE & 0xFFFFF000) @ base[31:12] of FLASH + ldr r1, =(CONFIG_FLASH_SIZE >> 12) @ size of FLASH (must be >= 4KB) + mov 
r2, #10 @ 11 is the minimum (4KB) +1: add r2, r2, #1 @ area size *= 2 + mov r1, r1, lsr #1 + bne 1b @ count not zero r-shift + orr r0, r0, r2, lsl #1 @ the region register value + orr r0, r0, #1 @ set enable bit + mcr p15, 0, r0, c6, c2, 0 @ set region 2, ROM/FLASH + + mov r0, #0x06 + mcr p15, 0, r0, c2, c0, 0 @ region 1,2 d-cacheable + mcr p15, 0, r0, c2, c0, 1 @ region 1,2 i-cacheable +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + mov r0, #0x00 @ disable whole write buffer +#else + mov r0, #0x02 @ region 1 write bufferred +#endif + mcr p15, 0, r0, c3, c0, 0 + +/* + * Access Permission Settings for future permission control by PU. + * + * priv. user + * region 0 (whole) rw -- : b0001 + * region 1 (RAM) rw rw : b0011 + * region 2 (FLASH) rw r- : b0010 + * region 3~7 (none) -- -- : b0000 + */ + mov r0, #0x00000031 + orr r0, r0, #0x00000200 + mcr p15, 0, r0, c5, c0, 2 @ set data access permission + mcr p15, 0, r0, c5, c0, 3 @ set inst. access permission + + mrc p15, 0, r0, c1, c0 @ get control register + orr r0, r0, #0x00001000 @ I-cache + orr r0, r0, #0x00000005 @ MPU/D-cache +#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN + orr r0, r0, #0x00004000 @ .1.. .... .... .... +#endif + mov pc, lr + + .size __arm946_setup, . - __arm946_setup + + __INITDATA + + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions arm946, dabort=nommu_early_abort, pabort=legacy_pabort, nommu=1 + + .section ".rodata" + + string cpu_arch_name, "armv5te" + string cpu_elf_name, "v5t" + string cpu_arm946_name, "ARM946E-S" + + .align + + .section ".proc.info.init", #alloc, #execinstr + .type __arm946_proc_info,#object +__arm946_proc_info: + .long 0x41009460 + .long 0xff00fff0 + .long 0 + .long 0 + b __arm946_setup + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB + .long cpu_arm946_name + .long arm946_processor_functions + .long 0 + .long 0 + .long arm946_cache_fns + .size __arm946_proc_info, . - __arm946_proc_info + diff --git a/arch/arm/mm/proc-arm9tdmi.S b/arch/arm/mm/proc-arm9tdmi.S new file mode 100644 index 00000000000..f51197ba754 --- /dev/null +++ b/arch/arm/mm/proc-arm9tdmi.S @@ -0,0 +1,95 @@ +/* + * linux/arch/arm/mm/proc-arm9tdmi.S: utility functions for ARM9TDMI + * + * Copyright (C) 2003-2006 Hyok S. Choi <hyok.choi@samsung.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/assembler.h> +#include <asm/asm-offsets.h> +#include <asm/hwcap.h> +#include <asm/pgtable-hwdef.h> +#include <asm/pgtable.h> +#include <asm/ptrace.h> + +#include "proc-macros.S" + + .text +/* + * cpu_arm9tdmi_proc_init() + * cpu_arm9tdmi_do_idle() + * cpu_arm9tdmi_dcache_clean_area() + * cpu_arm9tdmi_switch_mm() + * + * These are not required. + */ +ENTRY(cpu_arm9tdmi_proc_init) +ENTRY(cpu_arm9tdmi_do_idle) +ENTRY(cpu_arm9tdmi_dcache_clean_area) +ENTRY(cpu_arm9tdmi_switch_mm) + mov pc, lr + +/* + * cpu_arm9tdmi_proc_fin() + */ +ENTRY(cpu_arm9tdmi_proc_fin) + mov pc, lr + +/* + * Function: cpu_arm9tdmi_reset(loc) + * Params : loc(r0) address to jump to + * Purpose : Sets up everything for a reset and jump to the location for soft reset. + */ + .pushsection .idmap.text, "ax" +ENTRY(cpu_arm9tdmi_reset) + mov pc, r0 +ENDPROC(cpu_arm9tdmi_reset) + .popsection + + .type __arm9tdmi_setup, #function +__arm9tdmi_setup: + mov pc, lr + .size __arm9tdmi_setup, . 
- __arm9tdmi_setup + + __INITDATA + + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions arm9tdmi, dabort=nommu_early_abort, pabort=legacy_pabort, nommu=1 + + .section ".rodata" + + string cpu_arch_name, "armv4t" + string cpu_elf_name, "v4" + string cpu_arm9tdmi_name, "ARM9TDMI" + string cpu_p2001_name, "P2001" + + .align + + .section ".proc.info.init", #alloc, #execinstr + +.macro arm9tdmi_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req + .type __\name\()_proc_info, #object +__\name\()_proc_info: + .long \cpu_val + .long \cpu_mask + .long 0 + .long 0 + b __arm9tdmi_setup + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP | HWCAP_THUMB | HWCAP_26BIT + .long \cpu_name + .long arm9tdmi_processor_functions + .long 0 + .long 0 + .long v4_cache_fns + .size __\name\()_proc_info, . - __\name\()_proc_info +.endm + + arm9tdmi_proc_info arm9tdmi, 0x41009900, 0xfff8ff00, cpu_arm9tdmi_name + arm9tdmi_proc_info p2001, 0x41029000, 0xffffffff, cpu_p2001_name diff --git a/arch/arm/mm/proc-fa526.S b/arch/arm/mm/proc-fa526.S new file mode 100644 index 00000000000..2dfc0f1d3bf --- /dev/null +++ b/arch/arm/mm/proc-fa526.S @@ -0,0 +1,218 @@ +/* + * linux/arch/arm/mm/proc-fa526.S: MMU functions for FA526 + * + * Written by : Luke Lee + * Copyright (C) 2005 Faraday Corp. + * Copyright (C) 2008-2009 Paulius Zaleckas <paulius.zaleckas@teltonika.lt> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * + * These are the low level assembler for performing cache and TLB + * functions on the fa526. + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/assembler.h> +#include <asm/hwcap.h> +#include <asm/pgtable-hwdef.h> +#include <asm/pgtable.h> +#include <asm/page.h> +#include <asm/ptrace.h> + +#include "proc-macros.S" + +#define CACHE_DLINESIZE 16 + + .text +/* + * cpu_fa526_proc_init() + */ +ENTRY(cpu_fa526_proc_init) + mov pc, lr + +/* + * cpu_fa526_proc_fin() + */ +ENTRY(cpu_fa526_proc_fin) + mrc p15, 0, r0, c1, c0, 0 @ ctrl register + bic r0, r0, #0x1000 @ ...i............ + bic r0, r0, #0x000e @ ............wca. + mcr p15, 0, r0, c1, c0, 0 @ disable caches + nop + nop + mov pc, lr + +/* + * cpu_fa526_reset(loc) + * + * Perform a soft reset of the system. Put the CPU into the + * same state as it would be if it had been reset, and branch + * to what would be the reset vector. + * + * loc: location to jump to for soft reset + */ + .align 4 + .pushsection .idmap.text, "ax" +ENTRY(cpu_fa526_reset) +/* TODO: Use CP8 if possible... */ + mov ip, #0 + mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches + mcr p15, 0, ip, c7, c10, 4 @ drain WB +#ifdef CONFIG_MMU + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs +#endif + mrc p15, 0, ip, c1, c0, 0 @ ctrl register + bic ip, ip, #0x000f @ ............wcam + bic ip, ip, #0x1100 @ ...i...s........ 
+ bic ip, ip, #0x0800 @ BTB off + mcr p15, 0, ip, c1, c0, 0 @ ctrl register + nop + nop + mov pc, r0 +ENDPROC(cpu_fa526_reset) + .popsection + +/* + * cpu_fa526_do_idle() + */ + .align 4 +ENTRY(cpu_fa526_do_idle) + mov pc, lr + + +ENTRY(cpu_fa526_dcache_clean_area) +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + subs r1, r1, #CACHE_DLINESIZE + bhi 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* =============================== PageTable ============================== */ + +/* + * cpu_fa526_switch_mm(pgd) + * + * Set the translation base pointer to be as described by pgd. + * + * pgd: new page tables + */ + .align 4 +ENTRY(cpu_fa526_switch_mm) +#ifdef CONFIG_MMU + mov ip, #0 +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache +#else + mcr p15, 0, ip, c7, c14, 0 @ clean and invalidate whole D cache +#endif + mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcr p15, 0, ip, c7, c5, 6 @ invalidate BTB since mm changed + mcr p15, 0, ip, c7, c10, 4 @ data write barrier + mcr p15, 0, ip, c7, c5, 4 @ prefetch flush + mcr p15, 0, r0, c2, c0, 0 @ load page table pointer + mcr p15, 0, ip, c8, c7, 0 @ invalidate UTLB +#endif + mov pc, lr + +/* + * cpu_fa526_set_pte_ext(ptep, pte, ext) + * + * Set a PTE and flush it out + */ + .align 4 +ENTRY(cpu_fa526_set_pte_ext) +#ifdef CONFIG_MMU + armv3_set_pte_ext + mov r0, r0 + mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mov r0, #0 + mcr p15, 0, r0, c7, c10, 4 @ drain WB +#endif + mov pc, lr + + .type __fa526_setup, #function +__fa526_setup: + /* On return of this routine, r0 must carry correct flags for CFG register */ + mov r0, #0 + mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4 +#ifdef CONFIG_MMU + mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4 +#endif + mcr p15, 0, r0, c7, c5, 5 @ invalidate IScratchpad RAM + + mov r0, #1 + mcr p15, 0, r0, c1, c1, 0 @ turn-on ECR + + mov r0, #0 + mcr p15, 0, r0, c7, c5, 6 @ invalidate BTB All + mcr p15, 0, r0, c7, c10, 4 @ data write barrier + mcr p15, 0, r0, c7, c5, 4 @ prefetch flush + + mov r0, #0x1f @ Domains 0, 1 = manager, 2 = client + mcr p15, 0, r0, c3, c0 @ load domain access register + + mrc p15, 0, r0, c1, c0 @ get control register v4 + ldr r5, fa526_cr1_clear + bic r0, r0, r5 + ldr r5, fa526_cr1_set + orr r0, r0, r5 + mov pc, lr + .size __fa526_setup, . - __fa526_setup + + /* + * .RVI ZFRS BLDP WCAM + * ..11 1001 .111 1101 + * + */ + .type fa526_cr1_clear, #object + .type fa526_cr1_set, #object +fa526_cr1_clear: + .word 0x3f3f +fa526_cr1_set: + .word 0x397D + + __INITDATA + + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions fa526, dabort=v4_early_abort, pabort=legacy_pabort + + .section ".rodata" + + string cpu_arch_name, "armv4" + string cpu_elf_name, "v4" + string cpu_fa526_name, "FA526" + + .align + + .section ".proc.info.init", #alloc, #execinstr + + .type __fa526_proc_info,#object +__fa526_proc_info: + .long 0x66015261 + .long 0xff01fff1 + .long PMD_TYPE_SECT | \ + PMD_SECT_BUFFERABLE | \ + PMD_SECT_CACHEABLE | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + .long PMD_TYPE_SECT | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + b __fa526_setup + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP | HWCAP_HALF + .long cpu_fa526_name + .long fa526_processor_functions + .long fa_tlb_fns + .long fa_user_fns + .long fa_cache_fns + .size __fa526_proc_info, . 
- __fa526_proc_info diff --git a/arch/arm/mm/proc-feroceon.S b/arch/arm/mm/proc-feroceon.S new file mode 100644 index 00000000000..db79b62c92f --- /dev/null +++ b/arch/arm/mm/proc-feroceon.S @@ -0,0 +1,626 @@ +/* + * linux/arch/arm/mm/proc-feroceon.S: MMU functions for Feroceon + * + * Heavily based on proc-arm926.S + * Maintainer: Assaf Hoffman <hoffman@marvell.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/assembler.h> +#include <asm/hwcap.h> +#include <asm/pgtable-hwdef.h> +#include <asm/pgtable.h> +#include <asm/page.h> +#include <asm/ptrace.h> +#include "proc-macros.S" + +/* + * This is the maximum size of an area which will be invalidated + * using the single invalidate entry instructions. Anything larger + * than this, and we go for the whole cache. + * + * This value should be chosen such that we choose the cheapest + * alternative. + */ +#define CACHE_DLIMIT 16384 + +/* + * the cache line size of the I and D cache + */ +#define CACHE_DLINESIZE 32 + + .bss + .align 3 +__cache_params_loc: + .space 8 + + .text +__cache_params: + .word __cache_params_loc + +/* + * cpu_feroceon_proc_init() + */ +ENTRY(cpu_feroceon_proc_init) + mrc p15, 0, r0, c0, c0, 1 @ read cache type register + ldr r1, __cache_params + mov r2, #(16 << 5) + tst r0, #(1 << 16) @ get way + mov r0, r0, lsr #18 @ get cache size order + movne r3, #((4 - 1) << 30) @ 4-way + and r0, r0, #0xf + moveq r3, #0 @ 1-way + mov r2, r2, lsl r0 @ actual cache size + movne r2, r2, lsr #2 @ turned into # of sets + sub r2, r2, #(1 << 5) + stmia r1, {r2, r3} + mov pc, lr + +/* + * cpu_feroceon_proc_fin() + */ +ENTRY(cpu_feroceon_proc_fin) +#if defined(CONFIG_CACHE_FEROCEON_L2) && \ + !defined(CONFIG_CACHE_FEROCEON_L2_WRITETHROUGH) + mov r0, #0 + mcr p15, 1, r0, c15, c9, 0 @ clean L2 + mcr p15, 0, r0, c7, c10, 4 @ drain WB +#endif + + mrc p15, 0, r0, c1, c0, 0 @ ctrl register + bic r0, r0, #0x1000 @ ...i............ + bic r0, r0, #0x000e @ ............wca. + mcr p15, 0, r0, c1, c0, 0 @ disable caches + mov pc, lr + +/* + * cpu_feroceon_reset(loc) + * + * Perform a soft reset of the system. Put the CPU into the + * same state as it would be if it had been reset, and branch + * to what would be the reset vector. + * + * loc: location to jump to for soft reset + */ + .align 5 + .pushsection .idmap.text, "ax" +ENTRY(cpu_feroceon_reset) + mov ip, #0 + mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches + mcr p15, 0, ip, c7, c10, 4 @ drain WB +#ifdef CONFIG_MMU + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs +#endif + mrc p15, 0, ip, c1, c0, 0 @ ctrl register + bic ip, ip, #0x000f @ ............wcam + bic ip, ip, #0x1100 @ ...i...s........ 
+ mcr p15, 0, ip, c1, c0, 0 @ ctrl register + mov pc, r0 +ENDPROC(cpu_feroceon_reset) + .popsection + +/* + * cpu_feroceon_do_idle() + * + * Called with IRQs disabled + */ + .align 5 +ENTRY(cpu_feroceon_do_idle) + mov r0, #0 + mcr p15, 0, r0, c7, c10, 4 @ Drain write buffer + mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt + mov pc, lr + +/* + * flush_icache_all() + * + * Unconditionally clean and invalidate the entire icache. + */ +ENTRY(feroceon_flush_icache_all) + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mov pc, lr +ENDPROC(feroceon_flush_icache_all) + +/* + * flush_user_cache_all() + * + * Clean and invalidate all cache entries in a particular + * address space. + */ + .align 5 +ENTRY(feroceon_flush_user_cache_all) + /* FALLTHROUGH */ + +/* + * flush_kern_cache_all() + * + * Clean and invalidate the entire cache. + */ +ENTRY(feroceon_flush_kern_cache_all) + mov r2, #VM_EXEC + +__flush_whole_cache: + ldr r1, __cache_params + ldmia r1, {r1, r3} +1: orr ip, r1, r3 +2: mcr p15, 0, ip, c7, c14, 2 @ clean + invalidate D set/way + subs ip, ip, #(1 << 30) @ next way + bcs 2b + subs r1, r1, #(1 << 5) @ next set + bcs 1b + + tst r2, #VM_EXEC + mov ip, #0 + mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcrne p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * flush_user_cache_range(start, end, flags) + * + * Clean and invalidate a range of cache entries in the + * specified address range. + * + * - start - start address (inclusive) + * - end - end address (exclusive) + * - flags - vm_flags describing address space + */ + .align 5 +ENTRY(feroceon_flush_user_cache_range) + sub r3, r1, r0 @ calculate total size + cmp r3, #CACHE_DLIMIT + bgt __flush_whole_cache +1: tst r2, #VM_EXEC + mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry + mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry + mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + tst r2, #VM_EXEC + mov ip, #0 + mcrne p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * coherent_kern_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start, end. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ + .align 5 +ENTRY(feroceon_coherent_kern_range) + /* FALLTHROUGH */ + +/* + * coherent_user_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start, end. If you have non-snooping + * Harvard caches, you need to implement this function. 
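Unlike the ARM940/946 loops above, whose geometry is baked in at compile time, the Feroceon whole-cache loop is sized at boot: cpu_feroceon_proc_init decodes the cache type register once and stores the highest set index (already shifted to bit 5, the set field for 32-byte lines) and the highest way index (bits [31:30]) in __cache_params_loc, and __flush_whole_cache then walks every set/way combination built from those two words. Roughly, in C, with mcr_op() again a hypothetical stand-in for the clean-and-invalidate-by-index instruction:

	#include <stdint.h>

	static void feroceon_flush_all_model(uint32_t last_set, uint32_t last_way,
					     void (*mcr_op)(uint32_t index))
	{
		uint32_t nsets = (last_set >> 5) + 1;	/* set index lives at bit 5  */
		uint32_t nways = (last_way >> 30) + 1;	/* way index at bits [31:30] */

		for (uint32_t s = 0; s < nsets; s++)
			for (uint32_t w = 0; w < nways; w++)
				mcr_op((s << 5) | (w << 30));
	}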
+ * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(feroceon_coherent_user_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov r0, #0 + mov pc, lr + +/* + * flush_kern_dcache_area(void *addr, size_t size) + * + * Ensure no D cache aliasing occurs, either with itself or + * the I cache + * + * - addr - kernel address + * - size - region size + */ + .align 5 +ENTRY(feroceon_flush_kern_dcache_area) + add r1, r0, r1 +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + + .align 5 +ENTRY(feroceon_range_flush_kern_dcache_area) + mrs r2, cpsr + add r1, r0, #PAGE_SZ - CACHE_DLINESIZE @ top addr is inclusive + orr r3, r2, #PSR_I_BIT + msr cpsr_c, r3 @ disable interrupts + mcr p15, 5, r0, c15, c15, 0 @ D clean/inv range start + mcr p15, 5, r1, c15, c15, 1 @ D clean/inv range top + msr cpsr_c, r2 @ restore interrupts + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_inv_range(start, end) + * + * Invalidate (discard) the specified virtual address range. + * May not write back any entries. If 'start' or 'end' + * are not cache line aligned, those lines must be written + * back. + * + * - start - virtual start address + * - end - virtual end address + * + * (same as v4wb) + */ + .align 5 +feroceon_dma_inv_range: + tst r0, #CACHE_DLINESIZE - 1 + bic r0, r0, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r0, c7, c10, 1 @ clean D entry + tst r1, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r1, c7, c10, 1 @ clean D entry +1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + + .align 5 +feroceon_range_dma_inv_range: + mrs r2, cpsr + tst r0, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r0, c7, c10, 1 @ clean D entry + tst r1, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r1, c7, c10, 1 @ clean D entry + cmp r1, r0 + subne r1, r1, #1 @ top address is inclusive + orr r3, r2, #PSR_I_BIT + msr cpsr_c, r3 @ disable interrupts + mcr p15, 5, r0, c15, c14, 0 @ D inv range start + mcr p15, 5, r1, c15, c14, 1 @ D inv range top + msr cpsr_c, r2 @ restore interrupts + mov pc, lr + +/* + * dma_clean_range(start, end) + * + * Clean the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + * + * (same as v4wb) + */ + .align 5 +feroceon_dma_clean_range: + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + + .align 5 +feroceon_range_dma_clean_range: + mrs r2, cpsr + cmp r1, r0 + subne r1, r1, #1 @ top address is inclusive + orr r3, r2, #PSR_I_BIT + msr cpsr_c, r3 @ disable interrupts + mcr p15, 5, r0, c15, c13, 0 @ D clean range start + mcr p15, 5, r1, c15, c13, 1 @ D clean range top + msr cpsr_c, r2 @ restore interrupts + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_flush_range(start, end) + * + * Clean and invalidate the specified virtual address range. 
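The feroceon_range_* variants above drop the per-line loops in favour of the core's range operations: one "mcr p15, 5" write latches the start of the range and a second latches the top, the top address being inclusive, which is why the exclusive end is first pulled back by one byte. The pair is issued with IRQs masked, presumably so an interrupt cannot start another range operation between the two writes. A schematic C version, with irq_save()/irq_restore() and set_range_start()/set_range_top() as hypothetical stand-ins:

	/* stand-ins for the mrs/msr of cpsr and the two "mcr p15, 5, ..." writes */
	extern unsigned long irq_save(void);
	extern void irq_restore(unsigned long flags);
	extern void set_range_start(unsigned long addr);
	extern void set_range_top(unsigned long addr);
	extern void drain_wb(void);

	static void feroceon_range_clean_model(unsigned long start, unsigned long end)
	{
		unsigned long flags;

		if (end != start)
			end -= 1;		/* top address is inclusive */
		flags = irq_save();
		set_range_start(start);
		set_range_top(end);
		irq_restore(flags);
		drain_wb();			/* clean/flush variants only */
	}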
+ * + * - start - virtual start address + * - end - virtual end address + */ + .align 5 +ENTRY(feroceon_dma_flush_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + + .align 5 +ENTRY(feroceon_range_dma_flush_range) + mrs r2, cpsr + cmp r1, r0 + subne r1, r1, #1 @ top address is inclusive + orr r3, r2, #PSR_I_BIT + msr cpsr_c, r3 @ disable interrupts + mcr p15, 5, r0, c15, c15, 0 @ D clean/inv range start + mcr p15, 5, r1, c15, c15, 1 @ D clean/inv range top + msr cpsr_c, r2 @ restore interrupts + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(feroceon_dma_map_area) + add r1, r1, r0 + cmp r2, #DMA_TO_DEVICE + beq feroceon_dma_clean_range + bcs feroceon_dma_inv_range + b feroceon_dma_flush_range +ENDPROC(feroceon_dma_map_area) + +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(feroceon_range_dma_map_area) + add r1, r1, r0 + cmp r2, #DMA_TO_DEVICE + beq feroceon_range_dma_clean_range + bcs feroceon_range_dma_inv_range + b feroceon_range_dma_flush_range +ENDPROC(feroceon_range_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(feroceon_dma_unmap_area) + mov pc, lr +ENDPROC(feroceon_dma_unmap_area) + + .globl feroceon_flush_kern_cache_louis + .equ feroceon_flush_kern_cache_louis, feroceon_flush_kern_cache_all + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions feroceon + +.macro range_alias basename + .globl feroceon_range_\basename + .type feroceon_range_\basename , %function + .equ feroceon_range_\basename , feroceon_\basename +.endm + +/* + * Most of the cache functions are unchanged for this case. + * Export suitable alias symbols for the unchanged functions: + */ + range_alias flush_icache_all + range_alias flush_user_cache_all + range_alias flush_kern_cache_all + range_alias flush_kern_cache_louis + range_alias flush_user_cache_range + range_alias coherent_kern_range + range_alias coherent_user_range + range_alias dma_unmap_area + + define_cache_functions feroceon_range + + .align 5 +ENTRY(cpu_feroceon_dcache_clean_area) +#if defined(CONFIG_CACHE_FEROCEON_L2) && \ + !defined(CONFIG_CACHE_FEROCEON_L2_WRITETHROUGH) + mov r2, r0 + mov r3, r1 +#endif +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + subs r1, r1, #CACHE_DLINESIZE + bhi 1b +#if defined(CONFIG_CACHE_FEROCEON_L2) && \ + !defined(CONFIG_CACHE_FEROCEON_L2_WRITETHROUGH) +1: mcr p15, 1, r2, c15, c9, 1 @ clean L2 entry + add r2, r2, #CACHE_DLINESIZE + subs r3, r3, #CACHE_DLINESIZE + bhi 1b +#endif + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* =============================== PageTable ============================== */ + +/* + * cpu_feroceon_switch_mm(pgd) + * + * Set the translation base pointer to be as described by pgd. + * + * pgd: new page tables + */ + .align 5 +ENTRY(cpu_feroceon_switch_mm) +#ifdef CONFIG_MMU + /* + * Note: we wish to call __flush_whole_cache but we need to preserve + * lr to do so. 
The only way without touching main memory is to + * use r2 which is normally used to test the VM_EXEC flag, and + * compensate locally for the skipped ops if it is not set. + */ + mov r2, lr @ abuse r2 to preserve lr + bl __flush_whole_cache + @ if r2 contains the VM_EXEC bit then the next 2 ops are done already + tst r2, #VM_EXEC + mcreq p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcreq p15, 0, ip, c7, c10, 4 @ drain WB + + mcr p15, 0, r0, c2, c0, 0 @ load page table pointer + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs + mov pc, r2 +#else + mov pc, lr +#endif + +/* + * cpu_feroceon_set_pte_ext(ptep, pte, ext) + * + * Set a PTE and flush it out + */ + .align 5 +ENTRY(cpu_feroceon_set_pte_ext) +#ifdef CONFIG_MMU + armv3_set_pte_ext wc_disable=0 + mov r0, r0 + mcr p15, 0, r0, c7, c10, 1 @ clean D entry +#if defined(CONFIG_CACHE_FEROCEON_L2) && \ + !defined(CONFIG_CACHE_FEROCEON_L2_WRITETHROUGH) + mcr p15, 1, r0, c15, c9, 1 @ clean L2 entry +#endif + mcr p15, 0, r0, c7, c10, 4 @ drain WB +#endif + mov pc, lr + +/* Suspend/resume support: taken from arch/arm/mm/proc-arm926.S */ +.globl cpu_feroceon_suspend_size +.equ cpu_feroceon_suspend_size, 4 * 3 +#ifdef CONFIG_ARM_CPU_SUSPEND +ENTRY(cpu_feroceon_do_suspend) + stmfd sp!, {r4 - r6, lr} + mrc p15, 0, r4, c13, c0, 0 @ PID + mrc p15, 0, r5, c3, c0, 0 @ Domain ID + mrc p15, 0, r6, c1, c0, 0 @ Control register + stmia r0, {r4 - r6} + ldmfd sp!, {r4 - r6, pc} +ENDPROC(cpu_feroceon_do_suspend) + +ENTRY(cpu_feroceon_do_resume) + mov ip, #0 + mcr p15, 0, ip, c8, c7, 0 @ invalidate I+D TLBs + mcr p15, 0, ip, c7, c7, 0 @ invalidate I+D caches + ldmia r0, {r4 - r6} + mcr p15, 0, r4, c13, c0, 0 @ PID + mcr p15, 0, r5, c3, c0, 0 @ Domain ID + mcr p15, 0, r1, c2, c0, 0 @ TTB address + mov r0, r6 @ control register + b cpu_resume_mmu +ENDPROC(cpu_feroceon_do_resume) +#endif + + .type __feroceon_setup, #function +__feroceon_setup: + mov r0, #0 + mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4 +#ifdef CONFIG_MMU + mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4 +#endif + + adr r5, feroceon_crval + ldmia r5, {r5, r6} + mrc p15, 0, r0, c1, c0 @ get control register v4 + bic r0, r0, r5 + orr r0, r0, r6 + mov pc, lr + .size __feroceon_setup, . 
- __feroceon_setup + + /* + * B + * R P + * .RVI UFRS BLDP WCAM + * .011 .001 ..11 0101 + * + */ + .type feroceon_crval, #object +feroceon_crval: + crval clear=0x0000773f, mmuset=0x00003135, ucset=0x00001134 + + __INITDATA + + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions feroceon, dabort=v5t_early_abort, pabort=legacy_pabort + + .section ".rodata" + + string cpu_arch_name, "armv5te" + string cpu_elf_name, "v5" + string cpu_feroceon_name, "Feroceon" + string cpu_88fr531_name, "Feroceon 88FR531-vd" + string cpu_88fr571_name, "Feroceon 88FR571-vd" + string cpu_88fr131_name, "Feroceon 88FR131" + + .align + + .section ".proc.info.init", #alloc, #execinstr + +.macro feroceon_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cache:req + .type __\name\()_proc_info,#object +__\name\()_proc_info: + .long \cpu_val + .long \cpu_mask + .long PMD_TYPE_SECT | \ + PMD_SECT_BUFFERABLE | \ + PMD_SECT_CACHEABLE | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + .long PMD_TYPE_SECT | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + b __feroceon_setup + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP + .long \cpu_name + .long feroceon_processor_functions + .long v4wbi_tlb_fns + .long feroceon_user_fns + .long \cache + .size __\name\()_proc_info, . - __\name\()_proc_info +.endm + +#ifdef CONFIG_CPU_FEROCEON_OLD_ID + feroceon_proc_info feroceon_old_id, 0x41009260, 0xff00fff0, \ + cpu_name=cpu_feroceon_name, cache=feroceon_cache_fns +#endif + + feroceon_proc_info 88fr531, 0x56055310, 0xfffffff0, cpu_88fr531_name, \ + cache=feroceon_cache_fns + feroceon_proc_info 88fr571, 0x56155710, 0xfffffff0, cpu_88fr571_name, \ + cache=feroceon_range_cache_fns + feroceon_proc_info 88fr131, 0x56251310, 0xfffffff0, cpu_88fr131_name, \ + cache=feroceon_range_cache_fns diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S index 7cfc2604a1e..ee1d8059395 100644 --- a/arch/arm/mm/proc-macros.S +++ b/arch/arm/mm/proc-macros.S @@ -38,9 +38,14 @@ /* * mmid - get context id from mm pointer (mm->context.id) + * note, this field is 64bit, so in big-endian the two words are swapped too. */ .macro mmid, rd, rn +#ifdef __ARMEB__ + ldr \rd, [\rn, #MM_CONTEXT_ID + 4 ] +#else ldr \rd, [\rn, #MM_CONTEXT_ID] +#endif .endm /* @@ -49,3 +54,280 @@ .macro asid, rd, rn and \rd, \rn, #255 .endm + + .macro crval, clear, mmuset, ucset +#ifdef CONFIG_MMU + .word \clear + .word \mmuset +#else + .word \clear + .word \ucset +#endif + .endm + +/* + * dcache_line_size - get the minimum D-cache line size from the CTR register + * on ARMv7. + */ + .macro dcache_line_size, reg, tmp + mrc p15, 0, \tmp, c0, c0, 1 @ read ctr + lsr \tmp, \tmp, #16 + and \tmp, \tmp, #0xf @ cache line size encoding + mov \reg, #4 @ bytes per word + mov \reg, \reg, lsl \tmp @ actual cache line size + .endm + +/* + * icache_line_size - get the minimum I-cache line size from the CTR register + * on ARMv7. + */ + .macro icache_line_size, reg, tmp + mrc p15, 0, \tmp, c0, c0, 1 @ read ctr + and \tmp, \tmp, #0xf @ cache line size encoding + mov \reg, #4 @ bytes per word + mov \reg, \reg, lsl \tmp @ actual cache line size + .endm + +/* + * Sanity check the PTE configuration for the code below - which makes + * certain assumptions about how these bits are laid out. 
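The crval records used by the rewritten __xxx_setup routines (arm925_crval, arm926_crval, feroceon_crval) are consumed by the macro above: it emits the clear mask followed by either the MMU or the no-MMU set mask, selected at build time by CONFIG_MMU. Each setup routine then loads the pair with ldmia, folds it into the control register value it read with "mrc p15, 0, r0, c1, c0", and returns the result to its caller. The net effect, in C:

	#include <stdint.h>

	/* "bic r0, r0, r5 ; orr r0, r0, r6" from the __xxx_setup routines;
	 * whether `set` is the mmuset or the ucset word is a build-time choice */
	static inline uint32_t apply_crval(uint32_t cp15_ctrl, uint32_t clear, uint32_t set)
	{
		return (cp15_ctrl & ~clear) | set;
	}

	/* e.g. an MMU build of proc-arm926.S: apply_crval(ctrl, 0x00007f3f, 0x00003135) */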
+ */ +#ifdef CONFIG_MMU +#if L_PTE_SHARED != PTE_EXT_SHARED +#error PTE shared bit mismatch +#endif +#if !defined (CONFIG_ARM_LPAE) && \ + (L_PTE_XN+L_PTE_USER+L_PTE_RDONLY+L_PTE_DIRTY+L_PTE_YOUNG+\ + L_PTE_FILE+L_PTE_PRESENT) > L_PTE_SHARED +#error Invalid Linux PTE bit settings +#endif +#endif /* CONFIG_MMU */ + +/* + * The ARMv6 and ARMv7 set_pte_ext translation function. + * + * Permission translation: + * YUWD APX AP1 AP0 SVC User + * 0xxx 0 0 0 no acc no acc + * 100x 1 0 1 r/o no acc + * 10x0 1 0 1 r/o no acc + * 1011 0 0 1 r/w no acc + * 110x 1 1 1 r/o r/o + * 11x0 1 1 1 r/o r/o + * 1111 0 1 1 r/w r/w + */ + .macro armv6_mt_table pfx +\pfx\()_mt_table: + .long 0x00 @ L_PTE_MT_UNCACHED + .long PTE_EXT_TEX(1) @ L_PTE_MT_BUFFERABLE + .long PTE_CACHEABLE @ L_PTE_MT_WRITETHROUGH + .long PTE_CACHEABLE | PTE_BUFFERABLE @ L_PTE_MT_WRITEBACK + .long PTE_BUFFERABLE @ L_PTE_MT_DEV_SHARED + .long 0x00 @ unused + .long 0x00 @ L_PTE_MT_MINICACHE (not present) + .long PTE_EXT_TEX(1) | PTE_CACHEABLE | PTE_BUFFERABLE @ L_PTE_MT_WRITEALLOC + .long 0x00 @ unused + .long PTE_EXT_TEX(1) @ L_PTE_MT_DEV_WC + .long 0x00 @ unused + .long PTE_CACHEABLE | PTE_BUFFERABLE @ L_PTE_MT_DEV_CACHED + .long PTE_EXT_TEX(2) @ L_PTE_MT_DEV_NONSHARED + .long 0x00 @ unused + .long 0x00 @ unused + .long PTE_CACHEABLE | PTE_BUFFERABLE | PTE_EXT_APX @ L_PTE_MT_VECTORS + .endm + + .macro armv6_set_pte_ext pfx + str r1, [r0], #2048 @ linux version + + bic r3, r1, #0x000003fc + bic r3, r3, #PTE_TYPE_MASK + orr r3, r3, r2 + orr r3, r3, #PTE_EXT_AP0 | 2 + + adr ip, \pfx\()_mt_table + and r2, r1, #L_PTE_MT_MASK + ldr r2, [ip, r2] + + eor r1, r1, #L_PTE_DIRTY + tst r1, #L_PTE_DIRTY|L_PTE_RDONLY + orrne r3, r3, #PTE_EXT_APX + + tst r1, #L_PTE_USER + orrne r3, r3, #PTE_EXT_AP1 + tstne r3, #PTE_EXT_APX + + @ user read-only -> kernel read-only + bicne r3, r3, #PTE_EXT_AP0 + + tst r1, #L_PTE_XN + orrne r3, r3, #PTE_EXT_XN + + eor r3, r3, r2 + + tst r1, #L_PTE_YOUNG + tstne r1, #L_PTE_PRESENT + moveq r3, #0 + tstne r1, #L_PTE_NONE + movne r3, #0 + + str r3, [r0] + mcr p15, 0, r0, c7, c10, 1 @ flush_pte + .endm + + +/* + * The ARMv3, ARMv4 and ARMv5 set_pte_ext translation function, + * covering most CPUs except Xscale and Xscale 3. + * + * Permission translation: + * YUWD AP SVC User + * 0xxx 0x00 no acc no acc + * 100x 0x00 r/o no acc + * 10x0 0x00 r/o no acc + * 1011 0x55 r/w no acc + * 110x 0xaa r/w r/o + * 11x0 0xaa r/w r/o + * 1111 0xff r/w r/w + */ + .macro armv3_set_pte_ext wc_disable=1 + str r1, [r0], #2048 @ linux version + + eor r3, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY + + bic r2, r1, #PTE_SMALL_AP_MASK @ keep C, B bits + bic r2, r2, #PTE_TYPE_MASK + orr r2, r2, #PTE_TYPE_SMALL + + tst r3, #L_PTE_USER @ user? + orrne r2, r2, #PTE_SMALL_AP_URO_SRW + + tst r3, #L_PTE_RDONLY | L_PTE_DIRTY @ write and dirty? + orreq r2, r2, #PTE_SMALL_AP_UNO_SRW + + tst r3, #L_PTE_PRESENT | L_PTE_YOUNG @ present and young? + movne r2, #0 + + .if \wc_disable +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + tst r2, #PTE_CACHEABLE + bicne r2, r2, #PTE_BUFFERABLE +#endif + .endif + str r2, [r0] @ hardware version + .endm + + +/* + * Xscale set_pte_ext translation, split into two halves to cope + * with work-arounds. r3 must be preserved by code between these + * two macros. 
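All of the set_pte_ext helpers in this file (the ARMv6 and ARMv3 macros above and the Xscale pair that follows) share one layout convention, visible in the "linux version" and "hardware version" stores: the Linux view of the PTE is written at the pointer the caller passes in, and the translated descriptor that the MMU actually walks is written 2048 bytes further on. A schematic C rendering, where translate_to_hw() stands in for the per-variant permission translation and clean_dcache_line() for the "clean D entry" operation that the CPU-specific code issues afterwards:

	#include <stdint.h>

	/* hypothetical stand-ins for the permission translation done by the
	 * macros above and for the cache maintenance that follows the store */
	extern uint32_t translate_to_hw(uint32_t linux_pte);
	extern void clean_dcache_line(void *addr);

	static void set_pte_ext_model(uint32_t *ptep, uint32_t linux_pte)
	{
		uint32_t *hw_ptep = ptep + 2048 / sizeof(*ptep);  /* "str r1, [r0], #2048" */

		*ptep = linux_pte;			/* linux version    */
		*hw_ptep = translate_to_hw(linux_pte);	/* hardware version */
		clean_dcache_line(hw_ptep);
	}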
+ * + * Permission translation: + * YUWD AP SVC User + * 0xxx 00 no acc no acc + * 100x 00 r/o no acc + * 10x0 00 r/o no acc + * 1011 01 r/w no acc + * 110x 10 r/w r/o + * 11x0 10 r/w r/o + * 1111 11 r/w r/w + */ + .macro xscale_set_pte_ext_prologue + str r1, [r0] @ linux version + + eor r3, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY + + bic r2, r1, #PTE_SMALL_AP_MASK @ keep C, B bits + orr r2, r2, #PTE_TYPE_EXT @ extended page + + tst r3, #L_PTE_USER @ user? + orrne r2, r2, #PTE_EXT_AP_URO_SRW @ yes -> user r/o, system r/w + + tst r3, #L_PTE_RDONLY | L_PTE_DIRTY @ write and dirty? + orreq r2, r2, #PTE_EXT_AP_UNO_SRW @ yes -> user n/a, system r/w + @ combined with user -> user r/w + .endm + + .macro xscale_set_pte_ext_epilogue + tst r3, #L_PTE_PRESENT | L_PTE_YOUNG @ present and young? + movne r2, #0 @ no -> fault + + str r2, [r0, #2048]! @ hardware version + mov ip, #0 + mcr p15, 0, r0, c7, c10, 1 @ clean L1 D line + mcr p15, 0, ip, c7, c10, 4 @ data write barrier + .endm + +.macro define_processor_functions name:req, dabort:req, pabort:req, nommu=0, suspend=0 + .type \name\()_processor_functions, #object + .align 2 +ENTRY(\name\()_processor_functions) + .word \dabort + .word \pabort + .word cpu_\name\()_proc_init + .word cpu_\name\()_proc_fin + .word cpu_\name\()_reset + .word cpu_\name\()_do_idle + .word cpu_\name\()_dcache_clean_area + .word cpu_\name\()_switch_mm + + .if \nommu + .word 0 + .else + .word cpu_\name\()_set_pte_ext + .endif + + .if \suspend + .word cpu_\name\()_suspend_size +#ifdef CONFIG_PM_SLEEP + .word cpu_\name\()_do_suspend + .word cpu_\name\()_do_resume +#else + .word 0 + .word 0 +#endif + .else + .word 0 + .word 0 + .word 0 + .endif + + .size \name\()_processor_functions, . - \name\()_processor_functions +.endm + +.macro define_cache_functions name:req + .align 2 + .type \name\()_cache_fns, #object +ENTRY(\name\()_cache_fns) + .long \name\()_flush_icache_all + .long \name\()_flush_kern_cache_all + .long \name\()_flush_kern_cache_louis + .long \name\()_flush_user_cache_all + .long \name\()_flush_user_cache_range + .long \name\()_coherent_kern_range + .long \name\()_coherent_user_range + .long \name\()_flush_kern_dcache_area + .long \name\()_dma_map_area + .long \name\()_dma_unmap_area + .long \name\()_dma_flush_range + .size \name\()_cache_fns, . - \name\()_cache_fns +.endm + +.macro define_tlb_functions name:req, flags_up:req, flags_smp + .type \name\()_tlb_fns, #object +ENTRY(\name\()_tlb_fns) + .long \name\()_flush_user_tlb_range + .long \name\()_flush_kern_tlb_range + .ifnb \flags_smp + ALT_SMP(.long \flags_smp ) + ALT_UP(.long \flags_up ) + .else + .long \flags_up + .endif + .size \name\()_tlb_fns, . - \name\()_tlb_fns +.endm + +.macro globl_equ x, y + .globl \x + .equ \x, \y +.endm diff --git a/arch/arm/mm/proc-mohawk.S b/arch/arm/mm/proc-mohawk.S new file mode 100644 index 00000000000..40acba59573 --- /dev/null +++ b/arch/arm/mm/proc-mohawk.S @@ -0,0 +1,455 @@ +/* + * linux/arch/arm/mm/proc-mohawk.S: MMU functions for Marvell PJ1 core + * + * PJ1 (codename Mohawk) is a hybrid of the xscale3 and Marvell's own core. + * + * Heavily based on proc-arm926.S and proc-xsc3.S + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/assembler.h> +#include <asm/hwcap.h> +#include <asm/pgtable-hwdef.h> +#include <asm/pgtable.h> +#include <asm/page.h> +#include <asm/ptrace.h> +#include "proc-macros.S" + +/* + * This is the maximum size of an area which will be flushed. If the + * area is larger than this, then we flush the whole cache. + */ +#define CACHE_DLIMIT 32768 + +/* + * The cache line size of the L1 D cache. + */ +#define CACHE_DLINESIZE 32 + +/* + * cpu_mohawk_proc_init() + */ +ENTRY(cpu_mohawk_proc_init) + mov pc, lr + +/* + * cpu_mohawk_proc_fin() + */ +ENTRY(cpu_mohawk_proc_fin) + mrc p15, 0, r0, c1, c0, 0 @ ctrl register + bic r0, r0, #0x1800 @ ...iz........... + bic r0, r0, #0x0006 @ .............ca. + mcr p15, 0, r0, c1, c0, 0 @ disable caches + mov pc, lr + +/* + * cpu_mohawk_reset(loc) + * + * Perform a soft reset of the system. Put the CPU into the + * same state as it would be if it had been reset, and branch + * to what would be the reset vector. + * + * loc: location to jump to for soft reset + * + * (same as arm926) + */ + .align 5 + .pushsection .idmap.text, "ax" +ENTRY(cpu_mohawk_reset) + mov ip, #0 + mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs + mrc p15, 0, ip, c1, c0, 0 @ ctrl register + bic ip, ip, #0x0007 @ .............cam + bic ip, ip, #0x1100 @ ...i...s........ + mcr p15, 0, ip, c1, c0, 0 @ ctrl register + mov pc, r0 +ENDPROC(cpu_mohawk_reset) + .popsection + +/* + * cpu_mohawk_do_idle() + * + * Called with IRQs disabled + */ + .align 5 +ENTRY(cpu_mohawk_do_idle) + mov r0, #0 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + mcr p15, 0, r0, c7, c0, 4 @ wait for interrupt + mov pc, lr + +/* + * flush_icache_all() + * + * Unconditionally clean and invalidate the entire icache. + */ +ENTRY(mohawk_flush_icache_all) + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mov pc, lr +ENDPROC(mohawk_flush_icache_all) + +/* + * flush_user_cache_all() + * + * Clean and invalidate all cache entries in a particular + * address space. + */ +ENTRY(mohawk_flush_user_cache_all) + /* FALLTHROUGH */ + +/* + * flush_kern_cache_all() + * + * Clean and invalidate the entire cache. + */ +ENTRY(mohawk_flush_kern_cache_all) + mov r2, #VM_EXEC + mov ip, #0 +__flush_whole_cache: + mcr p15, 0, ip, c7, c14, 0 @ clean & invalidate all D cache + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcrne p15, 0, ip, c7, c10, 0 @ drain write buffer + mov pc, lr + +/* + * flush_user_cache_range(start, end, flags) + * + * Clean and invalidate a range of cache entries in the + * specified address range. 
+ * + * - start - start address (inclusive) + * - end - end address (exclusive) + * - flags - vm_flags describing address space + * + * (same as arm926) + */ +ENTRY(mohawk_flush_user_cache_range) + mov ip, #0 + sub r3, r1, r0 @ calculate total size + cmp r3, #CACHE_DLIMIT + bgt __flush_whole_cache +1: tst r2, #VM_EXEC + mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry + mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry + mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * coherent_kern_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start, end. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(mohawk_coherent_kern_range) + /* FALLTHROUGH */ + +/* + * coherent_user_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start, end. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + * + * (same as arm926) + */ +ENTRY(mohawk_coherent_user_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov r0, #0 + mov pc, lr + +/* + * flush_kern_dcache_area(void *addr, size_t size) + * + * Ensure no D cache aliasing occurs, either with itself or + * the I cache + * + * - addr - kernel address + * - size - region size + */ +ENTRY(mohawk_flush_kern_dcache_area) + add r1, r0, r1 +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_inv_range(start, end) + * + * Invalidate (discard) the specified virtual address range. + * May not write back any entries. If 'start' or 'end' + * are not cache line aligned, those lines must be written + * back. + * + * - start - virtual start address + * - end - virtual end address + * + * (same as v4wb) + */ +mohawk_dma_inv_range: + tst r0, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r0, c7, c10, 1 @ clean D entry + tst r1, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r1, c7, c10, 1 @ clean D entry + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_clean_range(start, end) + * + * Clean the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + * + * (same as v4wb) + */ +mohawk_dma_clean_range: + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_flush_range(start, end) + * + * Clean and invalidate the specified virtual address range. 
+ * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(mohawk_dma_flush_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: + mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(mohawk_dma_map_area) + add r1, r1, r0 + cmp r2, #DMA_TO_DEVICE + beq mohawk_dma_clean_range + bcs mohawk_dma_inv_range + b mohawk_dma_flush_range +ENDPROC(mohawk_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(mohawk_dma_unmap_area) + mov pc, lr +ENDPROC(mohawk_dma_unmap_area) + + .globl mohawk_flush_kern_cache_louis + .equ mohawk_flush_kern_cache_louis, mohawk_flush_kern_cache_all + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions mohawk + +ENTRY(cpu_mohawk_dcache_clean_area) +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + subs r1, r1, #CACHE_DLINESIZE + bhi 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * cpu_mohawk_switch_mm(pgd) + * + * Set the translation base pointer to be as described by pgd. + * + * pgd: new page tables + */ + .align 5 +ENTRY(cpu_mohawk_switch_mm) + mov ip, #0 + mcr p15, 0, ip, c7, c14, 0 @ clean & invalidate all D cache + mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcr p15, 0, ip, c7, c10, 4 @ drain WB + orr r0, r0, #0x18 @ cache the page table in L2 + mcr p15, 0, r0, c2, c0, 0 @ load page table pointer + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs + mov pc, lr + +/* + * cpu_mohawk_set_pte_ext(ptep, pte, ext) + * + * Set a PTE and flush it out + */ + .align 5 +ENTRY(cpu_mohawk_set_pte_ext) + armv3_set_pte_ext + mov r0, r0 + mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +.globl cpu_mohawk_suspend_size +.equ cpu_mohawk_suspend_size, 4 * 6 +#ifdef CONFIG_ARM_CPU_SUSPEND +ENTRY(cpu_mohawk_do_suspend) + stmfd sp!, {r4 - r9, lr} + mrc p14, 0, r4, c6, c0, 0 @ clock configuration, for turbo mode + mrc p15, 0, r5, c15, c1, 0 @ CP access reg + mrc p15, 0, r6, c13, c0, 0 @ PID + mrc p15, 0, r7, c3, c0, 0 @ domain ID + mrc p15, 0, r8, c1, c0, 1 @ auxiliary control reg + mrc p15, 0, r9, c1, c0, 0 @ control reg + bic r4, r4, #2 @ clear frequency change bit + stmia r0, {r4 - r9} @ store cp regs + ldmia sp!, {r4 - r9, pc} +ENDPROC(cpu_mohawk_do_suspend) + +ENTRY(cpu_mohawk_do_resume) + ldmia r0, {r4 - r9} @ load cp regs + mov ip, #0 + mcr p15, 0, ip, c7, c7, 0 @ invalidate I & D caches, BTB + mcr p15, 0, ip, c7, c10, 4 @ drain write (&fill) buffer + mcr p15, 0, ip, c7, c5, 4 @ flush prefetch buffer + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs + mcr p14, 0, r4, c6, c0, 0 @ clock configuration, turbo mode. 
+ mcr p15, 0, r5, c15, c1, 0 @ CP access reg + mcr p15, 0, r6, c13, c0, 0 @ PID + mcr p15, 0, r7, c3, c0, 0 @ domain ID + orr r1, r1, #0x18 @ cache the page table in L2 + mcr p15, 0, r1, c2, c0, 0 @ translation table base addr + mcr p15, 0, r8, c1, c0, 1 @ auxiliary control reg + mov r0, r9 @ control register + b cpu_resume_mmu +ENDPROC(cpu_mohawk_do_resume) +#endif + + .type __mohawk_setup, #function +__mohawk_setup: + mov r0, #0 + mcr p15, 0, r0, c7, c7 @ invalidate I,D caches + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs + orr r4, r4, #0x18 @ cache the page table in L2 + mcr p15, 0, r4, c2, c0, 0 @ load page table pointer + + mov r0, #0 @ don't allow CP access + mcr p15, 0, r0, c15, c1, 0 @ write CP access register + + adr r5, mohawk_crval + ldmia r5, {r5, r6} + mrc p15, 0, r0, c1, c0 @ get control register + bic r0, r0, r5 + orr r0, r0, r6 + mov pc, lr + + .size __mohawk_setup, . - __mohawk_setup + + /* + * R + * .RVI ZFRS BLDP WCAM + * .011 1001 ..00 0101 + * + */ + .type mohawk_crval, #object +mohawk_crval: + crval clear=0x00007f3f, mmuset=0x00003905, ucset=0x00001134 + + __INITDATA + + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions mohawk, dabort=v5t_early_abort, pabort=legacy_pabort + + .section ".rodata" + + string cpu_arch_name, "armv5te" + string cpu_elf_name, "v5" + string cpu_mohawk_name, "Marvell 88SV331x" + + .align + + .section ".proc.info.init", #alloc, #execinstr + + .type __88sv331x_proc_info,#object +__88sv331x_proc_info: + .long 0x56158000 @ Marvell 88SV331x (MOHAWK) + .long 0xfffff000 + .long PMD_TYPE_SECT | \ + PMD_SECT_BUFFERABLE | \ + PMD_SECT_CACHEABLE | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + .long PMD_TYPE_SECT | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + b __mohawk_setup + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP + .long cpu_mohawk_name + .long mohawk_processor_functions + .long v4wbi_tlb_fns + .long v4wb_user_fns + .long mohawk_cache_fns + .size __88sv331x_proc_info, . - __88sv331x_proc_info diff --git a/arch/arm/mm/proc-sa110.S b/arch/arm/mm/proc-sa110.S index bd330c4075a..c45319c8f1d 100644 --- a/arch/arm/mm/proc-sa110.S +++ b/arch/arm/mm/proc-sa110.S @@ -2,6 +2,7 @@ * linux/arch/arm/mm/proc-sa110.S * * Copyright (C) 1997-2002 Russell King + * hacked for non-paged-MM by Hyok S. Choi, 2003. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -16,31 +17,19 @@ #include <linux/init.h> #include <asm/assembler.h> #include <asm/asm-offsets.h> -#include <asm/procinfo.h> -#include <asm/hardware.h> +#include <asm/hwcap.h> +#include <mach/hardware.h> +#include <asm/pgtable-hwdef.h> #include <asm/pgtable.h> #include <asm/ptrace.h> +#include "proc-macros.S" + /* * the cache line size of the I and D cache */ #define DCACHELINESIZE 32 -#define FLUSH_OFFSET 32768 - - .macro flush_110_dcache rd, ra, re - ldr \rd, =flush_base - ldr \ra, [\rd] - eor \ra, \ra, #FLUSH_OFFSET - str \ra, [\rd] - add \re, \ra, #16384 @ only necessary for 16k -1001: ldr \rd, [\ra], #DCACHELINESIZE - teq \re, \ra - bne 1001b - .endm - .data -flush_base: - .long FLUSH_BASE .text /* @@ -55,17 +44,13 @@ ENTRY(cpu_sa110_proc_init) * cpu_sa110_proc_fin() */ ENTRY(cpu_sa110_proc_fin) - stmfd sp!, {lr} - mov ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE - msr cpsr_c, ip - bl v4wb_flush_kern_cache_all @ clean caches -1: mov r0, #0 + mov r0, #0 mcr p15, 0, r0, c15, c2, 2 @ Disable clock switching mrc p15, 0, r0, c1, c0, 0 @ ctrl register bic r0, r0, #0x1000 @ ...i............ bic r0, r0, #0x000e @ ............wca. mcr p15, 0, r0, c1, c0, 0 @ disable caches - ldmfd sp!, {pc} + mov pc, lr /* * cpu_sa110_reset(loc) @@ -77,16 +62,21 @@ ENTRY(cpu_sa110_proc_fin) * loc: location to jump to for soft reset */ .align 5 + .pushsection .idmap.text, "ax" ENTRY(cpu_sa110_reset) mov ip, #0 mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches mcr p15, 0, ip, c7, c10, 4 @ drain WB +#ifdef CONFIG_MMU mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs +#endif mrc p15, 0, ip, c1, c0, 0 @ ctrl register bic ip, ip, #0x000f @ ............wcam bic ip, ip, #0x1100 @ ...i...s........ mcr p15, 0, ip, c1, c0, 0 @ ctrl register mov pc, r0 +ENDPROC(cpu_sa110_reset) + .popsection /* * cpu_sa110_do_idle(type) @@ -144,57 +134,45 @@ ENTRY(cpu_sa110_dcache_clean_area) */ .align 5 ENTRY(cpu_sa110_switch_mm) - flush_110_dcache r3, ip, r1 - mov r1, #0 - mcr p15, 0, r1, c7, c5, 0 @ invalidate I cache - mcr p15, 0, r1, c7, c10, 4 @ drain WB +#ifdef CONFIG_MMU + str lr, [sp, #-4]! + bl v4wb_flush_kern_cache_all @ clears IP mcr p15, 0, r0, c2, c0, 0 @ load page table pointer - mcr p15, 0, r1, c8, c7, 0 @ invalidate I & D TLBs + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs + ldr pc, [sp], #4 +#else mov pc, lr +#endif /* - * cpu_sa110_set_pte(ptep, pte) + * cpu_sa110_set_pte_ext(ptep, pte, ext) * * Set a PTE and flush it out */ .align 5 -ENTRY(cpu_sa110_set_pte) - str r1, [r0], #-2048 @ linux version - - eor r1, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY - - bic r2, r1, #PTE_SMALL_AP_MASK - bic r2, r2, #PTE_TYPE_MASK - orr r2, r2, #PTE_TYPE_SMALL - - tst r1, #L_PTE_USER @ User? - orrne r2, r2, #PTE_SMALL_AP_URO_SRW - - tst r1, #L_PTE_WRITE | L_PTE_DIRTY @ Write and Dirty? - orreq r2, r2, #PTE_SMALL_AP_UNO_SRW - - tst r1, #L_PTE_PRESENT | L_PTE_YOUNG @ Present and Young? 
- movne r2, #0 - - str r2, [r0] @ hardware version +ENTRY(cpu_sa110_set_pte_ext) +#ifdef CONFIG_MMU + armv3_set_pte_ext wc_disable=0 mov r0, r0 mcr p15, 0, r0, c7, c10, 1 @ clean D entry mcr p15, 0, r0, c7, c10, 4 @ drain WB +#endif mov pc, lr - __INIT - .type __sa110_setup, #function __sa110_setup: mov r10, #0 mcr p15, 0, r10, c7, c7 @ invalidate I,D caches on v4 mcr p15, 0, r10, c7, c10, 4 @ drain write buffer on v4 +#ifdef CONFIG_MMU mcr p15, 0, r10, c8, c7 @ invalidate I,D TLBs on v4 +#endif + + adr r5, sa110_crval + ldmia r5, {r5, r6} mrc p15, 0, r0, c1, c0 @ get control register v4 - ldr r5, sa110_cr1_clear bic r0, r0, r5 - ldr r5, sa110_cr1_set - orr r0, r0, r5 + orr r0, r0, r6 mov pc, lr .size __sa110_setup, . - __sa110_setup @@ -204,48 +182,20 @@ __sa110_setup: * ..01 0001 ..11 1101 * */ - .type sa110_cr1_clear, #object - .type sa110_cr1_set, #object -sa110_cr1_clear: - .word 0x3f3f -sa110_cr1_set: - .word 0x113d + .type sa110_crval, #object +sa110_crval: + crval clear=0x00003f3f, mmuset=0x0000113d, ucset=0x00001130 __INITDATA -/* - * Purpose : Function pointers used to access above functions - all calls - * come through these - */ - - .type sa110_processor_functions, #object -ENTRY(sa110_processor_functions) - .word v4_early_abort - .word cpu_sa110_proc_init - .word cpu_sa110_proc_fin - .word cpu_sa110_reset - .word cpu_sa110_do_idle - .word cpu_sa110_dcache_clean_area - .word cpu_sa110_switch_mm - .word cpu_sa110_set_pte - .size sa110_processor_functions, . - sa110_processor_functions + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions sa110, dabort=v4_early_abort, pabort=legacy_pabort .section ".rodata" - .type cpu_arch_name, #object -cpu_arch_name: - .asciz "armv4" - .size cpu_arch_name, . - cpu_arch_name - - .type cpu_elf_name, #object -cpu_elf_name: - .asciz "v4" - .size cpu_elf_name, . - cpu_elf_name - - .type cpu_sa110_name, #object -cpu_sa110_name: - .asciz "StrongARM-110" - .size cpu_sa110_name, . - cpu_sa110_name + string cpu_arch_name, "armv4" + string cpu_elf_name, "v4" + string cpu_sa110_name, "StrongARM-110" .align @@ -260,6 +210,9 @@ __sa110_proc_info: PMD_SECT_CACHEABLE | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ + .long PMD_TYPE_SECT | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ b __sa110_setup .long cpu_arch_name .long cpu_elf_name diff --git a/arch/arm/mm/proc-sa1100.S b/arch/arm/mm/proc-sa1100.S index 91b89124c0d..09d241ae2db 100644 --- a/arch/arm/mm/proc-sa1100.S +++ b/arch/arm/mm/proc-sa1100.S @@ -2,6 +2,7 @@ * linux/arch/arm/mm/proc-sa1100.S * * Copyright (C) 1997-2002 Russell King + * hacked for non-paged-MM by Hyok S. Choi, 2003. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -21,40 +22,19 @@ #include <linux/init.h> #include <asm/assembler.h> #include <asm/asm-offsets.h> -#include <asm/procinfo.h> -#include <asm/hardware.h> +#include <asm/hwcap.h> +#include <mach/hardware.h> +#include <asm/pgtable-hwdef.h> #include <asm/pgtable.h> +#include "proc-macros.S" + /* * the cache line size of the I and D cache */ #define DCACHELINESIZE 32 -#define FLUSH_OFFSET 32768 - - .macro flush_1100_dcache rd, ra, re - ldr \rd, =flush_base - ldr \ra, [\rd] - eor \ra, \ra, #FLUSH_OFFSET - str \ra, [\rd] - add \re, \ra, #8192 @ only necessary for 8k -1001: ldr \rd, [\ra], #DCACHELINESIZE - teq \re, \ra - bne 1001b -#ifdef FLUSH_BASE_MINICACHE - add \ra, \ra, #FLUSH_BASE_MINICACHE - FLUSH_BASE - add \re, \ra, #512 @ only 512 bytes -1002: ldr \rd, [\ra], #DCACHELINESIZE - teq \re, \ra - bne 1002b -#endif - .endm - .data -flush_base: - .long FLUSH_BASE - .text - - __INIT + .section .text /* * cpu_sa1100_proc_init() @@ -65,8 +45,6 @@ ENTRY(cpu_sa1100_proc_init) mcr p15, 0, r0, c9, c0, 5 @ Allow read-buffer operations from userland mov pc, lr - .previous - /* * cpu_sa1100_proc_fin() * @@ -75,17 +53,12 @@ ENTRY(cpu_sa1100_proc_init) * - Clean and turn off caches. */ ENTRY(cpu_sa1100_proc_fin) - stmfd sp!, {lr} - mov ip, #PSR_F_BIT | PSR_I_BIT | SVC_MODE - msr cpsr_c, ip - flush_1100_dcache r0, r1, r2 @ clean caches - mov r0, #0 - mcr p15, 0, r0, c15, c2, 2 @ Disable clock switching + mcr p15, 0, ip, c15, c2, 2 @ Disable clock switching mrc p15, 0, r0, c1, c0, 0 @ ctrl register bic r0, r0, #0x1000 @ ...i............ bic r0, r0, #0x000e @ ............wca. mcr p15, 0, r0, c1, c0, 0 @ disable caches - ldmfd sp!, {pc} + mov pc, lr /* * cpu_sa1100_reset(loc) @@ -97,16 +70,21 @@ ENTRY(cpu_sa1100_proc_fin) * loc: location to jump to for soft reset */ .align 5 + .pushsection .idmap.text, "ax" ENTRY(cpu_sa1100_reset) mov ip, #0 mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches mcr p15, 0, ip, c7, c10, 4 @ drain WB +#ifdef CONFIG_MMU mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs +#endif mrc p15, 0, ip, c1, c0, 0 @ ctrl register bic ip, ip, #0x000f @ ............wcam bic ip, ip, #0x1100 @ ...i...s........ mcr p15, 0, ip, c1, c0, 0 @ ctrl register mov pc, r0 +ENDPROC(cpu_sa1100_reset) + .popsection /* * cpu_sa1100_do_idle(type) @@ -166,58 +144,73 @@ ENTRY(cpu_sa1100_dcache_clean_area) */ .align 5 ENTRY(cpu_sa1100_switch_mm) - flush_1100_dcache r3, ip, r1 - mov ip, #0 - mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache +#ifdef CONFIG_MMU + str lr, [sp, #-4]! + bl v4wb_flush_kern_cache_all @ clears IP mcr p15, 0, ip, c9, c0, 0 @ invalidate RB - mcr p15, 0, ip, c7, c10, 4 @ drain WB mcr p15, 0, r0, c2, c0, 0 @ load page table pointer mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs + ldr pc, [sp], #4 +#else mov pc, lr +#endif /* - * cpu_sa1100_set_pte(ptep, pte) + * cpu_sa1100_set_pte_ext(ptep, pte, ext) * * Set a PTE and flush it out */ .align 5 -ENTRY(cpu_sa1100_set_pte) - str r1, [r0], #-2048 @ linux version - - eor r1, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY - - bic r2, r1, #PTE_SMALL_AP_MASK - bic r2, r2, #PTE_TYPE_MASK - orr r2, r2, #PTE_TYPE_SMALL - - tst r1, #L_PTE_USER @ User? - orrne r2, r2, #PTE_SMALL_AP_URO_SRW - - tst r1, #L_PTE_WRITE | L_PTE_DIRTY @ Write and Dirty? - orreq r2, r2, #PTE_SMALL_AP_UNO_SRW - - tst r1, #L_PTE_PRESENT | L_PTE_YOUNG @ Present and Young? 
- movne r2, #0 - - str r2, [r0] @ hardware version +ENTRY(cpu_sa1100_set_pte_ext) +#ifdef CONFIG_MMU + armv3_set_pte_ext wc_disable=0 mov r0, r0 mcr p15, 0, r0, c7, c10, 1 @ clean D entry mcr p15, 0, r0, c7, c10, 4 @ drain WB +#endif mov pc, lr - __INIT +.globl cpu_sa1100_suspend_size +.equ cpu_sa1100_suspend_size, 4 * 3 +#ifdef CONFIG_ARM_CPU_SUSPEND +ENTRY(cpu_sa1100_do_suspend) + stmfd sp!, {r4 - r6, lr} + mrc p15, 0, r4, c3, c0, 0 @ domain ID + mrc p15, 0, r5, c13, c0, 0 @ PID + mrc p15, 0, r6, c1, c0, 0 @ control reg + stmia r0, {r4 - r6} @ store cp regs + ldmfd sp!, {r4 - r6, pc} +ENDPROC(cpu_sa1100_do_suspend) + +ENTRY(cpu_sa1100_do_resume) + ldmia r0, {r4 - r6} @ load cp regs + mov ip, #0 + mcr p15, 0, ip, c8, c7, 0 @ flush I+D TLBs + mcr p15, 0, ip, c7, c7, 0 @ flush I&D cache + mcr p15, 0, ip, c9, c0, 0 @ invalidate RB + mcr p15, 0, ip, c9, c0, 5 @ allow user space to use RB + + mcr p15, 0, r4, c3, c0, 0 @ domain ID + mcr p15, 0, r1, c2, c0, 0 @ translation table base addr + mcr p15, 0, r5, c13, c0, 0 @ PID + mov r0, r6 @ control register + b cpu_resume_mmu +ENDPROC(cpu_sa1100_do_resume) +#endif .type __sa1100_setup, #function __sa1100_setup: mov r0, #0 mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4 mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4 +#ifdef CONFIG_MMU mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4 +#endif + adr r5, sa1100_crval + ldmia r5, {r5, r6} mrc p15, 0, r0, c1, c0 @ get control register v4 - ldr r5, sa1100_cr1_clear bic r0, r0, r5 - ldr r5, sa1100_cr1_set - orr r0, r0, r5 + orr r0, r0, r6 mov pc, lr .size __sa1100_setup, . - __sa1100_setup @@ -227,97 +220,54 @@ __sa1100_setup: * ..11 0001 ..11 1101 * */ - .type sa1100_cr1_clear, #object - .type sa1100_cr1_set, #object -sa1100_cr1_clear: - .word 0x3f3f -sa1100_cr1_set: - .word 0x313d + .type sa1100_crval, #object +sa1100_crval: + crval clear=0x00003f3f, mmuset=0x0000313d, ucset=0x00001130 __INITDATA /* - * Purpose : Function pointers used to access above functions - all calls - * come through these - */ - -/* * SA1100 and SA1110 share the same function calls */ - .type sa1100_processor_functions, #object -ENTRY(sa1100_processor_functions) - .word v4_early_abort - .word cpu_sa1100_proc_init - .word cpu_sa1100_proc_fin - .word cpu_sa1100_reset - .word cpu_sa1100_do_idle - .word cpu_sa1100_dcache_clean_area - .word cpu_sa1100_switch_mm - .word cpu_sa1100_set_pte - .size sa1100_processor_functions, . - sa1100_processor_functions - - .section ".rodata" - - .type cpu_arch_name, #object -cpu_arch_name: - .asciz "armv4" - .size cpu_arch_name, . - cpu_arch_name - .type cpu_elf_name, #object -cpu_elf_name: - .asciz "v4" - .size cpu_elf_name, . - cpu_elf_name + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions sa1100, dabort=v4_early_abort, pabort=legacy_pabort, suspend=1 - .type cpu_sa1100_name, #object -cpu_sa1100_name: - .asciz "StrongARM-1100" - .size cpu_sa1100_name, . - cpu_sa1100_name + .section ".rodata" - .type cpu_sa1110_name, #object -cpu_sa1110_name: - .asciz "StrongARM-1110" - .size cpu_sa1110_name, . 
- cpu_sa1110_name + string cpu_arch_name, "armv4" + string cpu_elf_name, "v4" + string cpu_sa1100_name, "StrongARM-1100" + string cpu_sa1110_name, "StrongARM-1110" .align .section ".proc.info.init", #alloc, #execinstr - .type __sa1100_proc_info,#object -__sa1100_proc_info: - .long 0x4401a110 - .long 0xfffffff0 +.macro sa1100_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req + .type __\name\()_proc_info,#object +__\name\()_proc_info: + .long \cpu_val + .long \cpu_mask .long PMD_TYPE_SECT | \ PMD_SECT_BUFFERABLE | \ PMD_SECT_CACHEABLE | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ - b __sa1100_setup - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP | HWCAP_HALF | HWCAP_26BIT | HWCAP_FAST_MULT - .long cpu_sa1100_name - .long sa1100_processor_functions - .long v4wb_tlb_fns - .long v4_mc_user_fns - .long v4wb_cache_fns - .size __sa1100_proc_info, . - __sa1100_proc_info - - .type __sa1110_proc_info,#object -__sa1110_proc_info: - .long 0x6901b110 - .long 0xfffffff0 .long PMD_TYPE_SECT | \ - PMD_SECT_BUFFERABLE | \ - PMD_SECT_CACHEABLE | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ b __sa1100_setup .long cpu_arch_name .long cpu_elf_name .long HWCAP_SWP | HWCAP_HALF | HWCAP_26BIT | HWCAP_FAST_MULT - .long cpu_sa1110_name + .long \cpu_name .long sa1100_processor_functions .long v4wb_tlb_fns .long v4_mc_user_fns .long v4wb_cache_fns - .size __sa1110_proc_info, . - __sa1110_proc_info + .size __\name\()_proc_info, . - __\name\()_proc_info +.endm + + sa1100_proc_info sa1100, 0x4401a110, 0xfffffff0, cpu_sa1100_name + sa1100_proc_info sa1110, 0x6901b110, 0xfffffff0, cpu_sa1110_name diff --git a/arch/arm/mm/proc-syms.c b/arch/arm/mm/proc-syms.c index 6c5f0fe578a..054b491ff76 100644 --- a/arch/arm/mm/proc-syms.c +++ b/arch/arm/mm/proc-syms.c @@ -13,10 +13,13 @@ #include <asm/cacheflush.h> #include <asm/proc-fns.h> #include <asm/tlbflush.h> +#include <asm/page.h> #ifndef MULTI_CPU EXPORT_SYMBOL(cpu_dcache_clean_area); -EXPORT_SYMBOL(cpu_set_pte); +#ifdef CONFIG_MMU +EXPORT_SYMBOL(cpu_set_pte_ext); +#endif #else EXPORT_SYMBOL(processor); #endif @@ -26,10 +29,20 @@ EXPORT_SYMBOL(__cpuc_flush_kern_all); EXPORT_SYMBOL(__cpuc_flush_user_all); EXPORT_SYMBOL(__cpuc_flush_user_range); EXPORT_SYMBOL(__cpuc_coherent_kern_range); +EXPORT_SYMBOL(__cpuc_flush_dcache_area); #else EXPORT_SYMBOL(cpu_cache); #endif +#ifdef CONFIG_MMU +#ifndef MULTI_USER +EXPORT_SYMBOL(__cpu_clear_user_highpage); +EXPORT_SYMBOL(__cpu_copy_user_highpage); +#else +EXPORT_SYMBOL(cpu_user); +#endif +#endif + /* * No module should need to touch the TLB (and currently * no modules do. We export this for "loadkernel" support diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S index 9bb5fff406f..32b3558321c 100644 --- a/arch/arm/mm/proc-v6.S +++ b/arch/arm/mm/proc-v6.S @@ -2,6 +2,7 @@ * linux/arch/arm/mm/proc-v6.S * * Copyright (C) 2001 Deep Blue Solutions Ltd. + * Modified by Catalin Marinas for noMMU support * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -9,60 +10,40 @@ * * This is the "shell" of the ARMv6 processor support. 
*/ +#include <linux/init.h> #include <linux/linkage.h> #include <asm/assembler.h> #include <asm/asm-offsets.h> -#include <asm/procinfo.h> +#include <asm/hwcap.h> +#include <asm/pgtable-hwdef.h> #include <asm/pgtable.h> #include "proc-macros.S" #define D_CACHE_LINE_SIZE 32 - .macro cpsie, flags - .ifc \flags, f - .long 0xf1080040 - .exitm - .endif - .ifc \flags, i - .long 0xf1080080 - .exitm - .endif - .ifc \flags, if - .long 0xf10800c0 - .exitm - .endif - .err - .endm +#define TTB_C (1 << 0) +#define TTB_S (1 << 1) +#define TTB_IMP (1 << 2) +#define TTB_RGN_NC (0 << 3) +#define TTB_RGN_WBWA (1 << 3) +#define TTB_RGN_WT (2 << 3) +#define TTB_RGN_WB (3 << 3) - .macro cpsid, flags - .ifc \flags, f - .long 0xf10c0040 - .exitm - .endif - .ifc \flags, i - .long 0xf10c0080 - .exitm - .endif - .ifc \flags, if - .long 0xf10c00c0 - .exitm - .endif - .err - .endm +#define TTB_FLAGS_UP TTB_RGN_WBWA +#define PMD_FLAGS_UP PMD_SECT_WB +#define TTB_FLAGS_SMP TTB_RGN_WBWA|TTB_S +#define PMD_FLAGS_SMP PMD_SECT_WBWA|PMD_SECT_S ENTRY(cpu_v6_proc_init) mov pc, lr ENTRY(cpu_v6_proc_fin) - stmfd sp!, {lr} - cpsid if @ disable interrupts - bl v6_flush_kern_cache_all mrc p15, 0, r0, c1, c0, 0 @ ctrl register bic r0, r0, #0x1000 @ ...i............ bic r0, r0, #0x0006 @ .............ca. mcr p15, 0, r0, c1, c0, 0 @ disable caches - ldmfd sp!, {pc} + mov pc, lr /* * cpu_v6_reset(loc) @@ -72,12 +53,18 @@ ENTRY(cpu_v6_proc_fin) * to what would be the reset vector. * * - loc - location to jump to for soft reset - * - * It is assumed that: */ .align 5 + .pushsection .idmap.text, "ax" ENTRY(cpu_v6_reset) + mrc p15, 0, r1, c1, c0, 0 @ ctrl register + bic r1, r1, #0x1 @ ...............m + mcr p15, 0, r1, c1, c0, 0 @ disable MMU + mov r1, #0 + mcr p15, 0, r1, c7, c5, 4 @ ISB mov pc, r0 +ENDPROC(cpu_v6_reset) + .popsection /* * cpu_v6_do_idle() @@ -87,20 +74,20 @@ ENTRY(cpu_v6_reset) * IRQs are already disabled. */ ENTRY(cpu_v6_do_idle) + mov r1, #0 + mcr p15, 0, r1, c7, c10, 4 @ DWB - WFI may enter a low-power mode mcr p15, 0, r1, c7, c0, 4 @ wait for interrupt mov pc, lr ENTRY(cpu_v6_dcache_clean_area) -#ifndef TLB_CAN_READ_FROM_L1_CACHE 1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry add r0, r0, #D_CACHE_LINE_SIZE subs r1, r1, #D_CACHE_LINE_SIZE bhi 1b -#endif mov pc, lr /* - * cpu_arm926_switch_mm(pgd_phys, tsk) + * cpu_v6_switch_mm(pgd_phys, tsk) * * Set the translation table base pointer to be pgd_phys * @@ -110,71 +97,89 @@ ENTRY(cpu_v6_dcache_clean_area) * - we are not using split page tables */ ENTRY(cpu_v6_switch_mm) +#ifdef CONFIG_MMU mov r2, #0 - ldr r1, [r1, #MM_CONTEXT_ID] @ get mm->context.id + mmid r1, r1 @ get mm->context.id + ALT_SMP(orr r0, r0, #TTB_FLAGS_SMP) + ALT_UP(orr r0, r0, #TTB_FLAGS_UP) mcr p15, 0, r2, c7, c5, 6 @ flush BTAC/BTB mcr p15, 0, r2, c7, c10, 4 @ drain write buffer mcr p15, 0, r0, c2, c0, 0 @ set TTB 0 +#ifdef CONFIG_PID_IN_CONTEXTIDR + mrc p15, 0, r2, c13, c0, 1 @ read current context ID + bic r2, r2, #0xff @ extract the PID + and r1, r1, #0xff + orr r1, r1, r2 @ insert into new context ID +#endif mcr p15, 0, r1, c13, c0, 1 @ set context ID +#endif mov pc, lr /* - * cpu_v6_set_pte(ptep, pte) + * cpu_v6_set_pte_ext(ptep, pte, ext) * * Set a level 2 translation table entry. 
* * - ptep - pointer to level 2 translation table entry * (hardware version is stored at -1024 bytes) * - pte - PTE value to store - * - * Permissions: - * YUWD APX AP1 AP0 SVC User - * 0xxx 0 0 0 no acc no acc - * 100x 1 0 1 r/o no acc - * 10x0 1 0 1 r/o no acc - * 1011 0 0 1 r/w no acc - * 110x 0 1 0 r/w r/o - * 11x0 0 1 0 r/w r/o - * 1111 0 1 1 r/w r/w + * - ext - value for extended PTE bits */ -ENTRY(cpu_v6_set_pte) - str r1, [r0], #-2048 @ linux version + armv6_mt_table cpu_v6 - bic r2, r1, #0x000007f0 - bic r2, r2, #0x00000003 - orr r2, r2, #PTE_EXT_AP0 | 2 - - tst r1, #L_PTE_WRITE - tstne r1, #L_PTE_DIRTY - orreq r2, r2, #PTE_EXT_APX - - tst r1, #L_PTE_USER - orrne r2, r2, #PTE_EXT_AP1 - tstne r2, #PTE_EXT_APX - bicne r2, r2, #PTE_EXT_APX | PTE_EXT_AP0 - - tst r1, #L_PTE_YOUNG - biceq r2, r2, #PTE_EXT_APX | PTE_EXT_AP_MASK - -@ tst r1, #L_PTE_EXEC -@ orreq r2, r2, #PTE_EXT_XN - - tst r1, #L_PTE_PRESENT - moveq r2, #0 - - str r2, [r0] - mcr p15, 0, r0, c7, c10, 1 @ flush_pte +ENTRY(cpu_v6_set_pte_ext) +#ifdef CONFIG_MMU + armv6_set_pte_ext cpu_v6 +#endif mov pc, lr +/* Suspend/resume support: taken from arch/arm/mach-s3c64xx/sleep.S */ +.globl cpu_v6_suspend_size +.equ cpu_v6_suspend_size, 4 * 6 +#ifdef CONFIG_ARM_CPU_SUSPEND +ENTRY(cpu_v6_do_suspend) + stmfd sp!, {r4 - r9, lr} + mrc p15, 0, r4, c13, c0, 0 @ FCSE/PID +#ifdef CONFIG_MMU + mrc p15, 0, r5, c3, c0, 0 @ Domain ID + mrc p15, 0, r6, c2, c0, 1 @ Translation table base 1 +#endif + mrc p15, 0, r7, c1, c0, 1 @ auxiliary control register + mrc p15, 0, r8, c1, c0, 2 @ co-processor access control + mrc p15, 0, r9, c1, c0, 0 @ control register + stmia r0, {r4 - r9} + ldmfd sp!, {r4- r9, pc} +ENDPROC(cpu_v6_do_suspend) +ENTRY(cpu_v6_do_resume) + mov ip, #0 + mcr p15, 0, ip, c7, c14, 0 @ clean+invalidate D cache + mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcr p15, 0, ip, c7, c15, 0 @ clean+invalidate cache + mcr p15, 0, ip, c7, c10, 4 @ drain write buffer + mcr p15, 0, ip, c13, c0, 1 @ set reserved context ID + ldmia r0, {r4 - r9} + mcr p15, 0, r4, c13, c0, 0 @ FCSE/PID +#ifdef CONFIG_MMU + mcr p15, 0, r5, c3, c0, 0 @ Domain ID + ALT_SMP(orr r1, r1, #TTB_FLAGS_SMP) + ALT_UP(orr r1, r1, #TTB_FLAGS_UP) + mcr p15, 0, r1, c2, c0, 0 @ Translation table base 0 + mcr p15, 0, r6, c2, c0, 1 @ Translation table base 1 + mcr p15, 0, ip, c2, c0, 2 @ TTB control register +#endif + mcr p15, 0, r7, c1, c0, 1 @ auxiliary control register + mcr p15, 0, r8, c1, c0, 2 @ co-processor access control + mcr p15, 0, ip, c7, c5, 4 @ ISB + mov r0, r9 @ control register + b cpu_resume_mmu +ENDPROC(cpu_v6_do_resume) +#endif + string cpu_v6_name, "ARMv6-compatible processor" -cpu_v6_name: - .asciz "Some Random V6 Processor" .align - .section ".text.init", #alloc, #execinstr - /* * __v6_setup * @@ -191,24 +196,51 @@ cpu_v6_name: * - cache type register is implemented */ __v6_setup: +#ifdef CONFIG_SMP + ALT_SMP(mrc p15, 0, r0, c1, c0, 1) @ Enable SMP/nAMP mode + ALT_UP(nop) + orr r0, r0, #0x20 + ALT_SMP(mcr p15, 0, r0, c1, c0, 1) + ALT_UP(nop) +#endif + mov r0, #0 mcr p15, 0, r0, c7, c14, 0 @ clean+invalidate D cache mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache mcr p15, 0, r0, c7, c15, 0 @ clean+invalidate cache - mcr p15, 0, r0, c7, c10, 4 @ drain write buffer +#ifdef CONFIG_MMU mcr p15, 0, r0, c8, c7, 0 @ invalidate I + D TLBs mcr p15, 0, r0, c2, c0, 2 @ TTB control register - mcr p15, 0, r4, c2, c0, 1 @ load TTB1 -#ifdef CONFIG_VFP - mrc p15, 0, r0, c1, c0, 2 - orr r0, r0, #(0xf << 20) - mcr p15, 0, r0, c1, c0, 2 @ Enable full access to VFP -#endif + 
ALT_SMP(orr r4, r4, #TTB_FLAGS_SMP) + ALT_UP(orr r4, r4, #TTB_FLAGS_UP) + ALT_SMP(orr r8, r8, #TTB_FLAGS_SMP) + ALT_UP(orr r8, r8, #TTB_FLAGS_UP) + mcr p15, 0, r8, c2, c0, 1 @ load TTB1 +#endif /* CONFIG_MMU */ + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer and + @ complete invalidations + adr r5, v6_crval + ldmia r5, {r5, r6} + ARM_BE8(orr r6, r6, #1 << 25) @ big-endian page tables mrc p15, 0, r0, c1, c0, 0 @ read control register - ldr r5, v6_cr1_clear @ get mask for bits to clear bic r0, r0, r5 @ clear bits them - ldr r5, v6_cr1_set @ get mask for bits to set - orr r0, r0, r5 @ set them + orr r0, r0, r6 @ set them +#ifdef CONFIG_ARM_ERRATA_364296 + /* + * Workaround for the 364296 ARM1136 r0p2 erratum (possible cache data + * corruption with hit-under-miss enabled). The conditional code below + * (setting the undocumented bit 31 in the auxiliary control register + * and the FI bit in the control register) disables hit-under-miss + * without putting the processor into full low interrupt latency mode. + */ + ldr r6, =0x4107b362 @ id for ARM1136 r0p2 + mrc p15, 0, r5, c0, c0, 0 @ get processor id + teq r5, r6 @ check for the faulty core + mrceq p15, 0, r5, c1, c0, 1 @ load aux control reg + orreq r5, r5, #(1 << 31) @ set the undocumented bit 31 + mcreq p15, 0, r5, c1, c0, 1 @ write aux control reg + orreq r0, r0, #(1 << 21) @ low interrupt latency configuration +#endif mov pc, lr @ return to head.S:__ret /* @@ -217,34 +249,19 @@ __v6_setup: * rrrr rrrx xxx0 0101 xxxx xxxx x111 xxxx < forced * 0 110 0011 1.00 .111 1101 < we want */ - .type v6_cr1_clear, #object - .type v6_cr1_set, #object -v6_cr1_clear: - .word 0x01e0fb7f -v6_cr1_set: - .word 0x00c0387d + .type v6_crval, #object +v6_crval: + crval clear=0x01e0fb7f, mmuset=0x00c0387d, ucset=0x00c0187c - .type v6_processor_functions, #object -ENTRY(v6_processor_functions) - .word v6_early_abort - .word cpu_v6_proc_init - .word cpu_v6_proc_fin - .word cpu_v6_reset - .word cpu_v6_do_idle - .word cpu_v6_dcache_clean_area - .word cpu_v6_switch_mm - .word cpu_v6_set_pte - .size v6_processor_functions, . - v6_processor_functions + __INITDATA - .type cpu_arch_name, #object -cpu_arch_name: - .asciz "armv6" - .size cpu_arch_name, . - cpu_arch_name + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions v6, dabort=v6_early_abort, pabort=v6_pabort, suspend=1 - .type cpu_elf_name, #object -cpu_elf_name: - .asciz "v6" - .size cpu_elf_name, . 
- cpu_elf_name + .section ".rodata" + + string cpu_arch_name, "armv6" + string cpu_elf_name, "v6" .align .section ".proc.info.init", #alloc, #execinstr @@ -256,15 +273,25 @@ cpu_elf_name: __v6_proc_info: .long 0x0007b000 .long 0x0007f000 + ALT_SMP(.long \ + PMD_TYPE_SECT | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ | \ + PMD_FLAGS_SMP) + ALT_UP(.long \ + PMD_TYPE_SECT | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ | \ + PMD_FLAGS_UP) .long PMD_TYPE_SECT | \ - PMD_SECT_BUFFERABLE | \ - PMD_SECT_CACHEABLE | \ + PMD_SECT_XN | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ b __v6_setup .long cpu_arch_name .long cpu_elf_name - .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_VFP|HWCAP_EDSP|HWCAP_JAVA + /* See also feat_v6_fixup() for HWCAP_TLS */ + .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP|HWCAP_JAVA|HWCAP_TLS .long cpu_v6_name .long v6_processor_functions .long v6wbi_tlb_fns diff --git a/arch/arm/mm/proc-v7-2level.S b/arch/arm/mm/proc-v7-2level.S new file mode 100644 index 00000000000..1f52915f2b2 --- /dev/null +++ b/arch/arm/mm/proc-v7-2level.S @@ -0,0 +1,165 @@ +/* + * arch/arm/mm/proc-v7-2level.S + * + * Copyright (C) 2001 Deep Blue Solutions Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#define TTB_S (1 << 1) +#define TTB_RGN_NC (0 << 3) +#define TTB_RGN_OC_WBWA (1 << 3) +#define TTB_RGN_OC_WT (2 << 3) +#define TTB_RGN_OC_WB (3 << 3) +#define TTB_NOS (1 << 5) +#define TTB_IRGN_NC ((0 << 0) | (0 << 6)) +#define TTB_IRGN_WBWA ((0 << 0) | (1 << 6)) +#define TTB_IRGN_WT ((1 << 0) | (0 << 6)) +#define TTB_IRGN_WB ((1 << 0) | (1 << 6)) + +/* PTWs cacheable, inner WB not shareable, outer WB not shareable */ +#define TTB_FLAGS_UP TTB_IRGN_WB|TTB_RGN_OC_WB +#define PMD_FLAGS_UP PMD_SECT_WB + +/* PTWs cacheable, inner WBWA shareable, outer WBWA not shareable */ +#define TTB_FLAGS_SMP TTB_IRGN_WBWA|TTB_S|TTB_NOS|TTB_RGN_OC_WBWA +#define PMD_FLAGS_SMP PMD_SECT_WBWA|PMD_SECT_S + +/* + * cpu_v7_switch_mm(pgd_phys, tsk) + * + * Set the translation table base pointer to be pgd_phys + * + * - pgd_phys - physical address of new TTB + * + * It is assumed that: + * - we are not using split page tables + */ +ENTRY(cpu_v7_switch_mm) +#ifdef CONFIG_MMU + mov r2, #0 + mmid r1, r1 @ get mm->context.id + ALT_SMP(orr r0, r0, #TTB_FLAGS_SMP) + ALT_UP(orr r0, r0, #TTB_FLAGS_UP) +#ifdef CONFIG_ARM_ERRATA_430973 + mcr p15, 0, r2, c7, c5, 6 @ flush BTAC/BTB +#endif +#ifdef CONFIG_PID_IN_CONTEXTIDR + mrc p15, 0, r2, c13, c0, 1 @ read current context ID + lsr r2, r2, #8 @ extract the PID + bfi r1, r2, #8, #24 @ insert into new context ID +#endif +#ifdef CONFIG_ARM_ERRATA_754322 + dsb +#endif + mcr p15, 0, r1, c13, c0, 1 @ set context ID + isb + mcr p15, 0, r0, c2, c0, 0 @ set TTB 0 + isb +#endif + mov pc, lr +ENDPROC(cpu_v7_switch_mm) + +/* + * cpu_v7_set_pte_ext(ptep, pte) + * + * Set a level 2 translation table entry. 
+ * + * - ptep - pointer to level 2 translation table entry + * (hardware version is stored at +2048 bytes) + * - pte - PTE value to store + * - ext - value for extended PTE bits + */ +ENTRY(cpu_v7_set_pte_ext) +#ifdef CONFIG_MMU + str r1, [r0] @ linux version + + bic r3, r1, #0x000003f0 + bic r3, r3, #PTE_TYPE_MASK + orr r3, r3, r2 + orr r3, r3, #PTE_EXT_AP0 | 2 + + tst r1, #1 << 4 + orrne r3, r3, #PTE_EXT_TEX(1) + + eor r1, r1, #L_PTE_DIRTY + tst r1, #L_PTE_RDONLY | L_PTE_DIRTY + orrne r3, r3, #PTE_EXT_APX + + tst r1, #L_PTE_USER + orrne r3, r3, #PTE_EXT_AP1 + + tst r1, #L_PTE_XN + orrne r3, r3, #PTE_EXT_XN + + tst r1, #L_PTE_YOUNG + tstne r1, #L_PTE_VALID + eorne r1, r1, #L_PTE_NONE + tstne r1, #L_PTE_NONE + moveq r3, #0 + + ARM( str r3, [r0, #2048]! ) + THUMB( add r0, r0, #2048 ) + THUMB( str r3, [r0] ) + ALT_SMP(W(nop)) + ALT_UP (mcr p15, 0, r0, c7, c10, 1) @ flush_pte +#endif + mov pc, lr +ENDPROC(cpu_v7_set_pte_ext) + + /* + * Memory region attributes with SCTLR.TRE=1 + * + * n = TEX[0],C,B + * TR = PRRR[2n+1:2n] - memory type + * IR = NMRR[2n+1:2n] - inner cacheable property + * OR = NMRR[2n+17:2n+16] - outer cacheable property + * + * n TR IR OR + * UNCACHED 000 00 + * BUFFERABLE 001 10 00 00 + * WRITETHROUGH 010 10 10 10 + * WRITEBACK 011 10 11 11 + * reserved 110 + * WRITEALLOC 111 10 01 01 + * DEV_SHARED 100 01 + * DEV_NONSHARED 100 01 + * DEV_WC 001 10 + * DEV_CACHED 011 10 + * + * Other attributes: + * + * DS0 = PRRR[16] = 0 - device shareable property + * DS1 = PRRR[17] = 1 - device shareable property + * NS0 = PRRR[18] = 0 - normal shareable property + * NS1 = PRRR[19] = 1 - normal shareable property + * NOS = PRRR[24+n] = 1 - not outer shareable + */ +.equ PRRR, 0xff0a81a8 +.equ NMRR, 0x40e040e0 + + /* + * Macro for setting up the TTBRx and TTBCR registers. + * - \ttb0 and \ttb1 updated with the corresponding flags. + */ + .macro v7_ttb_setup, zero, ttbr0, ttbr1, tmp + mcr p15, 0, \zero, c2, c0, 2 @ TTB control register + ALT_SMP(orr \ttbr0, \ttbr0, #TTB_FLAGS_SMP) + ALT_UP(orr \ttbr0, \ttbr0, #TTB_FLAGS_UP) + ALT_SMP(orr \ttbr1, \ttbr1, #TTB_FLAGS_SMP) + ALT_UP(orr \ttbr1, \ttbr1, #TTB_FLAGS_UP) + mcr p15, 0, \ttbr1, c2, c0, 1 @ load TTB1 + .endm + + /* AT + * TFR EV X F I D LR S + * .EEE ..EE PUI. .T.T 4RVI ZWRS BLDP WCAM + * rxxx rrxx xxx0 0101 xxxx xxxx x111 xxxx < forced + * 01 0 110 0011 1100 .111 1101 < we want + */ + .align 2 + .type v7_crval, #object +v7_crval: + crval clear=0x2120c302, mmuset=0x10c03c7d, ucset=0x00c01c7c diff --git a/arch/arm/mm/proc-v7-3level.S b/arch/arm/mm/proc-v7-3level.S new file mode 100644 index 00000000000..22e3ad63500 --- /dev/null +++ b/arch/arm/mm/proc-v7-3level.S @@ -0,0 +1,161 @@ +/* + * arch/arm/mm/proc-v7-3level.S + * + * Copyright (C) 2001 Deep Blue Solutions Ltd. + * Copyright (C) 2011 ARM Ltd. + * Author: Catalin Marinas <catalin.marinas@arm.com> + * based on arch/arm/mm/proc-v7-2level.S + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#define TTB_IRGN_NC (0 << 8) +#define TTB_IRGN_WBWA (1 << 8) +#define TTB_IRGN_WT (2 << 8) +#define TTB_IRGN_WB (3 << 8) +#define TTB_RGN_NC (0 << 10) +#define TTB_RGN_OC_WBWA (1 << 10) +#define TTB_RGN_OC_WT (2 << 10) +#define TTB_RGN_OC_WB (3 << 10) +#define TTB_S (3 << 12) +#define TTB_EAE (1 << 31) + +/* PTWs cacheable, inner WB not shareable, outer WB not shareable */ +#define TTB_FLAGS_UP (TTB_IRGN_WB|TTB_RGN_OC_WB) +#define PMD_FLAGS_UP (PMD_SECT_WB) + +/* PTWs cacheable, inner WBWA shareable, outer WBWA not shareable */ +#define TTB_FLAGS_SMP (TTB_IRGN_WBWA|TTB_S|TTB_RGN_OC_WBWA) +#define PMD_FLAGS_SMP (PMD_SECT_WBWA|PMD_SECT_S) + +#ifndef __ARMEB__ +# define rpgdl r0 +# define rpgdh r1 +#else +# define rpgdl r1 +# define rpgdh r0 +#endif + +/* + * cpu_v7_switch_mm(pgd_phys, tsk) + * + * Set the translation table base pointer to be pgd_phys (physical address of + * the new TTB). + */ +ENTRY(cpu_v7_switch_mm) +#ifdef CONFIG_MMU + mmid r2, r2 + asid r2, r2 + orr rpgdh, rpgdh, r2, lsl #(48 - 32) @ upper 32-bits of pgd + mcrr p15, 0, rpgdl, rpgdh, c2 @ set TTB 0 + isb +#endif + mov pc, lr +ENDPROC(cpu_v7_switch_mm) + +#ifdef __ARMEB__ +#define rl r3 +#define rh r2 +#else +#define rl r2 +#define rh r3 +#endif + +/* + * cpu_v7_set_pte_ext(ptep, pte) + * + * Set a level 2 translation table entry. + * - ptep - pointer to level 3 translation table entry + * - pte - PTE value to store (64-bit in r2 and r3) + */ +ENTRY(cpu_v7_set_pte_ext) +#ifdef CONFIG_MMU + tst rl, #L_PTE_VALID + beq 1f + tst rh, #1 << (57 - 32) @ L_PTE_NONE + bicne rl, #L_PTE_VALID + bne 1f + tst rh, #1 << (55 - 32) @ L_PTE_DIRTY + orreq rl, #L_PTE_RDONLY +1: strd r2, r3, [r0] + ALT_SMP(W(nop)) + ALT_UP (mcr p15, 0, r0, c7, c10, 1) @ flush_pte +#endif + mov pc, lr +ENDPROC(cpu_v7_set_pte_ext) + + /* + * Memory region attributes for LPAE (defined in pgtable-3level.h): + * + * n = AttrIndx[2:0] + * + * n MAIR + * UNCACHED 000 00000000 + * BUFFERABLE 001 01000100 + * DEV_WC 001 01000100 + * WRITETHROUGH 010 10101010 + * WRITEBACK 011 11101110 + * DEV_CACHED 011 11101110 + * DEV_SHARED 100 00000100 + * DEV_NONSHARED 100 00000100 + * unused 101 + * unused 110 + * WRITEALLOC 111 11111111 + */ +.equ PRRR, 0xeeaa4400 @ MAIR0 +.equ NMRR, 0xff000004 @ MAIR1 + + /* + * Macro for setting up the TTBRx and TTBCR registers. + * - \ttbr1 updated. + */ + .macro v7_ttb_setup, zero, ttbr0, ttbr1, tmp + ldr \tmp, =swapper_pg_dir @ swapper_pg_dir virtual address + mov \tmp, \tmp, lsr #ARCH_PGD_SHIFT + cmp \ttbr1, \tmp @ PHYS_OFFSET > PAGE_OFFSET? + mrc p15, 0, \tmp, c2, c0, 2 @ TTB control register + orr \tmp, \tmp, #TTB_EAE + ALT_SMP(orr \tmp, \tmp, #TTB_FLAGS_SMP) + ALT_UP(orr \tmp, \tmp, #TTB_FLAGS_UP) + ALT_SMP(orr \tmp, \tmp, #TTB_FLAGS_SMP << 16) + ALT_UP(orr \tmp, \tmp, #TTB_FLAGS_UP << 16) + /* + * Only use split TTBRs if PHYS_OFFSET <= PAGE_OFFSET (cmp above), + * otherwise booting secondary CPUs would end up using TTBR1 for the + * identity mapping set up in TTBR0. 
+ */ + orrls \tmp, \tmp, #TTBR1_SIZE @ TTBCR.T1SZ + mcr p15, 0, \tmp, c2, c0, 2 @ TTBCR + mov \tmp, \ttbr1, lsr #(32 - ARCH_PGD_SHIFT) @ upper bits + mov \ttbr1, \ttbr1, lsl #ARCH_PGD_SHIFT @ lower bits + addls \ttbr1, \ttbr1, #TTBR1_OFFSET + mcrr p15, 1, \ttbr1, \zero, c2 @ load TTBR1 + mov \tmp, \ttbr0, lsr #(32 - ARCH_PGD_SHIFT) @ upper bits + mov \ttbr0, \ttbr0, lsl #ARCH_PGD_SHIFT @ lower bits + mcrr p15, 0, \ttbr0, \zero, c2 @ load TTBR0 + mcrr p15, 1, \ttbr1, \zero, c2 @ load TTBR1 + mcrr p15, 0, \ttbr0, \zero, c2 @ load TTBR0 + .endm + + /* + * AT + * TFR EV X F IHD LR S + * .EEE ..EE PUI. .TAT 4RVI ZWRS BLDP WCAM + * rxxx rrxx xxx0 0101 xxxx xxxx x111 xxxx < forced + * 11 0 110 1 0011 1100 .111 1101 < we want + */ + .align 2 + .type v7_crval, #object +v7_crval: + crval clear=0x0120c302, mmuset=0x30c23c7d, ucset=0x00c01c7c diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S new file mode 100644 index 00000000000..3db2c2f04a3 --- /dev/null +++ b/arch/arm/mm/proc-v7.S @@ -0,0 +1,563 @@ +/* + * linux/arch/arm/mm/proc-v7.S + * + * Copyright (C) 2001 Deep Blue Solutions Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This is the "shell" of the ARMv7 processor support. + */ +#include <linux/init.h> +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/asm-offsets.h> +#include <asm/hwcap.h> +#include <asm/pgtable-hwdef.h> +#include <asm/pgtable.h> + +#include "proc-macros.S" + +#ifdef CONFIG_ARM_LPAE +#include "proc-v7-3level.S" +#else +#include "proc-v7-2level.S" +#endif + +ENTRY(cpu_v7_proc_init) + mov pc, lr +ENDPROC(cpu_v7_proc_init) + +ENTRY(cpu_v7_proc_fin) + mrc p15, 0, r0, c1, c0, 0 @ ctrl register + bic r0, r0, #0x1000 @ ...i............ + bic r0, r0, #0x0006 @ .............ca. + mcr p15, 0, r0, c1, c0, 0 @ disable caches + mov pc, lr +ENDPROC(cpu_v7_proc_fin) + +/* + * cpu_v7_reset(loc) + * + * Perform a soft reset of the system. Put the CPU into the + * same state as it would be if it had been reset, and branch + * to what would be the reset vector. + * + * - loc - location to jump to for soft reset + * + * This code must be executed using a flat identity mapping with + * caches disabled. + */ + .align 5 + .pushsection .idmap.text, "ax" +ENTRY(cpu_v7_reset) + mrc p15, 0, r1, c1, c0, 0 @ ctrl register + bic r1, r1, #0x1 @ ...............m + THUMB( bic r1, r1, #1 << 30 ) @ SCTLR.TE (Thumb exceptions) + mcr p15, 0, r1, c1, c0, 0 @ disable MMU + isb + bx r0 +ENDPROC(cpu_v7_reset) + .popsection + +/* + * cpu_v7_do_idle() + * + * Idle the processor (eg, wait for interrupt). + * + * IRQs are already disabled. 
+ */ +ENTRY(cpu_v7_do_idle) + dsb @ WFI may enter a low-power mode + wfi + mov pc, lr +ENDPROC(cpu_v7_do_idle) + +ENTRY(cpu_v7_dcache_clean_area) + ALT_SMP(W(nop)) @ MP extensions imply L1 PTW + ALT_UP_B(1f) + mov pc, lr +1: dcache_line_size r2, r3 +2: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, r2 + subs r1, r1, r2 + bhi 2b + dsb ishst + mov pc, lr +ENDPROC(cpu_v7_dcache_clean_area) + + string cpu_v7_name, "ARMv7 Processor" + .align + +/* Suspend/resume support: derived from arch/arm/mach-s5pv210/sleep.S */ +.globl cpu_v7_suspend_size +.equ cpu_v7_suspend_size, 4 * 9 +#ifdef CONFIG_ARM_CPU_SUSPEND +ENTRY(cpu_v7_do_suspend) + stmfd sp!, {r4 - r10, lr} + mrc p15, 0, r4, c13, c0, 0 @ FCSE/PID + mrc p15, 0, r5, c13, c0, 3 @ User r/o thread ID + stmia r0!, {r4 - r5} +#ifdef CONFIG_MMU + mrc p15, 0, r6, c3, c0, 0 @ Domain ID +#ifdef CONFIG_ARM_LPAE + mrrc p15, 1, r5, r7, c2 @ TTB 1 +#else + mrc p15, 0, r7, c2, c0, 1 @ TTB 1 +#endif + mrc p15, 0, r11, c2, c0, 2 @ TTB control register +#endif + mrc p15, 0, r8, c1, c0, 0 @ Control register + mrc p15, 0, r9, c1, c0, 1 @ Auxiliary control register + mrc p15, 0, r10, c1, c0, 2 @ Co-processor access control + stmia r0, {r5 - r11} + ldmfd sp!, {r4 - r10, pc} +ENDPROC(cpu_v7_do_suspend) + +ENTRY(cpu_v7_do_resume) + mov ip, #0 + mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcr p15, 0, ip, c13, c0, 1 @ set reserved context ID + ldmia r0!, {r4 - r5} + mcr p15, 0, r4, c13, c0, 0 @ FCSE/PID + mcr p15, 0, r5, c13, c0, 3 @ User r/o thread ID + ldmia r0, {r5 - r11} +#ifdef CONFIG_MMU + mcr p15, 0, ip, c8, c7, 0 @ invalidate TLBs + mcr p15, 0, r6, c3, c0, 0 @ Domain ID +#ifdef CONFIG_ARM_LPAE + mcrr p15, 0, r1, ip, c2 @ TTB 0 + mcrr p15, 1, r5, r7, c2 @ TTB 1 +#else + ALT_SMP(orr r1, r1, #TTB_FLAGS_SMP) + ALT_UP(orr r1, r1, #TTB_FLAGS_UP) + mcr p15, 0, r1, c2, c0, 0 @ TTB 0 + mcr p15, 0, r7, c2, c0, 1 @ TTB 1 +#endif + mcr p15, 0, r11, c2, c0, 2 @ TTB control register + ldr r4, =PRRR @ PRRR + ldr r5, =NMRR @ NMRR + mcr p15, 0, r4, c10, c2, 0 @ write PRRR + mcr p15, 0, r5, c10, c2, 1 @ write NMRR +#endif /* CONFIG_MMU */ + mrc p15, 0, r4, c1, c0, 1 @ Read Auxiliary control register + teq r4, r9 @ Is it already set? 
+ mcrne p15, 0, r9, c1, c0, 1 @ No, so write it + mcr p15, 0, r10, c1, c0, 2 @ Co-processor access control + isb + dsb + mov r0, r8 @ control register + b cpu_resume_mmu +ENDPROC(cpu_v7_do_resume) +#endif + +#ifdef CONFIG_CPU_PJ4B + globl_equ cpu_pj4b_switch_mm, cpu_v7_switch_mm + globl_equ cpu_pj4b_set_pte_ext, cpu_v7_set_pte_ext + globl_equ cpu_pj4b_proc_init, cpu_v7_proc_init + globl_equ cpu_pj4b_proc_fin, cpu_v7_proc_fin + globl_equ cpu_pj4b_reset, cpu_v7_reset +#ifdef CONFIG_PJ4B_ERRATA_4742 +ENTRY(cpu_pj4b_do_idle) + dsb @ WFI may enter a low-power mode + wfi + dsb @barrier + mov pc, lr +ENDPROC(cpu_pj4b_do_idle) +#else + globl_equ cpu_pj4b_do_idle, cpu_v7_do_idle +#endif + globl_equ cpu_pj4b_dcache_clean_area, cpu_v7_dcache_clean_area +#ifdef CONFIG_ARM_CPU_SUSPEND +ENTRY(cpu_pj4b_do_suspend) + stmfd sp!, {r6 - r10} + mrc p15, 1, r6, c15, c1, 0 @ save CP15 - extra features + mrc p15, 1, r7, c15, c2, 0 @ save CP15 - Aux Func Modes Ctrl 0 + mrc p15, 1, r8, c15, c1, 2 @ save CP15 - Aux Debug Modes Ctrl 2 + mrc p15, 1, r9, c15, c1, 1 @ save CP15 - Aux Debug Modes Ctrl 1 + mrc p15, 0, r10, c9, c14, 0 @ save CP15 - PMC + stmia r0!, {r6 - r10} + ldmfd sp!, {r6 - r10} + b cpu_v7_do_suspend +ENDPROC(cpu_pj4b_do_suspend) + +ENTRY(cpu_pj4b_do_resume) + ldmia r0!, {r6 - r10} + mcr p15, 1, r6, c15, c1, 0 @ save CP15 - extra features + mcr p15, 1, r7, c15, c2, 0 @ save CP15 - Aux Func Modes Ctrl 0 + mcr p15, 1, r8, c15, c1, 2 @ save CP15 - Aux Debug Modes Ctrl 2 + mcr p15, 1, r9, c15, c1, 1 @ save CP15 - Aux Debug Modes Ctrl 1 + mcr p15, 0, r10, c9, c14, 0 @ save CP15 - PMC + b cpu_v7_do_resume +ENDPROC(cpu_pj4b_do_resume) +#endif +.globl cpu_pj4b_suspend_size +.equ cpu_pj4b_suspend_size, 4 * 14 + +#endif + +/* + * __v7_setup + * + * Initialise TLB, Caches, and MMU state ready to switch the MMU + * on. Return in r0 the new CP15 C1 control register setting. + * + * This should be able to cover all ARMv7 cores. + * + * It is assumed that: + * - cache type register is implemented + */ +__v7_ca5mp_setup: +__v7_ca9mp_setup: +__v7_cr7mp_setup: + mov r10, #(1 << 0) @ Cache/TLB ops broadcasting + b 1f +__v7_ca7mp_setup: +__v7_ca12mp_setup: +__v7_ca15mp_setup: +__v7_ca17mp_setup: + mov r10, #0 +1: +#ifdef CONFIG_SMP + ALT_SMP(mrc p15, 0, r0, c1, c0, 1) + ALT_UP(mov r0, #(1 << 6)) @ fake it for UP + tst r0, #(1 << 6) @ SMP/nAMP mode enabled? + orreq r0, r0, #(1 << 6) @ Enable SMP/nAMP mode + orreq r0, r0, r10 @ Enable CPU-specific SMP bits + mcreq p15, 0, r0, c1, c0, 1 +#endif + b __v7_setup + +__v7_pj4b_setup: +#ifdef CONFIG_CPU_PJ4B + +/* Auxiliary Debug Modes Control 1 Register */ +#define PJ4B_STATIC_BP (1 << 2) /* Enable Static BP */ +#define PJ4B_INTER_PARITY (1 << 8) /* Disable Internal Parity Handling */ +#define PJ4B_BCK_OFF_STREX (1 << 5) /* Enable the back off of STREX instr */ +#define PJ4B_CLEAN_LINE (1 << 16) /* Disable data transfer for clean line */ + +/* Auxiliary Debug Modes Control 2 Register */ +#define PJ4B_FAST_LDR (1 << 23) /* Disable fast LDR */ +#define PJ4B_SNOOP_DATA (1 << 25) /* Do not interleave write and snoop data */ +#define PJ4B_CWF (1 << 27) /* Disable Critical Word First feature */ +#define PJ4B_OUTSDNG_NC (1 << 29) /* Disable outstanding non cacheable rqst */ +#define PJ4B_L1_REP_RR (1 << 30) /* L1 replacement - Strict round robin */ +#define PJ4B_AUX_DBG_CTRL2 (PJ4B_SNOOP_DATA | PJ4B_CWF |\ + PJ4B_OUTSDNG_NC | PJ4B_L1_REP_RR) + +/* Auxiliary Functional Modes Control Register 0 */ +#define PJ4B_SMP_CFB (1 << 1) /* Set SMP mode. 
Join the coherency fabric */ +#define PJ4B_L1_PAR_CHK (1 << 2) /* Support L1 parity checking */ +#define PJ4B_BROADCAST_CACHE (1 << 8) /* Broadcast Cache and TLB maintenance */ + +/* Auxiliary Debug Modes Control 0 Register */ +#define PJ4B_WFI_WFE (1 << 22) /* WFI/WFE - serve the DVM and back to idle */ + + /* Auxiliary Debug Modes Control 1 Register */ + mrc p15, 1, r0, c15, c1, 1 + orr r0, r0, #PJ4B_CLEAN_LINE + orr r0, r0, #PJ4B_BCK_OFF_STREX + orr r0, r0, #PJ4B_INTER_PARITY + bic r0, r0, #PJ4B_STATIC_BP + mcr p15, 1, r0, c15, c1, 1 + + /* Auxiliary Debug Modes Control 2 Register */ + mrc p15, 1, r0, c15, c1, 2 + bic r0, r0, #PJ4B_FAST_LDR + orr r0, r0, #PJ4B_AUX_DBG_CTRL2 + mcr p15, 1, r0, c15, c1, 2 + + /* Auxiliary Functional Modes Control Register 0 */ + mrc p15, 1, r0, c15, c2, 0 +#ifdef CONFIG_SMP + orr r0, r0, #PJ4B_SMP_CFB +#endif + orr r0, r0, #PJ4B_L1_PAR_CHK + orr r0, r0, #PJ4B_BROADCAST_CACHE + mcr p15, 1, r0, c15, c2, 0 + + /* Auxiliary Debug Modes Control 0 Register */ + mrc p15, 1, r0, c15, c1, 0 + orr r0, r0, #PJ4B_WFI_WFE + mcr p15, 1, r0, c15, c1, 0 + +#endif /* CONFIG_CPU_PJ4B */ + +__v7_setup: + adr r12, __v7_setup_stack @ the local stack + stmia r12, {r0-r5, r7, r9, r11, lr} + bl v7_flush_dcache_louis + ldmia r12, {r0-r5, r7, r9, r11, lr} + + mrc p15, 0, r0, c0, c0, 0 @ read main ID register + and r10, r0, #0xff000000 @ ARM? + teq r10, #0x41000000 + bne 3f + and r5, r0, #0x00f00000 @ variant + and r6, r0, #0x0000000f @ revision + orr r6, r6, r5, lsr #20-4 @ combine variant and revision + ubfx r0, r0, #4, #12 @ primary part number + + /* Cortex-A8 Errata */ + ldr r10, =0x00000c08 @ Cortex-A8 primary part number + teq r0, r10 + bne 2f +#if defined(CONFIG_ARM_ERRATA_430973) && !defined(CONFIG_ARCH_MULTIPLATFORM) + + teq r5, #0x00100000 @ only present in r1p* + mrceq p15, 0, r10, c1, c0, 1 @ read aux control register + orreq r10, r10, #(1 << 6) @ set IBE to 1 + mcreq p15, 0, r10, c1, c0, 1 @ write aux control register +#endif +#ifdef CONFIG_ARM_ERRATA_458693 + teq r6, #0x20 @ only present in r2p0 + mrceq p15, 0, r10, c1, c0, 1 @ read aux control register + orreq r10, r10, #(1 << 5) @ set L1NEON to 1 + orreq r10, r10, #(1 << 9) @ set PLDNOP to 1 + mcreq p15, 0, r10, c1, c0, 1 @ write aux control register +#endif +#ifdef CONFIG_ARM_ERRATA_460075 + teq r6, #0x20 @ only present in r2p0 + mrceq p15, 1, r10, c9, c0, 2 @ read L2 cache aux ctrl register + tsteq r10, #1 << 22 + orreq r10, r10, #(1 << 22) @ set the Write Allocate disable bit + mcreq p15, 1, r10, c9, c0, 2 @ write the L2 cache aux ctrl register +#endif + b 3f + + /* Cortex-A9 Errata */ +2: ldr r10, =0x00000c09 @ Cortex-A9 primary part number + teq r0, r10 + bne 3f +#ifdef CONFIG_ARM_ERRATA_742230 + cmp r6, #0x22 @ only present up to r2p2 + mrcle p15, 0, r10, c15, c0, 1 @ read diagnostic register + orrle r10, r10, #1 << 4 @ set bit #4 + mcrle p15, 0, r10, c15, c0, 1 @ write diagnostic register +#endif +#ifdef CONFIG_ARM_ERRATA_742231 + teq r6, #0x20 @ present in r2p0 + teqne r6, #0x21 @ present in r2p1 + teqne r6, #0x22 @ present in r2p2 + mrceq p15, 0, r10, c15, c0, 1 @ read diagnostic register + orreq r10, r10, #1 << 12 @ set bit #12 + orreq r10, r10, #1 << 22 @ set bit #22 + mcreq p15, 0, r10, c15, c0, 1 @ write diagnostic register +#endif +#ifdef CONFIG_ARM_ERRATA_743622 + teq r5, #0x00200000 @ only present in r2p* + mrceq p15, 0, r10, c15, c0, 1 @ read diagnostic register + orreq r10, r10, #1 << 6 @ set bit #6 + mcreq p15, 0, r10, c15, c0, 1 @ write diagnostic register +#endif +#if 
defined(CONFIG_ARM_ERRATA_751472) && defined(CONFIG_SMP) + ALT_SMP(cmp r6, #0x30) @ present prior to r3p0 + ALT_UP_B(1f) + mrclt p15, 0, r10, c15, c0, 1 @ read diagnostic register + orrlt r10, r10, #1 << 11 @ set bit #11 + mcrlt p15, 0, r10, c15, c0, 1 @ write diagnostic register +1: +#endif + + /* Cortex-A15 Errata */ +3: ldr r10, =0x00000c0f @ Cortex-A15 primary part number + teq r0, r10 + bne 4f + +#ifdef CONFIG_ARM_ERRATA_773022 + cmp r6, #0x4 @ only present up to r0p4 + mrcle p15, 0, r10, c1, c0, 1 @ read aux control register + orrle r10, r10, #1 << 1 @ disable loop buffer + mcrle p15, 0, r10, c1, c0, 1 @ write aux control register +#endif + +4: mov r10, #0 + mcr p15, 0, r10, c7, c5, 0 @ I+BTB cache invalidate +#ifdef CONFIG_MMU + mcr p15, 0, r10, c8, c7, 0 @ invalidate I + D TLBs + v7_ttb_setup r10, r4, r8, r5 @ TTBCR, TTBRx setup + ldr r5, =PRRR @ PRRR + ldr r6, =NMRR @ NMRR + mcr p15, 0, r5, c10, c2, 0 @ write PRRR + mcr p15, 0, r6, c10, c2, 1 @ write NMRR +#endif + dsb @ Complete invalidations +#ifndef CONFIG_ARM_THUMBEE + mrc p15, 0, r0, c0, c1, 0 @ read ID_PFR0 for ThumbEE + and r0, r0, #(0xf << 12) @ ThumbEE enabled field + teq r0, #(1 << 12) @ check if ThumbEE is present + bne 1f + mov r5, #0 + mcr p14, 6, r5, c1, c0, 0 @ Initialize TEEHBR to 0 + mrc p14, 6, r0, c0, c0, 0 @ load TEECR + orr r0, r0, #1 @ set the 1st bit in order to + mcr p14, 6, r0, c0, c0, 0 @ stop userspace TEEHBR access +1: +#endif + adr r5, v7_crval + ldmia r5, {r5, r6} + ARM_BE8(orr r6, r6, #1 << 25) @ big-endian page tables +#ifdef CONFIG_SWP_EMULATE + orr r5, r5, #(1 << 10) @ set SW bit in "clear" + bic r6, r6, #(1 << 10) @ clear it in "mmuset" +#endif + mrc p15, 0, r0, c1, c0, 0 @ read control register + bic r0, r0, r5 @ clear bits them + orr r0, r0, r6 @ set them + THUMB( orr r0, r0, #1 << 30 ) @ Thumb exceptions + mov pc, lr @ return to head.S:__ret +ENDPROC(__v7_setup) + + .align 2 +__v7_setup_stack: + .space 4 * 11 @ 11 registers + + __INITDATA + + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions v7, dabort=v7_early_abort, pabort=v7_pabort, suspend=1 +#ifdef CONFIG_CPU_PJ4B + define_processor_functions pj4b, dabort=v7_early_abort, pabort=v7_pabort, suspend=1 +#endif + + .section ".rodata" + + string cpu_arch_name, "armv7" + string cpu_elf_name, "v7" + .align + + .section ".proc.info.init", #alloc, #execinstr + + /* + * Standard v7 proc info content + */ +.macro __v7_proc initfunc, mm_mmuflags = 0, io_mmuflags = 0, hwcaps = 0, proc_fns = v7_processor_functions + ALT_SMP(.long PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_AP_READ | \ + PMD_SECT_AF | PMD_FLAGS_SMP | \mm_mmuflags) + ALT_UP(.long PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_AP_READ | \ + PMD_SECT_AF | PMD_FLAGS_UP | \mm_mmuflags) + .long PMD_TYPE_SECT | PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ | PMD_SECT_AF | \io_mmuflags + W(b) \initfunc + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB | HWCAP_FAST_MULT | \ + HWCAP_EDSP | HWCAP_TLS | \hwcaps + .long cpu_v7_name + .long \proc_fns + .long v7wbi_tlb_fns + .long v6_user_fns + .long v7_cache_fns +.endm + +#ifndef CONFIG_ARM_LPAE + /* + * ARM Ltd. Cortex A5 processor. + */ + .type __v7_ca5mp_proc_info, #object +__v7_ca5mp_proc_info: + .long 0x410fc050 + .long 0xff0ffff0 + __v7_proc __v7_ca5mp_setup + .size __v7_ca5mp_proc_info, . - __v7_ca5mp_proc_info + + /* + * ARM Ltd. Cortex A9 processor. 
+ */ + .type __v7_ca9mp_proc_info, #object +__v7_ca9mp_proc_info: + .long 0x410fc090 + .long 0xff0ffff0 + __v7_proc __v7_ca9mp_setup + .size __v7_ca9mp_proc_info, . - __v7_ca9mp_proc_info + +#endif /* CONFIG_ARM_LPAE */ + + /* + * Marvell PJ4B processor. + */ +#ifdef CONFIG_CPU_PJ4B + .type __v7_pj4b_proc_info, #object +__v7_pj4b_proc_info: + .long 0x560f5800 + .long 0xff0fff00 + __v7_proc __v7_pj4b_setup, proc_fns = pj4b_processor_functions + .size __v7_pj4b_proc_info, . - __v7_pj4b_proc_info +#endif + + /* + * ARM Ltd. Cortex R7 processor. + */ + .type __v7_cr7mp_proc_info, #object +__v7_cr7mp_proc_info: + .long 0x410fc170 + .long 0xff0ffff0 + __v7_proc __v7_cr7mp_setup + .size __v7_cr7mp_proc_info, . - __v7_cr7mp_proc_info + + /* + * ARM Ltd. Cortex A7 processor. + */ + .type __v7_ca7mp_proc_info, #object +__v7_ca7mp_proc_info: + .long 0x410fc070 + .long 0xff0ffff0 + __v7_proc __v7_ca7mp_setup + .size __v7_ca7mp_proc_info, . - __v7_ca7mp_proc_info + + /* + * ARM Ltd. Cortex A12 processor. + */ + .type __v7_ca12mp_proc_info, #object +__v7_ca12mp_proc_info: + .long 0x410fc0d0 + .long 0xff0ffff0 + __v7_proc __v7_ca12mp_setup + .size __v7_ca12mp_proc_info, . - __v7_ca12mp_proc_info + + /* + * ARM Ltd. Cortex A15 processor. + */ + .type __v7_ca15mp_proc_info, #object +__v7_ca15mp_proc_info: + .long 0x410fc0f0 + .long 0xff0ffff0 + __v7_proc __v7_ca15mp_setup + .size __v7_ca15mp_proc_info, . - __v7_ca15mp_proc_info + + /* + * ARM Ltd. Cortex A17 processor. + */ + .type __v7_ca17mp_proc_info, #object +__v7_ca17mp_proc_info: + .long 0x410fc0e0 + .long 0xff0ffff0 + __v7_proc __v7_ca17mp_setup + .size __v7_ca17mp_proc_info, . - __v7_ca17mp_proc_info + + /* + * Qualcomm Inc. Krait processors. + */ + .type __krait_proc_info, #object +__krait_proc_info: + .long 0x510f0400 @ Required ID value + .long 0xff0ffc00 @ Mask for ID + /* + * Some Krait processors don't indicate support for SDIV and UDIV + * instructions in the ARM instruction set, even though they actually + * do support them. + */ + __v7_proc __v7_setup, hwcaps = HWCAP_IDIV + .size __krait_proc_info, . - __krait_proc_info + + /* + * Match any ARMv7 processor core. + */ + .type __v7_proc_info, #object +__v7_proc_info: + .long 0x000f0000 @ Required ID value + .long 0x000f0000 @ Mask for ID + __v7_proc __v7_setup + .size __v7_proc_info, . - __v7_proc_info diff --git a/arch/arm/mm/proc-v7m.S b/arch/arm/mm/proc-v7m.S new file mode 100644 index 00000000000..1ca37c72f12 --- /dev/null +++ b/arch/arm/mm/proc-v7m.S @@ -0,0 +1,159 @@ +/* + * linux/arch/arm/mm/proc-v7m.S + * + * Copyright (C) 2008 ARM Ltd. + * Copyright (C) 2001 Deep Blue Solutions Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This is the "shell" of the ARMv7-M processor support. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/v7m.h> +#include "proc-macros.S" + +ENTRY(cpu_v7m_proc_init) + mov pc, lr +ENDPROC(cpu_v7m_proc_init) + +ENTRY(cpu_v7m_proc_fin) + mov pc, lr +ENDPROC(cpu_v7m_proc_fin) + +/* + * cpu_v7m_reset(loc) + * + * Perform a soft reset of the system. Put the CPU into the + * same state as it would be if it had been reset, and branch + * to what would be the reset vector. + * + * - loc - location to jump to for soft reset + */ + .align 5 +ENTRY(cpu_v7m_reset) + mov pc, r0 +ENDPROC(cpu_v7m_reset) + +/* + * cpu_v7m_do_idle() + * + * Idle the processor (eg, wait for interrupt). 
+ * + * IRQs are already disabled. + */ +ENTRY(cpu_v7m_do_idle) + wfi + mov pc, lr +ENDPROC(cpu_v7m_do_idle) + +ENTRY(cpu_v7m_dcache_clean_area) + mov pc, lr +ENDPROC(cpu_v7m_dcache_clean_area) + +/* + * There is no MMU, so here is nothing to do. + */ +ENTRY(cpu_v7m_switch_mm) + mov pc, lr +ENDPROC(cpu_v7m_switch_mm) + +.globl cpu_v7m_suspend_size +.equ cpu_v7m_suspend_size, 0 + +#ifdef CONFIG_ARM_CPU_SUSPEND +ENTRY(cpu_v7m_do_suspend) + mov pc, lr +ENDPROC(cpu_v7m_do_suspend) + +ENTRY(cpu_v7m_do_resume) + mov pc, lr +ENDPROC(cpu_v7m_do_resume) +#endif + + .section ".text.init", #alloc, #execinstr + +/* + * __v7m_setup + * + * This should be able to cover all ARMv7-M cores. + */ +__v7m_setup: + @ Configure the vector table base address + ldr r0, =BASEADDR_V7M_SCB + ldr r12, =vector_table + str r12, [r0, V7M_SCB_VTOR] + + @ enable UsageFault, BusFault and MemManage fault. + ldr r5, [r0, #V7M_SCB_SHCSR] + orr r5, #(V7M_SCB_SHCSR_USGFAULTENA | V7M_SCB_SHCSR_BUSFAULTENA | V7M_SCB_SHCSR_MEMFAULTENA) + str r5, [r0, #V7M_SCB_SHCSR] + + @ Lower the priority of the SVC and PendSV exceptions + mov r5, #0x80000000 + str r5, [r0, V7M_SCB_SHPR2] @ set SVC priority + mov r5, #0x00800000 + str r5, [r0, V7M_SCB_SHPR3] @ set PendSV priority + + @ SVC to run the kernel in this mode + adr r1, BSYM(1f) + ldr r5, [r12, #11 * 4] @ read the SVC vector entry + str r1, [r12, #11 * 4] @ write the temporary SVC vector entry + mov r6, lr @ save LR + mov r7, sp @ save SP + ldr sp, =__v7m_setup_stack_top + cpsie i + svc #0 +1: cpsid i + str r5, [r12, #11 * 4] @ restore the original SVC vector entry + mov lr, r6 @ restore LR + mov sp, r7 @ restore SP + + @ Special-purpose control register + mov r1, #1 + msr control, r1 @ Thread mode has unpriviledged access + + @ Configure the System Control Register to ensure 8-byte stack alignment + @ Note the STKALIGN bit is either RW or RAO. + ldr r12, [r0, V7M_SCB_CCR] @ system control register + orr r12, #V7M_SCB_CCR_STKALIGN + str r12, [r0, V7M_SCB_CCR] + mov pc, lr +ENDPROC(__v7m_setup) + + .align 2 +__v7m_setup_stack: + .space 4 * 8 @ 8 registers +__v7m_setup_stack_top: + + define_processor_functions v7m, dabort=nommu_early_abort, pabort=legacy_pabort, nommu=1 + + .section ".rodata" + string cpu_arch_name, "armv7m" + string cpu_elf_name "v7m" + string cpu_v7m_name "ARMv7-M" + + .section ".proc.info.init", #alloc, #execinstr + + /* + * Match any ARMv7-M processor core. + */ + .type __v7m_proc_info, #object +__v7m_proc_info: + .long 0x000f0000 @ Required ID value + .long 0x000f0000 @ Mask for ID + .long 0 @ proc_info_list.__cpu_mm_mmu_flags + .long 0 @ proc_info_list.__cpu_io_mmu_flags + b __v7m_setup @ proc_info_list.__cpu_flush + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT + .long cpu_v7m_name + .long v7m_processor_functions @ proc_info_list.proc + .long 0 @ proc_info_list.tlb + .long 0 @ proc_info_list.user + .long nop_cache_fns @ proc_info_list.cache + .size __v7m_proc_info, . - __v7m_proc_info + diff --git a/arch/arm/mm/proc-xsc3.S b/arch/arm/mm/proc-xsc3.S new file mode 100644 index 00000000000..dc164589004 --- /dev/null +++ b/arch/arm/mm/proc-xsc3.S @@ -0,0 +1,532 @@ +/* + * linux/arch/arm/mm/proc-xsc3.S + * + * Original Author: Matthew Gilbert + * Current Maintainer: Lennert Buytenhek <buytenh@wantstofly.org> + * + * Copyright 2004 (C) Intel Corp. + * Copyright 2005 (C) MontaVista Software, Inc. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * MMU functions for the Intel XScale3 Core (XSC3). The XSC3 core is + * an extension to Intel's original XScale core that adds the following + * features: + * + * - ARMv6 Supersections + * - Low Locality Reference pages (replaces mini-cache) + * - 36-bit addressing + * - L2 cache + * - Cache coherency if chipset supports it + * + * Based on original XScale code by Nicolas Pitre. + */ + +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/assembler.h> +#include <asm/hwcap.h> +#include <asm/pgtable.h> +#include <asm/pgtable-hwdef.h> +#include <asm/page.h> +#include <asm/ptrace.h> +#include "proc-macros.S" + +/* + * This is the maximum size of an area which will be flushed. If the + * area is larger than this, then we flush the whole cache. + */ +#define MAX_AREA_SIZE 32768 + +/* + * The cache line size of the L1 I, L1 D and unified L2 cache. + */ +#define CACHELINESIZE 32 + +/* + * The size of the L1 D cache. + */ +#define CACHESIZE 32768 + +/* + * This macro is used to wait for a CP15 write and is needed when we + * have to ensure that the last operation to the coprocessor was + * completed before continuing with operation. + */ + .macro cpwait_ret, lr, rd + mrc p15, 0, \rd, c2, c0, 0 @ arbitrary read of cp15 + sub pc, \lr, \rd, LSR #32 @ wait for completion and + @ flush instruction pipeline + .endm + +/* + * This macro cleans and invalidates the entire L1 D cache. + */ + + .macro clean_d_cache rd, rs + mov \rd, #0x1f00 + orr \rd, \rd, #0x00e0 +1: mcr p15, 0, \rd, c7, c14, 2 @ clean/invalidate L1 D line + adds \rd, \rd, #0x40000000 + bcc 1b + subs \rd, \rd, #0x20 + bpl 1b + .endm + + .text + +/* + * cpu_xsc3_proc_init() + * + * Nothing too exciting at the moment + */ +ENTRY(cpu_xsc3_proc_init) + mov pc, lr + +/* + * cpu_xsc3_proc_fin() + */ +ENTRY(cpu_xsc3_proc_fin) + mrc p15, 0, r0, c1, c0, 0 @ ctrl register + bic r0, r0, #0x1800 @ ...IZ........... + bic r0, r0, #0x0006 @ .............CA. + mcr p15, 0, r0, c1, c0, 0 @ disable caches + mov pc, lr + +/* + * cpu_xsc3_reset(loc) + * + * Perform a soft reset of the system. Put the CPU into the + * same state as it would be if it had been reset, and branch + * to what would be the reset vector. + * + * loc: location to jump to for soft reset + */ + .align 5 + .pushsection .idmap.text, "ax" +ENTRY(cpu_xsc3_reset) + mov r1, #PSR_F_BIT|PSR_I_BIT|SVC_MODE + msr cpsr_c, r1 @ reset CPSR + mrc p15, 0, r1, c1, c0, 0 @ ctrl register + bic r1, r1, #0x3900 @ ..VIZ..S........ + bic r1, r1, #0x0086 @ ........B....CA. + mcr p15, 0, r1, c1, c0, 0 @ ctrl register + mcr p15, 0, ip, c7, c7, 0 @ invalidate L1 caches and BTB + bic r1, r1, #0x0001 @ ...............M + mcr p15, 0, r1, c1, c0, 0 @ ctrl register + @ CAUTION: MMU turned off from this point. We count on the pipeline + @ already containing those two last instructions to survive. + mcr p15, 0, ip, c8, c7, 0 @ invalidate I and D TLBs + mov pc, r0 +ENDPROC(cpu_xsc3_reset) + .popsection + +/* + * cpu_xsc3_do_idle() + * + * Cause the processor to idle + * + * For now we do nothing but go to idle mode for every case + * + * XScale supports clock switching, but using idle mode support + * allows external hardware to react to system state changes. 
+ */ + .align 5 + +ENTRY(cpu_xsc3_do_idle) + mov r0, #1 + mcr p14, 0, r0, c7, c0, 0 @ go to idle + mov pc, lr + +/* ================================= CACHE ================================ */ + +/* + * flush_icache_all() + * + * Unconditionally clean and invalidate the entire icache. + */ +ENTRY(xsc3_flush_icache_all) + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mov pc, lr +ENDPROC(xsc3_flush_icache_all) + +/* + * flush_user_cache_all() + * + * Invalidate all cache entries in a particular address + * space. + */ +ENTRY(xsc3_flush_user_cache_all) + /* FALLTHROUGH */ + +/* + * flush_kern_cache_all() + * + * Clean and invalidate the entire cache. + */ +ENTRY(xsc3_flush_kern_cache_all) + mov r2, #VM_EXEC + mov ip, #0 +__flush_whole_cache: + clean_d_cache r0, r1 + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c5, 0 @ invalidate L1 I cache and BTB + mcrne p15, 0, ip, c7, c10, 4 @ data write barrier + mcrne p15, 0, ip, c7, c5, 4 @ prefetch flush + mov pc, lr + +/* + * flush_user_cache_range(start, end, vm_flags) + * + * Invalidate a range of cache entries in the specified + * address space. + * + * - start - start address (may not be aligned) + * - end - end address (exclusive, may not be aligned) + * - vma - vma_area_struct describing address space + */ + .align 5 +ENTRY(xsc3_flush_user_cache_range) + mov ip, #0 + sub r3, r1, r0 @ calculate total size + cmp r3, #MAX_AREA_SIZE + bhs __flush_whole_cache + +1: tst r2, #VM_EXEC + mcrne p15, 0, r0, c7, c5, 1 @ invalidate L1 I line + mcr p15, 0, r0, c7, c14, 1 @ clean/invalidate L1 D line + add r0, r0, #CACHELINESIZE + cmp r0, r1 + blo 1b + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c5, 6 @ invalidate BTB + mcrne p15, 0, ip, c7, c10, 4 @ data write barrier + mcrne p15, 0, ip, c7, c5, 4 @ prefetch flush + mov pc, lr + +/* + * coherent_kern_range(start, end) + * + * Ensure coherency between the I cache and the D cache in the + * region described by start. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + * + * Note: single I-cache line invalidation isn't used here since + * it also trashes the mini I-cache used by JTAG debuggers. + */ +ENTRY(xsc3_coherent_kern_range) +/* FALLTHROUGH */ +ENTRY(xsc3_coherent_user_range) + bic r0, r0, #CACHELINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean L1 D line + add r0, r0, #CACHELINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate L1 I cache and BTB + mcr p15, 0, r0, c7, c10, 4 @ data write barrier + mcr p15, 0, r0, c7, c5, 4 @ prefetch flush + mov pc, lr + +/* + * flush_kern_dcache_area(void *addr, size_t size) + * + * Ensure no D cache aliasing occurs, either with itself or + * the I cache. + * + * - addr - kernel address + * - size - region size + */ +ENTRY(xsc3_flush_kern_dcache_area) + add r1, r0, r1 +1: mcr p15, 0, r0, c7, c14, 1 @ clean/invalidate L1 D line + add r0, r0, #CACHELINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate L1 I cache and BTB + mcr p15, 0, r0, c7, c10, 4 @ data write barrier + mcr p15, 0, r0, c7, c5, 4 @ prefetch flush + mov pc, lr + +/* + * dma_inv_range(start, end) + * + * Invalidate (discard) the specified virtual address range. + * May not write back any entries. If 'start' or 'end' + * are not cache line aligned, those lines must be written + * back. 
+ * + * - start - virtual start address + * - end - virtual end address + */ +xsc3_dma_inv_range: + tst r0, #CACHELINESIZE - 1 + bic r0, r0, #CACHELINESIZE - 1 + mcrne p15, 0, r0, c7, c10, 1 @ clean L1 D line + tst r1, #CACHELINESIZE - 1 + mcrne p15, 0, r1, c7, c10, 1 @ clean L1 D line +1: mcr p15, 0, r0, c7, c6, 1 @ invalidate L1 D line + add r0, r0, #CACHELINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ data write barrier + mov pc, lr + +/* + * dma_clean_range(start, end) + * + * Clean the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + */ +xsc3_dma_clean_range: + bic r0, r0, #CACHELINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean L1 D line + add r0, r0, #CACHELINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ data write barrier + mov pc, lr + +/* + * dma_flush_range(start, end) + * + * Clean and invalidate the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(xsc3_dma_flush_range) + bic r0, r0, #CACHELINESIZE - 1 +1: mcr p15, 0, r0, c7, c14, 1 @ clean/invalidate L1 D line + add r0, r0, #CACHELINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ data write barrier + mov pc, lr + +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(xsc3_dma_map_area) + add r1, r1, r0 + cmp r2, #DMA_TO_DEVICE + beq xsc3_dma_clean_range + bcs xsc3_dma_inv_range + b xsc3_dma_flush_range +ENDPROC(xsc3_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(xsc3_dma_unmap_area) + mov pc, lr +ENDPROC(xsc3_dma_unmap_area) + + .globl xsc3_flush_kern_cache_louis + .equ xsc3_flush_kern_cache_louis, xsc3_flush_kern_cache_all + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions xsc3 + +ENTRY(cpu_xsc3_dcache_clean_area) +1: mcr p15, 0, r0, c7, c10, 1 @ clean L1 D line + add r0, r0, #CACHELINESIZE + subs r1, r1, #CACHELINESIZE + bhi 1b + mov pc, lr + +/* =============================== PageTable ============================== */ + +/* + * cpu_xsc3_switch_mm(pgd) + * + * Set the translation base pointer to be as described by pgd. + * + * pgd: new page tables + */ + .align 5 +ENTRY(cpu_xsc3_switch_mm) + clean_d_cache r1, r2 + mcr p15, 0, ip, c7, c5, 0 @ invalidate L1 I cache and BTB + mcr p15, 0, ip, c7, c10, 4 @ data write barrier + mcr p15, 0, ip, c7, c5, 4 @ prefetch flush + orr r0, r0, #0x18 @ cache the page table in L2 + mcr p15, 0, r0, c2, c0, 0 @ load page table pointer + mcr p15, 0, ip, c8, c7, 0 @ invalidate I and D TLBs + cpwait_ret lr, ip + +/* + * cpu_xsc3_set_pte_ext(ptep, pte, ext) + * + * Set a PTE and flush it out + */ +cpu_xsc3_mt_table: + .long 0x00 @ L_PTE_MT_UNCACHED + .long PTE_EXT_TEX(1) @ L_PTE_MT_BUFFERABLE + .long PTE_EXT_TEX(5) | PTE_CACHEABLE @ L_PTE_MT_WRITETHROUGH + .long PTE_CACHEABLE | PTE_BUFFERABLE @ L_PTE_MT_WRITEBACK + .long PTE_EXT_TEX(1) | PTE_BUFFERABLE @ L_PTE_MT_DEV_SHARED + .long 0x00 @ unused + .long 0x00 @ L_PTE_MT_MINICACHE (not present) + .long PTE_EXT_TEX(5) | PTE_CACHEABLE | PTE_BUFFERABLE @ L_PTE_MT_WRITEALLOC (not present?) 
+ .long 0x00 @ unused + .long PTE_EXT_TEX(1) @ L_PTE_MT_DEV_WC + .long 0x00 @ unused + .long PTE_CACHEABLE | PTE_BUFFERABLE @ L_PTE_MT_DEV_CACHED + .long PTE_EXT_TEX(2) @ L_PTE_MT_DEV_NONSHARED + .long 0x00 @ unused + .long 0x00 @ unused + .long 0x00 @ unused + + .align 5 +ENTRY(cpu_xsc3_set_pte_ext) + xscale_set_pte_ext_prologue + + tst r1, #L_PTE_SHARED @ shared? + and r1, r1, #L_PTE_MT_MASK + adr ip, cpu_xsc3_mt_table + ldr ip, [ip, r1] + orrne r2, r2, #PTE_EXT_COHERENT @ interlock: mask in coherent bit + bic r2, r2, #0x0c @ clear old C,B bits + orr r2, r2, ip + + xscale_set_pte_ext_epilogue + mov pc, lr + + .ltorg + .align + +.globl cpu_xsc3_suspend_size +.equ cpu_xsc3_suspend_size, 4 * 6 +#ifdef CONFIG_ARM_CPU_SUSPEND +ENTRY(cpu_xsc3_do_suspend) + stmfd sp!, {r4 - r9, lr} + mrc p14, 0, r4, c6, c0, 0 @ clock configuration, for turbo mode + mrc p15, 0, r5, c15, c1, 0 @ CP access reg + mrc p15, 0, r6, c13, c0, 0 @ PID + mrc p15, 0, r7, c3, c0, 0 @ domain ID + mrc p15, 0, r8, c1, c0, 1 @ auxiliary control reg + mrc p15, 0, r9, c1, c0, 0 @ control reg + bic r4, r4, #2 @ clear frequency change bit + stmia r0, {r4 - r9} @ store cp regs + ldmia sp!, {r4 - r9, pc} +ENDPROC(cpu_xsc3_do_suspend) + +ENTRY(cpu_xsc3_do_resume) + ldmia r0, {r4 - r9} @ load cp regs + mov ip, #0 + mcr p15, 0, ip, c7, c7, 0 @ invalidate I & D caches, BTB + mcr p15, 0, ip, c7, c10, 4 @ drain write (&fill) buffer + mcr p15, 0, ip, c7, c5, 4 @ flush prefetch buffer + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs + mcr p14, 0, r4, c6, c0, 0 @ clock configuration, turbo mode. + mcr p15, 0, r5, c15, c1, 0 @ CP access reg + mcr p15, 0, r6, c13, c0, 0 @ PID + mcr p15, 0, r7, c3, c0, 0 @ domain ID + orr r1, r1, #0x18 @ cache the page table in L2 + mcr p15, 0, r1, c2, c0, 0 @ translation table base addr + mcr p15, 0, r8, c1, c0, 1 @ auxiliary control reg + mov r0, r9 @ control register + b cpu_resume_mmu +ENDPROC(cpu_xsc3_do_resume) +#endif + + .type __xsc3_setup, #function +__xsc3_setup: + mov r0, #PSR_F_BIT|PSR_I_BIT|SVC_MODE + msr cpsr_c, r0 + mcr p15, 0, ip, c7, c7, 0 @ invalidate L1 caches and BTB + mcr p15, 0, ip, c7, c10, 4 @ data write barrier + mcr p15, 0, ip, c7, c5, 4 @ prefetch flush + mcr p15, 0, ip, c8, c7, 0 @ invalidate I and D TLBs + orr r4, r4, #0x18 @ cache the page table in L2 + mcr p15, 0, r4, c2, c0, 0 @ load page table pointer + + mov r0, #1 << 6 @ cp6 access for early sched_clock + mcr p15, 0, r0, c15, c1, 0 @ write CP access register + + mrc p15, 0, r0, c1, c0, 1 @ get auxiliary control reg + and r0, r0, #2 @ preserve bit P bit setting + orr r0, r0, #(1 << 10) @ enable L2 for LLR cache + mcr p15, 0, r0, c1, c0, 1 @ set auxiliary control reg + + adr r5, xsc3_crval + ldmia r5, {r5, r6} + +#ifdef CONFIG_CACHE_XSC3L2 + mrc p15, 1, r0, c0, c0, 1 @ get L2 present information + ands r0, r0, #0xf8 + orrne r6, r6, #(1 << 26) @ enable L2 if present +#endif + + mrc p15, 0, r0, c1, c0, 0 @ get control register + bic r0, r0, r5 @ ..V. ..R. .... ..A. + orr r0, r0, r6 @ ..VI Z..S .... .C.M (mmu) + @ ...I Z..S .... .... (uc) + mov pc, lr + + .size __xsc3_setup, . 
- __xsc3_setup + + .type xsc3_crval, #object +xsc3_crval: + crval clear=0x04002202, mmuset=0x00003905, ucset=0x00001900 + + __INITDATA + + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions xsc3, dabort=v5t_early_abort, pabort=legacy_pabort, suspend=1 + + .section ".rodata" + + string cpu_arch_name, "armv5te" + string cpu_elf_name, "v5" + string cpu_xsc3_name, "XScale-V3 based processor" + + .align + + .section ".proc.info.init", #alloc, #execinstr + +.macro xsc3_proc_info name:req, cpu_val:req, cpu_mask:req + .type __\name\()_proc_info,#object +__\name\()_proc_info: + .long \cpu_val + .long \cpu_mask + .long PMD_TYPE_SECT | \ + PMD_SECT_BUFFERABLE | \ + PMD_SECT_CACHEABLE | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + .long PMD_TYPE_SECT | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + b __xsc3_setup + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP + .long cpu_xsc3_name + .long xsc3_processor_functions + .long v4wbi_tlb_fns + .long xsc3_mc_user_fns + .long xsc3_cache_fns + .size __\name\()_proc_info, . - __\name\()_proc_info +.endm + + xsc3_proc_info xsc3, 0x69056000, 0xffffe000 + +/* Note: PXA935 changed its implementor ID from Intel to Marvell */ + xsc3_proc_info xsc3_pxa935, 0x56056000, 0xffffe000 diff --git a/arch/arm/mm/proc-xscale.S b/arch/arm/mm/proc-xscale.S index 861b3594728..d19b1cfcad9 100644 --- a/arch/arm/mm/proc-xscale.S +++ b/arch/arm/mm/proc-xscale.S @@ -17,15 +17,15 @@ * * 2001 Sep 08: * Completely revisited, many important fixes - * Nicolas Pitre <nico@cam.org> + * Nicolas Pitre <nico@fluxnic.net> */ #include <linux/linkage.h> #include <linux/init.h> #include <asm/assembler.h> -#include <asm/procinfo.h> -#include <asm/hardware.h> +#include <asm/hwcap.h> #include <asm/pgtable.h> +#include <asm/pgtable-hwdef.h> #include <asm/page.h> #include <asm/ptrace.h> #include "proc-macros.S" @@ -114,21 +114,21 @@ clean_addr: .word CLEAN_ADDR * Nothing too exciting at the moment */ ENTRY(cpu_xscale_proc_init) + @ enable write buffer coalescing. Some bootloader disable it + mrc p15, 0, r1, c1, c0, 1 + bic r1, r1, #1 + mcr p15, 0, r1, c1, c0, 1 mov pc, lr /* * cpu_xscale_proc_fin() */ ENTRY(cpu_xscale_proc_fin) - str lr, [sp, #-4]! - mov r0, #PSR_F_BIT|PSR_I_BIT|SVC_MODE - msr cpsr_c, r0 - bl xscale_flush_kern_cache_all @ clean caches mrc p15, 0, r0, c1, c0, 0 @ ctrl register bic r0, r0, #0x1800 @ ...IZ........... bic r0, r0, #0x0006 @ .............CA. mcr p15, 0, r0, c1, c0, 0 @ disable caches - ldr pc, [sp], #4 + mov pc, lr /* * cpu_xscale_reset(loc) @@ -138,22 +138,31 @@ ENTRY(cpu_xscale_proc_fin) * to what would be the reset vector. * * loc: location to jump to for soft reset + * + * Beware PXA270 erratum E7. */ .align 5 + .pushsection .idmap.text, "ax" ENTRY(cpu_xscale_reset) mov r1, #PSR_F_BIT|PSR_I_BIT|SVC_MODE msr cpsr_c, r1 @ reset CPSR + mcr p15, 0, r1, c10, c4, 1 @ unlock I-TLB + mcr p15, 0, r1, c8, c5, 0 @ invalidate I-TLB mrc p15, 0, r1, c1, c0, 0 @ ctrl register bic r1, r1, #0x0086 @ ........B....CA. bic r1, r1, #0x3900 @ ..VIZ..S........ + sub pc, pc, #4 @ flush pipeline + @ *** cache line aligned *** mcr p15, 0, r1, c1, c0, 0 @ ctrl register - mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches & BTB bic r1, r1, #0x0001 @ ...............M + mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches & BTB mcr p15, 0, r1, c1, c0, 0 @ ctrl register @ CAUTION: MMU turned off from this point. We count on the pipeline @ already containing those two last instructions to survive. 
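The crval records (xsc3_crval here, xscale_crval further down in this patch) package the read-modify-write that each setup routine performs on the CP15 control register: clear the bits in the "clear" mask, then OR in "mmuset" (or "ucset" for !MMU builds). A small C model of that sequence, using the xsc3_crval operands above purely as sample inputs:

#include <stdint.h>
#include <inttypes.h>
#include <stdio.h>

/* Models the "bic r0, r0, r5 / orr r0, r0, r6" pair at the end of
 * __xsc3_setup and __xscale_setup; not kernel code. */
static uint32_t apply_crval(uint32_t ctrl, uint32_t clear, uint32_t set)
{
	ctrl &= ~clear;		/* bic r0, r0, r5 */
	ctrl |= set;		/* orr r0, r0, r6 */
	return ctrl;
}

int main(void)
{
	uint32_t clear = 0x04002202, mmuset = 0x00003905;

	/* Assume the register happened to read back as zero, just so the
	 * example prints something concrete (0x00003905 here). */
	printf("ctrl = 0x%08" PRIx32 "\n", apply_crval(0, clear, mmuset));
	return 0;
}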
mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs mov pc, r0 +ENDPROC(cpu_xscale_reset) + .popsection /* * cpu_xscale_do_idle() @@ -175,6 +184,17 @@ ENTRY(cpu_xscale_do_idle) /* ================================= CACHE ================================ */ /* + * flush_icache_all() + * + * Unconditionally clean and invalidate the entire icache. + */ +ENTRY(xscale_flush_icache_all) + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mov pc, lr +ENDPROC(xscale_flush_icache_all) + +/* * flush_user_cache_all() * * Invalidate all cache entries in a particular address @@ -241,7 +261,15 @@ ENTRY(xscale_flush_user_cache_range) * it also trashes the mini I-cache used by JTAG debuggers. */ ENTRY(xscale_coherent_kern_range) - /* FALLTHROUGH */ + bic r0, r0, #CACHELINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHELINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ Invalidate I cache & BTB + mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer + mov pc, lr /* * coherent_user_range(start, end) @@ -252,31 +280,30 @@ ENTRY(xscale_coherent_kern_range) * * - start - virtual start address * - end - virtual end address - * - * Note: single I-cache line invalidation isn't used here since - * it also trashes the mini I-cache used by JTAG debuggers. */ ENTRY(xscale_coherent_user_range) bic r0, r0, #CACHELINESIZE - 1 1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mcr p15, 0, r0, c7, c5, 1 @ Invalidate I cache entry add r0, r0, #CACHELINESIZE cmp r0, r1 blo 1b mov r0, #0 - mcr p15, 0, r0, c7, c5, 0 @ Invalidate I cache & BTB + mcr p15, 0, r0, c7, c5, 6 @ Invalidate BTB mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer mov pc, lr /* - * flush_kern_dcache_page(void *page) + * flush_kern_dcache_area(void *addr, size_t size) * * Ensure no D cache aliasing occurs, either with itself or * the I cache * - * - addr - page aligned address + * - addr - kernel address + * - size - region size */ -ENTRY(xscale_flush_kern_dcache_page) - add r1, r0, #PAGE_SZ +ENTRY(xscale_flush_kern_dcache_area) + add r1, r0, r1 1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry add r0, r0, #CACHELINESIZE @@ -298,13 +325,7 @@ ENTRY(xscale_flush_kern_dcache_page) * - start - virtual start address * - end - virtual end address */ -ENTRY(xscale_dma_inv_range) - mrc p15, 0, r2, c0, c0, 0 @ read ID - eor r2, r2, #0x69000000 - eor r2, r2, #0x00052000 - bics r2, r2, #1 - beq xscale_dma_flush_range - +xscale_dma_inv_range: tst r0, #CACHELINESIZE - 1 bic r0, r0, #CACHELINESIZE - 1 mcrne p15, 0, r0, c7, c10, 1 @ clean D entry @@ -325,7 +346,7 @@ ENTRY(xscale_dma_inv_range) * - start - virtual start address * - end - virtual end address */ -ENTRY(xscale_dma_clean_range) +xscale_dma_clean_range: bic r0, r0, #CACHELINESIZE - 1 1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry add r0, r0, #CACHELINESIZE @@ -352,16 +373,85 @@ ENTRY(xscale_dma_flush_range) mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer mov pc, lr -ENTRY(xscale_cache_fns) - .long xscale_flush_kern_cache_all - .long xscale_flush_user_cache_all - .long xscale_flush_user_cache_range - .long xscale_coherent_kern_range - .long xscale_coherent_user_range - .long xscale_flush_kern_dcache_page - .long xscale_dma_inv_range - .long xscale_dma_clean_range - .long xscale_dma_flush_range +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(xscale_dma_map_area) + add r1, r1, r0 + cmp r2, 
#DMA_TO_DEVICE + beq xscale_dma_clean_range + bcs xscale_dma_inv_range + b xscale_dma_flush_range +ENDPROC(xscale_dma_map_area) + +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(xscale_80200_A0_A1_dma_map_area) + add r1, r1, r0 + teq r2, #DMA_TO_DEVICE + beq xscale_dma_clean_range + b xscale_dma_flush_range +ENDPROC(xscale_80200_A0_A1_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(xscale_dma_unmap_area) + mov pc, lr +ENDPROC(xscale_dma_unmap_area) + + .globl xscale_flush_kern_cache_louis + .equ xscale_flush_kern_cache_louis, xscale_flush_kern_cache_all + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions xscale + +/* + * On stepping A0/A1 of the 80200, invalidating D-cache by line doesn't + * clear the dirty bits, which means that if we invalidate a dirty line, + * the dirty data can still be written back to external memory later on. + * + * The recommended workaround is to always do a clean D-cache line before + * doing an invalidate D-cache line, so on the affected processors, + * dma_inv_range() is implemented as dma_flush_range(). + * + * See erratum #25 of "Intel 80200 Processor Specification Update", + * revision January 22, 2003, available at: + * http://www.intel.com/design/iio/specupdt/273415.htm + */ +.macro a0_alias basename + .globl xscale_80200_A0_A1_\basename + .type xscale_80200_A0_A1_\basename , %function + .equ xscale_80200_A0_A1_\basename , xscale_\basename +.endm + +/* + * Most of the cache functions are unchanged for these processor revisions. + * Export suitable alias symbols for the unchanged functions: + */ + a0_alias flush_icache_all + a0_alias flush_user_cache_all + a0_alias flush_kern_cache_all + a0_alias flush_kern_cache_louis + a0_alias flush_user_cache_range + a0_alias coherent_kern_range + a0_alias coherent_user_range + a0_alias flush_kern_dcache_area + a0_alias dma_flush_range + a0_alias dma_unmap_area + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions xscale_80200_A0_A1 ENTRY(cpu_xscale_dcache_clean_area) 1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry @@ -372,8 +462,6 @@ ENTRY(cpu_xscale_dcache_clean_area) /* =============================== PageTable ============================== */ -#define PTE_CACHE_WRITE_ALLOCATE 0 - /* * cpu_xscale_switch_mm(pgd) * @@ -391,89 +479,99 @@ ENTRY(cpu_xscale_switch_mm) cpwait_ret lr, ip /* - * cpu_xscale_set_pte(ptep, pte) + * cpu_xscale_set_pte_ext(ptep, pte, ext) * * Set a PTE and flush it out * * Errata 40: must set memory to write-through for user read-only pages. */ - .align 5 -ENTRY(cpu_xscale_set_pte) - str r1, [r0], #-2048 @ linux version - - bic r2, r1, #0xff0 - orr r2, r2, #PTE_TYPE_EXT @ extended page - - eor r3, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY - - tst r3, #L_PTE_USER @ User? 
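The erratum #25 comment above (80200 steppings A0/A1) is the reason dma_inv_range is effectively replaced by dma_flush_range on those parts. A hedged C sketch of the two behaviours, where clean_line/inv_line/flush_line merely stand in for the CP15 c7 maintenance operations:

#include <stdint.h>
#include <stdio.h>

#define CACHELINESIZE	32	/* as in proc-xscale.S */
#define LINE_ALIGN(a)	((a) & ~(uintptr_t)(CACHELINESIZE - 1))

/* Stand-in hooks; on real hardware these are c7 cache mcr's. */
static void clean_line(uintptr_t a) { printf("clean      %#lx\n", (unsigned long)a); }
static void inv_line(uintptr_t a)   { printf("invalidate %#lx\n", (unsigned long)a); }
static void flush_line(uintptr_t a) { clean_line(a); inv_line(a); }

/* Normal steppings (xscale_dma_inv_range): clean only the partial lines at
 * the edges of the range, then invalidate every line the range covers. */
static void dma_inv_range(uintptr_t start, uintptr_t end)
{
	if (start & (CACHELINESIZE - 1))
		clean_line(LINE_ALIGN(start));
	if (end & (CACHELINESIZE - 1))
		clean_line(LINE_ALIGN(end));
	for (uintptr_t a = LINE_ALIGN(start); a < end; a += CACHELINESIZE)
		inv_line(a);
}

/* 80200 A0/A1, erratum #25: a line invalidate does not clear the dirty bits,
 * so every line must be cleaned before it is invalidated. */
static void dma_inv_range_80200_A0_A1(uintptr_t start, uintptr_t end)
{
	for (uintptr_t a = LINE_ALIGN(start); a < end; a += CACHELINESIZE)
		flush_line(a);
}

int main(void)
{
	dma_inv_range(0x1010, 0x1070);
	dma_inv_range_80200_A0_A1(0x1010, 0x1070);
	return 0;
}

The non-errata version still has to clean the partial lines at either end of the range, since invalidating them outright would discard data outside the requested region.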
- orrne r2, r2, #PTE_EXT_AP_URO_SRW @ yes -> user r/o, system r/w +cpu_xscale_mt_table: + .long 0x00 @ L_PTE_MT_UNCACHED + .long PTE_BUFFERABLE @ L_PTE_MT_BUFFERABLE + .long PTE_CACHEABLE @ L_PTE_MT_WRITETHROUGH + .long PTE_CACHEABLE | PTE_BUFFERABLE @ L_PTE_MT_WRITEBACK + .long PTE_EXT_TEX(1) | PTE_BUFFERABLE @ L_PTE_MT_DEV_SHARED + .long 0x00 @ unused + .long PTE_EXT_TEX(1) | PTE_CACHEABLE @ L_PTE_MT_MINICACHE + .long PTE_EXT_TEX(1) | PTE_CACHEABLE | PTE_BUFFERABLE @ L_PTE_MT_WRITEALLOC + .long 0x00 @ unused + .long PTE_BUFFERABLE @ L_PTE_MT_DEV_WC + .long 0x00 @ unused + .long PTE_CACHEABLE | PTE_BUFFERABLE @ L_PTE_MT_DEV_CACHED + .long 0x00 @ L_PTE_MT_DEV_NONSHARED + .long 0x00 @ unused + .long 0x00 @ unused + .long 0x00 @ unused - tst r3, #L_PTE_WRITE | L_PTE_DIRTY @ Write and Dirty? - orreq r2, r2, #PTE_EXT_AP_UNO_SRW @ yes -> user n/a, system r/w - @ combined with user -> user r/w - - @ - @ Handle the X bit. We want to set this bit for the minicache - @ (U = E = B = W = 0, C = 1) or when write allocate is enabled, - @ and we have a writeable, cacheable region. If we ignore the - @ U and E bits, we can allow user space to use the minicache as - @ well. - @ - @ X = (C & ~W & ~B) | (C & W & B & write_allocate) - @ - eor ip, r1, #L_PTE_CACHEABLE - tst ip, #L_PTE_CACHEABLE | L_PTE_WRITE | L_PTE_BUFFERABLE -#if PTE_CACHE_WRITE_ALLOCATE - eorne ip, r1, #L_PTE_CACHEABLE | L_PTE_WRITE | L_PTE_BUFFERABLE - tstne ip, #L_PTE_CACHEABLE | L_PTE_WRITE | L_PTE_BUFFERABLE -#endif - orreq r2, r2, #PTE_EXT_TEX(1) + .align 5 +ENTRY(cpu_xscale_set_pte_ext) + xscale_set_pte_ext_prologue @ - @ Erratum 40: The B bit must be cleared for a user read-only - @ cacheable page. - @ - @ B = B & ~(U & C & ~W) + @ Erratum 40: must set memory to write-through for user read-only pages @ - and ip, r1, #L_PTE_USER | L_PTE_WRITE | L_PTE_CACHEABLE - teq ip, #L_PTE_USER | L_PTE_CACHEABLE - biceq r2, r2, #PTE_BUFFERABLE + and ip, r1, #(L_PTE_MT_MASK | L_PTE_USER | L_PTE_RDONLY) & ~(4 << 2) + teq ip, #L_PTE_MT_WRITEBACK | L_PTE_USER | L_PTE_RDONLY - tst r3, #L_PTE_PRESENT | L_PTE_YOUNG @ Present and Young? - movne r2, #0 @ no -> fault + moveq r1, #L_PTE_MT_WRITETHROUGH + and r1, r1, #L_PTE_MT_MASK + adr ip, cpu_xscale_mt_table + ldr ip, [ip, r1] + bic r2, r2, #0x0c + orr r2, r2, ip - str r2, [r0] @ hardware version - mov ip, #0 - mcr p15, 0, r0, c7, c10, 1 @ Clean D cache line - mcr p15, 0, ip, c7, c10, 4 @ Drain Write (& Fill) Buffer + xscale_set_pte_ext_epilogue mov pc, lr - .ltorg - .align - __INIT +.globl cpu_xscale_suspend_size +.equ cpu_xscale_suspend_size, 4 * 6 +#ifdef CONFIG_ARM_CPU_SUSPEND +ENTRY(cpu_xscale_do_suspend) + stmfd sp!, {r4 - r9, lr} + mrc p14, 0, r4, c6, c0, 0 @ clock configuration, for turbo mode + mrc p15, 0, r5, c15, c1, 0 @ CP access reg + mrc p15, 0, r6, c13, c0, 0 @ PID + mrc p15, 0, r7, c3, c0, 0 @ domain ID + mrc p15, 0, r8, c1, c1, 0 @ auxiliary control reg + mrc p15, 0, r9, c1, c0, 0 @ control reg + bic r4, r4, #2 @ clear frequency change bit + stmia r0, {r4 - r9} @ store cp regs + ldmfd sp!, {r4 - r9, pc} +ENDPROC(cpu_xscale_do_suspend) + +ENTRY(cpu_xscale_do_resume) + ldmia r0, {r4 - r9} @ load cp regs + mov ip, #0 + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs + mcr p15, 0, ip, c7, c7, 0 @ invalidate I & D caches, BTB + mcr p14, 0, r4, c6, c0, 0 @ clock configuration, turbo mode. 
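cpu_xscale_mt_table and the Erratum 40 check above translate the Linux memory-type field of a PTE into the hardware C/B (and TEX) bits, first downgrading a user-visible, read-only, write-back page to write-through. A simplified C version of that lookup; the enum and the PTE_* bit positions are illustrative, not copies of the kernel's L_PTE_* definitions:

#include <stdint.h>
#include <stdio.h>

enum mem_type { MT_UNCACHED, MT_BUFFERABLE, MT_WRITETHROUGH, MT_WRITEBACK };
#define PTE_BUFFERABLE	(1u << 2)	/* hardware B bit (assumed position) */
#define PTE_CACHEABLE	(1u << 3)	/* hardware C bit (assumed position) */

/* Same idea as cpu_xscale_mt_table: memory type -> bits OR'ed into the
 * hardware PTE (only the first four rows of the table are modelled). */
static const uint32_t mt_to_hw[] = {
	[MT_UNCACHED]     = 0,
	[MT_BUFFERABLE]   = PTE_BUFFERABLE,
	[MT_WRITETHROUGH] = PTE_CACHEABLE,
	[MT_WRITEBACK]    = PTE_CACHEABLE | PTE_BUFFERABLE,
};

/* Erratum 40, as described in the hunk above: user + read-only + write-back
 * must be treated as write-through before the table lookup. */
static uint32_t hw_cache_bits(enum mem_type mt, int user, int rdonly)
{
	if (mt == MT_WRITEBACK && user && rdonly)
		mt = MT_WRITETHROUGH;
	return mt_to_hw[mt];
}

int main(void)
{
	printf("user r/o writeback -> %#x\n",
	       (unsigned)hw_cache_bits(MT_WRITEBACK, 1, 1));
	return 0;
}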
+ mcr p15, 0, r5, c15, c1, 0 @ CP access reg + mcr p15, 0, r6, c13, c0, 0 @ PID + mcr p15, 0, r7, c3, c0, 0 @ domain ID + mcr p15, 0, r1, c2, c0, 0 @ translation table base addr + mcr p15, 0, r8, c1, c1, 0 @ auxiliary control reg + mov r0, r9 @ control register + b cpu_resume_mmu +ENDPROC(cpu_xscale_do_resume) +#endif .type __xscale_setup, #function __xscale_setup: mcr p15, 0, ip, c7, c7, 0 @ invalidate I, D caches & BTB mcr p15, 0, ip, c7, c10, 4 @ Drain Write (& Fill) Buffer mcr p15, 0, ip, c8, c7, 0 @ invalidate I, D TLBs -#ifdef CONFIG_IWMMXT - mov r0, #0 @ initially disallow access to CP0/CP1 -#else - mov r0, #1 @ Allow access to CP0 -#endif - orr r0, r0, #1 << 6 @ cp6 for IOP3xx and Bulverde + mov r0, #1 << 6 @ cp6 for IOP3xx and Bulverde orr r0, r0, #1 << 13 @ Its undefined whether this mcr p15, 0, r0, c15, c1, 0 @ affects USR or SVC modes + + adr r5, xscale_crval + ldmia r5, {r5, r6} mrc p15, 0, r0, c1, c0, 0 @ get control register - ldr r5, xscale_cr1_clear bic r0, r0, r5 - ldr r5, xscale_cr1_set - orr r0, r0, r5 + orr r0, r0, r6 mov pc, lr .size __xscale_setup, . - __xscale_setup @@ -483,316 +581,80 @@ __xscale_setup: * ..11 1.01 .... .101 * */ - .type xscale_cr1_clear, #object - .type xscale_cr1_set, #object -xscale_cr1_clear: - .word 0x3b07 -xscale_cr1_set: - .word 0x3905 + .type xscale_crval, #object +xscale_crval: + crval clear=0x00003b07, mmuset=0x00003905, ucset=0x00001900 __INITDATA -/* - * Purpose : Function pointers used to access above functions - all calls - * come through these - */ - - .type xscale_processor_functions, #object -ENTRY(xscale_processor_functions) - .word v5t_early_abort - .word cpu_xscale_proc_init - .word cpu_xscale_proc_fin - .word cpu_xscale_reset - .word cpu_xscale_do_idle - .word cpu_xscale_dcache_clean_area - .word cpu_xscale_switch_mm - .word cpu_xscale_set_pte - .size xscale_processor_functions, . - xscale_processor_functions + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions xscale, dabort=v5t_early_abort, pabort=legacy_pabort, suspend=1 .section ".rodata" - .type cpu_arch_name, #object -cpu_arch_name: - .asciz "armv5te" - .size cpu_arch_name, . - cpu_arch_name - - .type cpu_elf_name, #object -cpu_elf_name: - .asciz "v5" - .size cpu_elf_name, . - cpu_elf_name - - .type cpu_80200_name, #object -cpu_80200_name: - .asciz "XScale-80200" - .size cpu_80200_name, . - cpu_80200_name - - .type cpu_8032x_name, #object -cpu_8032x_name: - .asciz "XScale-IOP8032x Family" - .size cpu_8032x_name, . - cpu_8032x_name - - .type cpu_8033x_name, #object -cpu_8033x_name: - .asciz "XScale-IOP8033x Family" - .size cpu_8033x_name, . - cpu_8033x_name - - .type cpu_pxa250_name, #object -cpu_pxa250_name: - .asciz "XScale-PXA250" - .size cpu_pxa250_name, . - cpu_pxa250_name - - .type cpu_pxa210_name, #object -cpu_pxa210_name: - .asciz "XScale-PXA210" - .size cpu_pxa210_name, . - cpu_pxa210_name - - .type cpu_ixp42x_name, #object -cpu_ixp42x_name: - .asciz "XScale-IXP42x Family" - .size cpu_ixp42x_name, . - cpu_ixp42x_name - - .type cpu_ixp46x_name, #object -cpu_ixp46x_name: - .asciz "XScale-IXP46x Family" - .size cpu_ixp46x_name, . - cpu_ixp46x_name - - .type cpu_ixp2400_name, #object -cpu_ixp2400_name: - .asciz "XScale-IXP2400" - .size cpu_ixp2400_name, . - cpu_ixp2400_name - - .type cpu_ixp2800_name, #object -cpu_ixp2800_name: - .asciz "XScale-IXP2800" - .size cpu_ixp2800_name, . - cpu_ixp2800_name - - .type cpu_pxa255_name, #object -cpu_pxa255_name: - .asciz "XScale-PXA255" - .size cpu_pxa255_name, . 
- cpu_pxa255_name - - .type cpu_pxa270_name, #object -cpu_pxa270_name: - .asciz "XScale-PXA270" - .size cpu_pxa270_name, . - cpu_pxa270_name + string cpu_arch_name, "armv5te" + string cpu_elf_name, "v5" + + string cpu_80200_A0_A1_name, "XScale-80200 A0/A1" + string cpu_80200_name, "XScale-80200" + string cpu_80219_name, "XScale-80219" + string cpu_8032x_name, "XScale-IOP8032x Family" + string cpu_8033x_name, "XScale-IOP8033x Family" + string cpu_pxa250_name, "XScale-PXA250" + string cpu_pxa210_name, "XScale-PXA210" + string cpu_ixp42x_name, "XScale-IXP42x Family" + string cpu_ixp43x_name, "XScale-IXP43x Family" + string cpu_ixp46x_name, "XScale-IXP46x Family" + string cpu_ixp2400_name, "XScale-IXP2400" + string cpu_ixp2800_name, "XScale-IXP2800" + string cpu_pxa255_name, "XScale-PXA255" + string cpu_pxa270_name, "XScale-PXA270" .align .section ".proc.info.init", #alloc, #execinstr - .type __80200_proc_info,#object -__80200_proc_info: - .long 0x69052000 - .long 0xfffffff0 - .long PMD_TYPE_SECT | \ +.macro xscale_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cache + .type __\name\()_proc_info,#object +__\name\()_proc_info: + .long \cpu_val + .long \cpu_mask + .long PMD_TYPE_SECT | \ PMD_SECT_BUFFERABLE | \ PMD_SECT_CACHEABLE | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ - b __xscale_setup - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP - .long cpu_80200_name - .long xscale_processor_functions - .long v4wbi_tlb_fns - .long xscale_mc_user_fns - .long xscale_cache_fns - .size __80200_proc_info, . - __80200_proc_info - - .type __8032x_proc_info,#object -__8032x_proc_info: - .long 0x69052420 - .long 0xfffff5e0 @ mask should accomodate IOP80219 also - .long PMD_TYPE_SECT | \ - PMD_SECT_BUFFERABLE | \ - PMD_SECT_CACHEABLE | \ + .long PMD_TYPE_SECT | \ PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ b __xscale_setup .long cpu_arch_name .long cpu_elf_name .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP - .long cpu_8032x_name + .long \cpu_name .long xscale_processor_functions .long v4wbi_tlb_fns .long xscale_mc_user_fns - .long xscale_cache_fns - .size __8032x_proc_info, . - __8032x_proc_info - - .type __8033x_proc_info,#object -__8033x_proc_info: - .long 0x69054010 - .long 0xffffff30 - .long PMD_TYPE_SECT | \ - PMD_SECT_BUFFERABLE | \ - PMD_SECT_CACHEABLE | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - b __xscale_setup - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP - .long cpu_8033x_name - .long xscale_processor_functions - .long v4wbi_tlb_fns - .long xscale_mc_user_fns - .long xscale_cache_fns - .size __8033x_proc_info, . - __8033x_proc_info - - .type __pxa250_proc_info,#object -__pxa250_proc_info: - .long 0x69052100 - .long 0xfffff7f0 - .long PMD_TYPE_SECT | \ - PMD_SECT_BUFFERABLE | \ - PMD_SECT_CACHEABLE | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - b __xscale_setup - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP - .long cpu_pxa250_name - .long xscale_processor_functions - .long v4wbi_tlb_fns - .long xscale_mc_user_fns - .long xscale_cache_fns - .size __pxa250_proc_info, . 
- __pxa250_proc_info - - .type __pxa210_proc_info,#object -__pxa210_proc_info: - .long 0x69052120 - .long 0xfffff3f0 - .long PMD_TYPE_SECT | \ - PMD_SECT_BUFFERABLE | \ - PMD_SECT_CACHEABLE | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - b __xscale_setup - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP - .long cpu_pxa210_name - .long xscale_processor_functions - .long v4wbi_tlb_fns - .long xscale_mc_user_fns - .long xscale_cache_fns - .size __pxa210_proc_info, . - __pxa210_proc_info - - .type __ixp2400_proc_info, #object -__ixp2400_proc_info: - .long 0x69054190 - .long 0xfffffff0 - .long PMD_TYPE_SECT | \ - PMD_SECT_BUFFERABLE | \ - PMD_SECT_CACHEABLE | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - b __xscale_setup - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP - .long cpu_ixp2400_name - .long xscale_processor_functions - .long v4wbi_tlb_fns - .long xscale_mc_user_fns - .long xscale_cache_fns - .size __ixp2400_proc_info, . - __ixp2400_proc_info - - .type __ixp2800_proc_info, #object -__ixp2800_proc_info: - .long 0x690541a0 - .long 0xfffffff0 - .long PMD_TYPE_SECT | \ - PMD_SECT_BUFFERABLE | \ - PMD_SECT_CACHEABLE | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - b __xscale_setup - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP - .long cpu_ixp2800_name - .long xscale_processor_functions - .long v4wbi_tlb_fns - .long xscale_mc_user_fns - .long xscale_cache_fns - .size __ixp2800_proc_info, . - __ixp2800_proc_info - - .type __ixp42x_proc_info, #object -__ixp42x_proc_info: - .long 0x690541c0 - .long 0xffffffc0 - .long PMD_TYPE_SECT | \ - PMD_SECT_BUFFERABLE | \ - PMD_SECT_CACHEABLE | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - b __xscale_setup - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP - .long cpu_ixp42x_name - .long xscale_processor_functions - .long v4wbi_tlb_fns - .long xscale_mc_user_fns - .long xscale_cache_fns - .size __ixp42x_proc_info, . - __ixp42x_proc_info - - .type __ixp46x_proc_info, #object -__ixp46x_proc_info: - .long 0x69054200 - .long 0xffffff00 - .long 0x00000c0e - b __xscale_setup - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP - .long cpu_ixp46x_name - .long xscale_processor_functions - .long v4wbi_tlb_fns - .long xscale_mc_user_fns - .long xscale_cache_fns - .size __ixp46x_proc_info, . - __ixp46x_proc_info - - .type __pxa255_proc_info,#object -__pxa255_proc_info: - .long 0x69052d00 - .long 0xfffffff0 - .long PMD_TYPE_SECT | \ - PMD_SECT_BUFFERABLE | \ - PMD_SECT_CACHEABLE | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - b __xscale_setup - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP - .long cpu_pxa255_name - .long xscale_processor_functions - .long v4wbi_tlb_fns - .long xscale_mc_user_fns - .long xscale_cache_fns - .size __pxa255_proc_info, . 
- __pxa255_proc_info - - .type __pxa270_proc_info,#object -__pxa270_proc_info: - .long 0x69054110 - .long 0xfffffff0 - .long PMD_TYPE_SECT | \ - PMD_SECT_BUFFERABLE | \ - PMD_SECT_CACHEABLE | \ - PMD_SECT_AP_WRITE | \ - PMD_SECT_AP_READ - b __xscale_setup - .long cpu_arch_name - .long cpu_elf_name - .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP - .long cpu_pxa270_name - .long xscale_processor_functions - .long v4wbi_tlb_fns - .long xscale_mc_user_fns - .long xscale_cache_fns - .size __pxa270_proc_info, . - __pxa270_proc_info - + .ifb \cache + .long xscale_cache_fns + .else + .long \cache + .endif + .size __\name\()_proc_info, . - __\name\()_proc_info +.endm + + xscale_proc_info 80200_A0_A1, 0x69052000, 0xfffffffe, cpu_80200_name, \ + cache=xscale_80200_A0_A1_cache_fns + xscale_proc_info 80200, 0x69052000, 0xfffffff0, cpu_80200_name + xscale_proc_info 80219, 0x69052e20, 0xffffffe0, cpu_80219_name + xscale_proc_info 8032x, 0x69052420, 0xfffff7e0, cpu_8032x_name + xscale_proc_info 8033x, 0x69054010, 0xfffffd30, cpu_8033x_name + xscale_proc_info pxa250, 0x69052100, 0xfffff7f0, cpu_pxa250_name + xscale_proc_info pxa210, 0x69052120, 0xfffff3f0, cpu_pxa210_name + xscale_proc_info ixp2400, 0x69054190, 0xfffffff0, cpu_ixp2400_name + xscale_proc_info ixp2800, 0x690541a0, 0xfffffff0, cpu_ixp2800_name + xscale_proc_info ixp42x, 0x690541c0, 0xffffffc0, cpu_ixp42x_name + xscale_proc_info ixp43x, 0x69054040, 0xfffffff0, cpu_ixp43x_name + xscale_proc_info ixp46x, 0x69054200, 0xffffff00, cpu_ixp46x_name + xscale_proc_info pxa255, 0x69052d00, 0xfffffff0, cpu_pxa255_name + xscale_proc_info pxa270, 0x69054110, 0xfffffff0, cpu_pxa270_name diff --git a/arch/arm/mm/tcm.h b/arch/arm/mm/tcm.h new file mode 100644 index 00000000000..8015ad434a4 --- /dev/null +++ b/arch/arm/mm/tcm.h @@ -0,0 +1,17 @@ +/* + * Copyright (C) 2008-2009 ST-Ericsson AB + * License terms: GNU General Public License (GPL) version 2 + * TCM memory handling for ARM systems + * + * Author: Linus Walleij <linus.walleij@stericsson.com> + * Author: Rickard Andersson <rickard.andersson@stericsson.com> + */ + +#ifdef CONFIG_HAVE_TCM +void __init tcm_init(void); +#else +/* No TCM support, just blank inlines to be optimized out */ +inline void tcm_init(void) +{ +} +#endif diff --git a/arch/arm/mm/tlb-fa.S b/arch/arm/mm/tlb-fa.S new file mode 100644 index 00000000000..d3ddcf9a76c --- /dev/null +++ b/arch/arm/mm/tlb-fa.S @@ -0,0 +1,69 @@ +/* + * linux/arch/arm/mm/tlb-fa.S + * + * Copyright (C) 2005 Faraday Corp. + * Copyright (C) 2008-2009 Paulius Zaleckas <paulius.zaleckas@teltonika.lt> + * + * Based on tlb-v4wbi.S: + * Copyright (C) 1997-2002 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * ARM architecture version 4, Faraday variation. + * This assume an unified TLBs, with a write buffer, and branch target buffer (BTB) + * + * Processors: FA520 FA526 FA626 + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/asm-offsets.h> +#include <asm/tlbflush.h> +#include "proc-macros.S" + + +/* + * flush_user_tlb_range(start, end, mm) + * + * Invalidate a range of TLB entries in the specified address space. + * + * - start - range start address + * - end - range end address + * - mm - mm_struct describing address space + */ + .align 4 +ENTRY(fa_flush_user_tlb_range) + vma_vm_mm ip, r2 + act_mm r3 @ get current->active_mm + eors r3, ip, r3 @ == mm ? 
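fa_flush_user_tlb_range, which begins above and continues below, follows the usual pattern for these user-range flushes: do nothing unless the vma belongs to the currently active mm, then invalidate the range one page at a time with a write-buffer drain on either side. In rough C, with vma_mm/active_mm standing in for vma->vm_mm and current->active_mm and the CP15 operations reduced to stubs:

#include <stdint.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1u << PAGE_SHIFT)
#define PAGE_MASK	(~(uintptr_t)(PAGE_SIZE - 1))

/* Stand-ins for the two CP15 operations used in the routine. */
static void drain_write_buffer(void) { }
static void invalidate_utlb_entry(uintptr_t mva) { (void)mva; }

static void flush_user_tlb_range(uintptr_t start, uintptr_t end,
				 const void *vma_mm, const void *active_mm)
{
	if (vma_mm != active_mm)
		return;				/* eors / movne pc, lr   */
	drain_write_buffer();			/* drain WB              */
	for (uintptr_t mva = start & PAGE_MASK; mva < end; mva += PAGE_SIZE)
		invalidate_utlb_entry(mva);	/* c8, c7, 1 per page    */
	drain_write_buffer();			/* data write barrier    */
}

int main(void)
{
	int mm;
	flush_user_tlb_range(0x10010, 0x13000, &mm, &mm);
	return 0;
}

The two bic instructions with #0x0ff and #0xf00 clear bits [11:0], which is exactly the "start & PAGE_MASK" alignment in the sketch.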
+ movne pc, lr @ no, we dont do anything + mov r3, #0 + mcr p15, 0, r3, c7, c10, 4 @ drain WB + bic r0, r0, #0x0ff + bic r0, r0, #0xf00 +1: mcr p15, 0, r0, c8, c7, 1 @ invalidate UTLB entry + add r0, r0, #PAGE_SZ + cmp r0, r1 + blo 1b + mcr p15, 0, r3, c7, c10, 4 @ data write barrier + mov pc, lr + + +ENTRY(fa_flush_kern_tlb_range) + mov r3, #0 + mcr p15, 0, r3, c7, c10, 4 @ drain WB + bic r0, r0, #0x0ff + bic r0, r0, #0xf00 +1: mcr p15, 0, r0, c8, c7, 1 @ invalidate UTLB entry + add r0, r0, #PAGE_SZ + cmp r0, r1 + blo 1b + mcr p15, 0, r3, c7, c10, 4 @ data write barrier + mcr p15, 0, r3, c7, c5, 4 @ prefetch flush (isb) + mov pc, lr + + __INITDATA + + /* define struct cpu_tlb_fns (see <asm/tlbflush.h> and proc-macros.S) */ + define_tlb_functions fa, fa_tlb_flags diff --git a/arch/arm/mm/tlb-v3.S b/arch/arm/mm/tlb-v3.S deleted file mode 100644 index c10786ec8e0..00000000000 --- a/arch/arm/mm/tlb-v3.S +++ /dev/null @@ -1,52 +0,0 @@ -/* - * linux/arch/arm/mm/tlbv3.S - * - * Copyright (C) 1997-2002 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * ARM architecture version 3 TLB handling functions. - * - * Processors: ARM610, ARM710. - */ -#include <linux/linkage.h> -#include <linux/init.h> -#include <asm/asm-offsets.h> -#include <asm/tlbflush.h> -#include "proc-macros.S" - - .align 5 -/* - * v3_flush_user_tlb_range(start, end, mm) - * - * Invalidate a range of TLB entries in the specified address space. - * - * - start - range start address - * - end - range end address - * - mm - mm_struct describing address space - */ - .align 5 -ENTRY(v3_flush_user_tlb_range) - vma_vm_mm r2, r2 - act_mm r3 @ get current->active_mm - teq r2, r3 @ == mm ? - movne pc, lr @ no, we dont do anything -ENTRY(v3_flush_kern_tlb_range) - bic r0, r0, #0x0ff - bic r0, r0, #0xf00 -1: mcr p15, 0, r0, c6, c0, 0 @ invalidate TLB entry - add r0, r0, #PAGE_SZ - cmp r0, r1 - blo 1b - mov pc, lr - - __INITDATA - - .type v3_tlb_fns, #object -ENTRY(v3_tlb_fns) - .long v3_flush_user_tlb_range - .long v3_flush_kern_tlb_range - .long v3_tlb_flags - .size v3_tlb_fns, . - v3_tlb_fns diff --git a/arch/arm/mm/tlb-v4.S b/arch/arm/mm/tlb-v4.S index d6c94457c2b..17a025ade57 100644 --- a/arch/arm/mm/tlb-v4.S +++ b/arch/arm/mm/tlb-v4.S @@ -57,9 +57,5 @@ ENTRY(v4_flush_user_tlb_range) __INITDATA - .type v4_tlb_fns, #object -ENTRY(v4_tlb_fns) - .long v4_flush_user_tlb_range - .long v4_flush_kern_tlb_range - .long v4_tlb_flags - .size v4_tlb_fns, . - v4_tlb_fns + /* define struct cpu_tlb_fns (see <asm/tlbflush.h> and proc-macros.S) */ + define_tlb_functions v4, v4_tlb_flags diff --git a/arch/arm/mm/tlb-v4wb.S b/arch/arm/mm/tlb-v4wb.S index cb829ca7845..c04598fa4d4 100644 --- a/arch/arm/mm/tlb-v4wb.S +++ b/arch/arm/mm/tlb-v4wb.S @@ -69,9 +69,5 @@ ENTRY(v4wb_flush_kern_tlb_range) __INITDATA - .type v4wb_tlb_fns, #object -ENTRY(v4wb_tlb_fns) - .long v4wb_flush_user_tlb_range - .long v4wb_flush_kern_tlb_range - .long v4wb_tlb_flags - .size v4wb_tlb_fns, . 
diff --git a/arch/arm/mm/tlb-v4wb.S b/arch/arm/mm/tlb-v4wb.S
index cb829ca7845..c04598fa4d4 100644
--- a/arch/arm/mm/tlb-v4wb.S
+++ b/arch/arm/mm/tlb-v4wb.S
@@ -69,9 +69,5 @@ ENTRY(v4wb_flush_kern_tlb_range)
 
 __INITDATA
 
- .type v4wb_tlb_fns, #object
-ENTRY(v4wb_tlb_fns)
- .long v4wb_flush_user_tlb_range
- .long v4wb_flush_kern_tlb_range
- .long v4wb_tlb_flags
- .size v4wb_tlb_fns, . - v4wb_tlb_fns
+ /* define struct cpu_tlb_fns (see <asm/tlbflush.h> and proc-macros.S) */
+ define_tlb_functions v4wb, v4wb_tlb_flags
diff --git a/arch/arm/mm/tlb-v4wbi.S b/arch/arm/mm/tlb-v4wbi.S
index 60cfc4a25dd..1f6062b6c1c 100644
--- a/arch/arm/mm/tlb-v4wbi.S
+++ b/arch/arm/mm/tlb-v4wbi.S
@@ -60,9 +60,5 @@ ENTRY(v4wbi_flush_kern_tlb_range)
 
 __INITDATA
 
- .type v4wbi_tlb_fns, #object
-ENTRY(v4wbi_tlb_fns)
- .long v4wbi_flush_user_tlb_range
- .long v4wbi_flush_kern_tlb_range
- .long v4wbi_tlb_flags
- .size v4wbi_tlb_fns, . - v4wbi_tlb_fns
+ /* define struct cpu_tlb_fns (see <asm/tlbflush.h> and proc-macros.S) */
+ define_tlb_functions v4wbi, v4wbi_tlb_flags
diff --git a/arch/arm/mm/tlb-v6.S b/arch/arm/mm/tlb-v6.S
index 6f76b89ef46..eca07f550a0 100644
--- a/arch/arm/mm/tlb-v6.S
+++ b/arch/arm/mm/tlb-v6.S
@@ -10,6 +10,7 @@
 * ARM architecture version 6 TLB handling functions.
 * These assume a split I/D TLB.
 */
+#include <linux/init.h>
 #include <linux/linkage.h>
 #include <asm/asm-offsets.h>
 #include <asm/page.h>
@@ -53,6 +54,7 @@ ENTRY(v6wbi_flush_user_tlb_range)
 add r0, r0, #PAGE_SZ
 cmp r0, r1
 blo 1b
+ mcr p15, 0, ip, c7, c10, 4 @ data synchronization barrier
 mov pc, lr
 
 /*
@@ -80,13 +82,11 @@ ENTRY(v6wbi_flush_kern_tlb_range)
 add r0, r0, #PAGE_SZ
 cmp r0, r1
 blo 1b
+ mcr p15, 0, r2, c7, c10, 4 @ data synchronization barrier
+ mcr p15, 0, r2, c7, c5, 4 @ prefetch flush (isb)
 mov pc, lr
 
- .section ".text.init", #alloc, #execinstr
+ __INIT
 
- .type v6wbi_tlb_fns, #object
-ENTRY(v6wbi_tlb_fns)
- .long v6wbi_flush_user_tlb_range
- .long v6wbi_flush_kern_tlb_range
- .long v6wbi_tlb_flags
- .size v6wbi_tlb_fns, . - v6wbi_tlb_fns
+ /* define struct cpu_tlb_fns (see <asm/tlbflush.h> and proc-macros.S) */
+ define_tlb_functions v6wbi, v6wbi_tlb_flags
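The tlb-v6.S change above adds explicit barriers after the invalidate loops: a data synchronization barrier in both paths and an additional prefetch flush (ISB) on the kernel-range path. All of the range-flush routines in these files share the same basic shape, sketched below in C with hypothetical helpers standing in for the coprocessor instructions; this is an illustration, not kernel code:

#define PAGE_SHIFT_SKETCH 12			/* assuming 4 KiB pages */
#define PAGE_SIZE_SKETCH  (1UL << PAGE_SHIFT_SKETCH)
#define PAGE_MASK_SKETCH  (~(PAGE_SIZE_SKETCH - 1))

/* Hypothetical stand-ins for the MCR instructions used in the assembly. */
static void tlb_invalidate_entry_sketch(unsigned long mva) { (void)mva; /* mcr p15, 0, mva, c8, c7, 1 */ }
static void dsb_sketch(void) { /* mcr p15, 0, rX, c7, c10, 4 */ }
static void isb_sketch(void) { /* mcr p15, 0, rX, c7, c5, 4  */ }

static void flush_kern_tlb_range_sketch(unsigned long start, unsigned long end)
{
	start &= PAGE_MASK_SKETCH;		/* same effect as the bic #0x0ff / #0xf00 pair */
	do {
		tlb_invalidate_entry_sketch(start);	/* one TLB entry per page */
		start += PAGE_SIZE_SKETCH;
	} while (start < end);
	dsb_sketch();	/* data synchronization barrier added by the v6 hunks */
	isb_sketch();	/* prefetch flush, kernel-range paths only */
}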
diff --git a/arch/arm/mm/tlb-v7.S b/arch/arm/mm/tlb-v7.S
new file mode 100644
index 00000000000..355308767ba
--- /dev/null
+++ b/arch/arm/mm/tlb-v7.S
@@ -0,0 +1,95 @@
+/*
+ * linux/arch/arm/mm/tlb-v7.S
+ *
+ * Copyright (C) 1997-2002 Russell King
+ * Modified for ARMv7 by Catalin Marinas
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * ARM architecture version 6 TLB handling functions.
+ * These assume a split I/D TLB.
+ */
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/asm-offsets.h>
+#include <asm/page.h>
+#include <asm/tlbflush.h>
+#include "proc-macros.S"
+
+/*
+ * v7wbi_flush_user_tlb_range(start, end, vma)
+ *
+ * Invalidate a range of TLB entries in the specified address space.
+ *
+ * - start - start address (may not be aligned)
+ * - end - end address (exclusive, may not be aligned)
+ * - vma - vma_struct describing address range
+ *
+ * It is assumed that:
+ * - the "Invalidate single entry" instruction will invalidate
+ * both the I and the D TLBs on Harvard-style TLBs
+ */
+ENTRY(v7wbi_flush_user_tlb_range)
+ vma_vm_mm r3, r2 @ get vma->vm_mm
+ mmid r3, r3 @ get vm_mm->context.id
+ dsb ish
+ mov r0, r0, lsr #PAGE_SHIFT @ align address
+ mov r1, r1, lsr #PAGE_SHIFT
+ asid r3, r3 @ mask ASID
+#ifdef CONFIG_ARM_ERRATA_720789
+ ALT_SMP(W(mov) r3, #0 )
+ ALT_UP(W(nop) )
+#endif
+ orr r0, r3, r0, lsl #PAGE_SHIFT @ Create initial MVA
+ mov r1, r1, lsl #PAGE_SHIFT
+1:
+#ifdef CONFIG_ARM_ERRATA_720789
+ ALT_SMP(mcr p15, 0, r0, c8, c3, 3) @ TLB invalidate U MVA all ASID (shareable)
+#else
+ ALT_SMP(mcr p15, 0, r0, c8, c3, 1) @ TLB invalidate U MVA (shareable)
+#endif
+ ALT_UP(mcr p15, 0, r0, c8, c7, 1) @ TLB invalidate U MVA
+
+ add r0, r0, #PAGE_SZ
+ cmp r0, r1
+ blo 1b
+ dsb ish
+ mov pc, lr
+ENDPROC(v7wbi_flush_user_tlb_range)
+
+/*
+ * v7wbi_flush_kern_tlb_range(start,end)
+ *
+ * Invalidate a range of kernel TLB entries
+ *
+ * - start - start address (may not be aligned)
+ * - end - end address (exclusive, may not be aligned)
+ */
+ENTRY(v7wbi_flush_kern_tlb_range)
+ dsb ish
+ mov r0, r0, lsr #PAGE_SHIFT @ align address
+ mov r1, r1, lsr #PAGE_SHIFT
+ mov r0, r0, lsl #PAGE_SHIFT
+ mov r1, r1, lsl #PAGE_SHIFT
+1:
+#ifdef CONFIG_ARM_ERRATA_720789
+ ALT_SMP(mcr p15, 0, r0, c8, c3, 3) @ TLB invalidate U MVA all ASID (shareable)
+#else
+ ALT_SMP(mcr p15, 0, r0, c8, c3, 1) @ TLB invalidate U MVA (shareable)
+#endif
+ ALT_UP(mcr p15, 0, r0, c8, c7, 1) @ TLB invalidate U MVA
+ add r0, r0, #PAGE_SZ
+ cmp r0, r1
+ blo 1b
+ dsb ish
+ isb
+ mov pc, lr
+ENDPROC(v7wbi_flush_kern_tlb_range)
+
+ __INIT
+
+ /* define struct cpu_tlb_fns (see <asm/tlbflush.h> and proc-macros.S) */
+ define_tlb_functions v7wbi, v7wbi_tlb_flags_up, flags_smp=v7wbi_tlb_flags_smp
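In v7wbi_flush_user_tlb_range above, the value handed to each invalidate-by-MVA operation combines the page-aligned virtual address with the ASID taken from the mm's context id; with the CONFIG_ARM_ERRATA_720789 workaround on SMP, the ASID is forced to zero and the all-ASID variant of the operation is used instead. A small C sketch of that operand construction follows; the names and the 4 KiB page size are assumptions for illustration only:

#include <stdio.h>

#define PAGE_SHIFT_SKETCH 12	/* assumed 4 KiB pages */

/* Build the value the loop hands to "TLB invalidate U MVA":
 * page-aligned address in the upper bits, ASID in the low byte. */
static unsigned long make_mva_sketch(unsigned long addr, unsigned long context_id)
{
	unsigned long page = addr >> PAGE_SHIFT_SKETCH;	/* mov r0, r0, lsr #PAGE_SHIFT */
	unsigned long asid = context_id & 0xff;		/* asid r3, r3 (mask ASID) */
	return (page << PAGE_SHIFT_SKETCH) | asid;	/* orr r0, r3, r0, lsl #PAGE_SHIFT */
}

int main(void)
{
	/* Example: the entry for 0x40001234 in ASID 5 becomes operand 0x40001005. */
	printf("%#lx\n", make_mva_sketch(0x40001234UL, 5));
	return 0;
}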
