diff options
author | Benjamin Herrenschmidt <benh@kernel.crashing.org> | 2009-03-30 14:04:53 +1100 |
---|---|---|
committer | Benjamin Herrenschmidt <benh@kernel.crashing.org> | 2009-03-30 14:04:53 +1100 |
commit | 9ff9a26b786c35ee8d2a66222924a807ec851a9f (patch) | |
tree | db432a17bccca1ca2c16907f0ee83ac449ed4012 /arch/arm/mm | |
parent | 0a3108beea9143225119d5e7c72a8e2c64f3eb7d (diff) | |
parent | 0d34fb8e93ceba7b6dad0062dbb4a0813bacd75b (diff) |
Merge commit 'origin/master' into next
Manual merge of:
arch/powerpc/include/asm/elf.h
drivers/i2c/busses/i2c-mpc.c
Diffstat (limited to 'arch/arm/mm')
-rw-r--r-- | arch/arm/mm/Kconfig | 51 | ||||
-rw-r--r-- | arch/arm/mm/Makefile | 6 | ||||
-rw-r--r-- | arch/arm/mm/cache-fa.S | 220 | ||||
-rw-r--r-- | arch/arm/mm/cache-feroceon-l2.c | 54 | ||||
-rw-r--r-- | arch/arm/mm/cache-xsc3l2.c | 107 | ||||
-rw-r--r-- | arch/arm/mm/copypage-fa.c | 86 | ||||
-rw-r--r-- | arch/arm/mm/dma-mapping.c | 72 | ||||
-rw-r--r-- | arch/arm/mm/flush.c | 2 | ||||
-rw-r--r-- | arch/arm/mm/highmem.c | 116 | ||||
-rw-r--r-- | arch/arm/mm/init.c | 21 | ||||
-rw-r--r-- | arch/arm/mm/mm.h | 3 | ||||
-rw-r--r-- | arch/arm/mm/mmu.c | 43 | ||||
-rw-r--r-- | arch/arm/mm/proc-fa526.S | 248 | ||||
-rw-r--r-- | arch/arm/mm/proc-mohawk.S | 416 | ||||
-rw-r--r-- | arch/arm/mm/tlb-fa.S | 75 |
15 files changed, 1464 insertions, 56 deletions
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index d490f3773c0..20979564e7e 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -186,6 +186,24 @@ config CPU_ARM926T Say Y if you want support for the ARM926T processor. Otherwise, say N. +# FA526 +config CPU_FA526 + bool + select CPU_32v4 + select CPU_ABRT_EV4 + select CPU_PABRT_NOIFAR + select CPU_CACHE_VIVT + select CPU_CP15_MMU + select CPU_CACHE_FA + select CPU_COPY_FA if MMU + select CPU_TLB_FA if MMU + help + The FA526 is a version of the ARMv4 compatible processor with + Branch Target Buffer, Unified TLB and cache line size 16. + + Say Y if you want support for the FA526 processor. + Otherwise, say N. + # ARM940T config CPU_ARM940T bool "Support ARM940T processor" if ARCH_INTEGRATOR @@ -340,6 +358,17 @@ config CPU_XSC3 select CPU_TLB_V4WBI if MMU select IO_36 +# Marvell PJ1 (Mohawk) +config CPU_MOHAWK + bool + select CPU_32v5 + select CPU_ABRT_EV5T + select CPU_PABRT_NOIFAR + select CPU_CACHE_VIVT + select CPU_CP15_MMU + select CPU_TLB_V4WBI if MMU + select CPU_COPY_V4WB if MMU + # Feroceon config CPU_FEROCEON bool @@ -484,6 +513,9 @@ config CPU_CACHE_VIVT config CPU_CACHE_VIPT bool +config CPU_CACHE_FA + bool + if MMU # The copy-page model config CPU_COPY_V3 @@ -498,6 +530,9 @@ config CPU_COPY_V4WB config CPU_COPY_FEROCEON bool +config CPU_COPY_FA + bool + config CPU_COPY_V6 bool @@ -528,6 +563,13 @@ config CPU_TLB_FEROCEON help Feroceon TLB (v4wbi with non-outer-cachable page table walks). +config CPU_TLB_FA + bool + help + Faraday ARM FA526 architecture, unified TLB with writeback cache + and invalidate instruction cache entry. Branch target buffer is + also supported. + config CPU_TLB_V6 bool @@ -569,7 +611,7 @@ comment "Processor Features" config ARM_THUMB bool "Support Thumb user binaries" - depends on CPU_ARM720T || CPU_ARM740T || CPU_ARM920T || CPU_ARM922T || CPU_ARM925T || CPU_ARM926T || CPU_ARM940T || CPU_ARM946E || CPU_ARM1020 || CPU_ARM1020E || CPU_ARM1022 || CPU_ARM1026 || CPU_XSCALE || CPU_XSC3 || CPU_V6 || CPU_V7 || CPU_FEROCEON + depends on CPU_ARM720T || CPU_ARM740T || CPU_ARM920T || CPU_ARM922T || CPU_ARM925T || CPU_ARM926T || CPU_ARM940T || CPU_ARM946E || CPU_ARM1020 || CPU_ARM1020E || CPU_ARM1022 || CPU_ARM1026 || CPU_XSCALE || CPU_XSC3 || CPU_MOHAWK || CPU_V6 || CPU_V7 || CPU_FEROCEON default y help Say Y if you want to include kernel support for running user space @@ -638,7 +680,7 @@ config CPU_DCACHE_SIZE config CPU_DCACHE_WRITETHROUGH bool "Force write through D-cache" - depends on (CPU_ARM740T || CPU_ARM920T || CPU_ARM922T || CPU_ARM925T || CPU_ARM926T || CPU_ARM940T || CPU_ARM946E || CPU_ARM1020) && !CPU_DCACHE_DISABLE + depends on (CPU_ARM740T || CPU_ARM920T || CPU_ARM922T || CPU_ARM925T || CPU_ARM926T || CPU_ARM940T || CPU_ARM946E || CPU_ARM1020 || CPU_FA526) && !CPU_DCACHE_DISABLE default y if CPU_ARM925T help Say Y here to use the data cache in writethrough mode. Unless you @@ -653,7 +695,7 @@ config CPU_CACHE_ROUND_ROBIN config CPU_BPREDICT_DISABLE bool "Disable branch prediction" - depends on CPU_ARM1020 || CPU_V6 || CPU_XSC3 || CPU_V7 + depends on CPU_ARM1020 || CPU_V6 || CPU_MOHAWK || CPU_XSC3 || CPU_V7 || CPU_FA526 help Say Y here to disable branch prediction. If unsure, say N. @@ -704,7 +746,8 @@ config CACHE_FEROCEON_L2_WRITETHROUGH config CACHE_L2X0 bool "Enable the L2x0 outer cache controller" - depends on REALVIEW_EB_ARM11MP || MACH_REALVIEW_PB11MP || MACH_REALVIEW_PB1176 || REALVIEW_EB_A9MP + depends on REALVIEW_EB_ARM11MP || MACH_REALVIEW_PB11MP || MACH_REALVIEW_PB1176 || \ + REALVIEW_EB_A9MP || ARCH_MX35 || ARCH_MX31 default y select OUTER_CACHE help diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile index 480f78a3611..63e3f6dd0e2 100644 --- a/arch/arm/mm/Makefile +++ b/arch/arm/mm/Makefile @@ -16,6 +16,7 @@ obj-$(CONFIG_MODULES) += proc-syms.o obj-$(CONFIG_ALIGNMENT_TRAP) += alignment.o obj-$(CONFIG_DISCONTIGMEM) += discontig.o +obj-$(CONFIG_HIGHMEM) += highmem.o obj-$(CONFIG_CPU_ABRT_NOMMU) += abort-nommu.o obj-$(CONFIG_CPU_ABRT_EV4) += abort-ev4.o @@ -32,6 +33,7 @@ obj-$(CONFIG_CPU_CACHE_V4WT) += cache-v4wt.o obj-$(CONFIG_CPU_CACHE_V4WB) += cache-v4wb.o obj-$(CONFIG_CPU_CACHE_V6) += cache-v6.o obj-$(CONFIG_CPU_CACHE_V7) += cache-v7.o +obj-$(CONFIG_CPU_CACHE_FA) += cache-fa.o obj-$(CONFIG_CPU_COPY_V3) += copypage-v3.o obj-$(CONFIG_CPU_COPY_V4WT) += copypage-v4wt.o @@ -41,6 +43,7 @@ obj-$(CONFIG_CPU_COPY_V6) += copypage-v6.o context.o obj-$(CONFIG_CPU_SA1100) += copypage-v4mc.o obj-$(CONFIG_CPU_XSCALE) += copypage-xscale.o obj-$(CONFIG_CPU_XSC3) += copypage-xsc3.o +obj-$(CONFIG_CPU_COPY_FA) += copypage-fa.o obj-$(CONFIG_CPU_TLB_V3) += tlb-v3.o obj-$(CONFIG_CPU_TLB_V4WT) += tlb-v4.o @@ -49,6 +52,7 @@ obj-$(CONFIG_CPU_TLB_V4WBI) += tlb-v4wbi.o obj-$(CONFIG_CPU_TLB_FEROCEON) += tlb-v4wbi.o # reuse v4wbi TLB functions obj-$(CONFIG_CPU_TLB_V6) += tlb-v6.o obj-$(CONFIG_CPU_TLB_V7) += tlb-v7.o +obj-$(CONFIG_CPU_TLB_FA) += tlb-fa.o obj-$(CONFIG_CPU_ARM610) += proc-arm6_7.o obj-$(CONFIG_CPU_ARM710) += proc-arm6_7.o @@ -62,6 +66,7 @@ obj-$(CONFIG_CPU_ARM925T) += proc-arm925.o obj-$(CONFIG_CPU_ARM926T) += proc-arm926.o obj-$(CONFIG_CPU_ARM940T) += proc-arm940.o obj-$(CONFIG_CPU_ARM946E) += proc-arm946.o +obj-$(CONFIG_CPU_FA526) += proc-fa526.o obj-$(CONFIG_CPU_ARM1020) += proc-arm1020.o obj-$(CONFIG_CPU_ARM1020E) += proc-arm1020e.o obj-$(CONFIG_CPU_ARM1022) += proc-arm1022.o @@ -70,6 +75,7 @@ obj-$(CONFIG_CPU_SA110) += proc-sa110.o obj-$(CONFIG_CPU_SA1100) += proc-sa1100.o obj-$(CONFIG_CPU_XSCALE) += proc-xscale.o obj-$(CONFIG_CPU_XSC3) += proc-xsc3.o +obj-$(CONFIG_CPU_MOHAWK) += proc-mohawk.o obj-$(CONFIG_CPU_FEROCEON) += proc-feroceon.o obj-$(CONFIG_CPU_V6) += proc-v6.o obj-$(CONFIG_CPU_V7) += proc-v7.o diff --git a/arch/arm/mm/cache-fa.S b/arch/arm/mm/cache-fa.S new file mode 100644 index 00000000000..b63a8f7b95c --- /dev/null +++ b/arch/arm/mm/cache-fa.S @@ -0,0 +1,220 @@ +/* + * linux/arch/arm/mm/cache-fa.S + * + * Copyright (C) 2005 Faraday Corp. + * Copyright (C) 2008-2009 Paulius Zaleckas <paulius.zaleckas@teltonika.lt> + * + * Based on cache-v4wb.S: + * Copyright (C) 1997-2002 Russell king + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Processors: FA520 FA526 FA626 + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/memory.h> +#include <asm/page.h> + +#include "proc-macros.S" + +/* + * The size of one data cache line. + */ +#define CACHE_DLINESIZE 16 + +/* + * The total size of the data cache. + */ +#ifdef CONFIG_ARCH_GEMINI +#define CACHE_DSIZE 8192 +#else +#define CACHE_DSIZE 16384 +#endif + +/* FIXME: put optimal value here. Current one is just estimation */ +#define CACHE_DLIMIT (CACHE_DSIZE * 2) + +/* + * flush_user_cache_all() + * + * Clean and invalidate all cache entries in a particular address + * space. + */ +ENTRY(fa_flush_user_cache_all) + /* FALLTHROUGH */ +/* + * flush_kern_cache_all() + * + * Clean and invalidate the entire cache. + */ +ENTRY(fa_flush_kern_cache_all) + mov ip, #0 + mov r2, #VM_EXEC +__flush_whole_cache: + mcr p15, 0, ip, c7, c14, 0 @ clean/invalidate D cache + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcrne p15, 0, ip, c7, c5, 6 @ invalidate BTB + mcrne p15, 0, ip, c7, c10, 4 @ drain write buffer + mcrne p15, 0, ip, c7, c5, 4 @ prefetch flush + mov pc, lr + +/* + * flush_user_cache_range(start, end, flags) + * + * Invalidate a range of cache entries in the specified + * address space. + * + * - start - start address (inclusive, page aligned) + * - end - end address (exclusive, page aligned) + * - flags - vma_area_struct flags describing address space + */ +ENTRY(fa_flush_user_cache_range) + mov ip, #0 + sub r3, r1, r0 @ calculate total size + cmp r3, #CACHE_DLIMIT @ total size >= limit? + bhs __flush_whole_cache @ flush whole D cache + +1: tst r2, #VM_EXEC + mcrne p15, 0, r0, c7, c5, 1 @ invalidate I line + mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c5, 6 @ invalidate BTB + mcrne p15, 0, ip, c7, c10, 4 @ data write barrier + mcrne p15, 0, ip, c7, c5, 4 @ prefetch flush + mov pc, lr + +/* + * coherent_kern_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(fa_coherent_kern_range) + /* fall through */ + +/* + * coherent_user_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(fa_coherent_user_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry + mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c5, 6 @ invalidate BTB + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + mcr p15, 0, r0, c7, c5, 4 @ prefetch flush + mov pc, lr + +/* + * flush_kern_dcache_page(kaddr) + * + * Ensure that the data held in the page kaddr is written back + * to the page in question. + * + * - kaddr - kernel address (guaranteed to be page aligned) + */ +ENTRY(fa_flush_kern_dcache_page) + add r1, r0, #PAGE_SZ +1: mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D line + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + mov pc, lr + +/* + * dma_inv_range(start, end) + * + * Invalidate (discard) the specified virtual address range. + * May not write back any entries. If 'start' or 'end' + * are not cache line aligned, those lines must be written + * back. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(fa_dma_inv_range) + tst r0, #CACHE_DLINESIZE - 1 + bic r0, r0, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r0, c7, c14, 1 @ clean & invalidate D entry + tst r1, #CACHE_DLINESIZE - 1 + bic r1, r1, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r1, c7, c14, 1 @ clean & invalidate D entry +1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + mov pc, lr + +/* + * dma_clean_range(start, end) + * + * Clean (write back) the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(fa_dma_clean_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + mov pc, lr + +/* + * dma_flush_range(start,end) + * - start - virtual start address of region + * - end - virtual end address of region + */ +ENTRY(fa_dma_flush_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + mov pc, lr + + __INITDATA + + .type fa_cache_fns, #object +ENTRY(fa_cache_fns) + .long fa_flush_kern_cache_all + .long fa_flush_user_cache_all + .long fa_flush_user_cache_range + .long fa_coherent_kern_range + .long fa_coherent_user_range + .long fa_flush_kern_dcache_page + .long fa_dma_inv_range + .long fa_dma_clean_range + .long fa_dma_flush_range + .size fa_cache_fns, . - fa_cache_fns diff --git a/arch/arm/mm/cache-feroceon-l2.c b/arch/arm/mm/cache-feroceon-l2.c index 80cd207cbae..d6dd83826f8 100644 --- a/arch/arm/mm/cache-feroceon-l2.c +++ b/arch/arm/mm/cache-feroceon-l2.c @@ -14,8 +14,12 @@ #include <linux/init.h> #include <asm/cacheflush.h> +#include <asm/kmap_types.h> +#include <asm/fixmap.h> +#include <asm/pgtable.h> +#include <asm/tlbflush.h> #include <plat/cache-feroceon-l2.h> - +#include "mm.h" /* * Low-level cache maintenance operations. @@ -34,14 +38,36 @@ * The range operations require two successive cp15 writes, in * between which we don't want to be preempted. */ + +static inline unsigned long l2_start_va(unsigned long paddr) +{ +#ifdef CONFIG_HIGHMEM + /* + * Let's do our own fixmap stuff in a minimal way here. + * Because range ops can't be done on physical addresses, + * we simply install a virtual mapping for it only for the + * TLB lookup to occur, hence no need to flush the untouched + * memory mapping. This is protected with the disabling of + * interrupts by the caller. + */ + unsigned long idx = KM_L2_CACHE + KM_TYPE_NR * smp_processor_id(); + unsigned long vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); + set_pte_ext(TOP_PTE(vaddr), pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL), 0); + local_flush_tlb_kernel_page(vaddr); + return vaddr + (paddr & ~PAGE_MASK); +#else + return __phys_to_virt(paddr); +#endif +} + static inline void l2_clean_pa(unsigned long addr) { __asm__("mcr p15, 1, %0, c15, c9, 3" : : "r" (addr)); } -static inline void l2_clean_mva_range(unsigned long start, unsigned long end) +static inline void l2_clean_pa_range(unsigned long start, unsigned long end) { - unsigned long flags; + unsigned long va_start, va_end, flags; /* * Make sure 'start' and 'end' reference the same page, as @@ -51,17 +77,14 @@ static inline void l2_clean_mva_range(unsigned long start, unsigned long end) BUG_ON((start ^ end) >> PAGE_SHIFT); raw_local_irq_save(flags); + va_start = l2_start_va(start); + va_end = va_start + (end - start); __asm__("mcr p15, 1, %0, c15, c9, 4\n\t" "mcr p15, 1, %1, c15, c9, 5" - : : "r" (start), "r" (end)); + : : "r" (va_start), "r" (va_end)); raw_local_irq_restore(flags); } -static inline void l2_clean_pa_range(unsigned long start, unsigned long end) -{ - l2_clean_mva_range(__phys_to_virt(start), __phys_to_virt(end)); -} - static inline void l2_clean_inv_pa(unsigned long addr) { __asm__("mcr p15, 1, %0, c15, c10, 3" : : "r" (addr)); @@ -72,9 +95,9 @@ static inline void l2_inv_pa(unsigned long addr) __asm__("mcr p15, 1, %0, c15, c11, 3" : : "r" (addr)); } -static inline void l2_inv_mva_range(unsigned long start, unsigned long end) +static inline void l2_inv_pa_range(unsigned long start, unsigned long end) { - unsigned long flags; + unsigned long va_start, va_end, flags; /* * Make sure 'start' and 'end' reference the same page, as @@ -84,17 +107,14 @@ static inline void l2_inv_mva_range(unsigned long start, unsigned long end) BUG_ON((start ^ end) >> PAGE_SHIFT); raw_local_irq_save(flags); + va_start = l2_start_va(start); + va_end = va_start + (end - start); __asm__("mcr p15, 1, %0, c15, c11, 4\n\t" "mcr p15, 1, %1, c15, c11, 5" - : : "r" (start), "r" (end)); + : : "r" (va_start), "r" (va_end)); raw_local_irq_restore(flags); } -static inline void l2_inv_pa_range(unsigned long start, unsigned long end) -{ - l2_inv_mva_range(__phys_to_virt(start), __phys_to_virt(end)); -} - /* * Linux primitives. diff --git a/arch/arm/mm/cache-xsc3l2.c b/arch/arm/mm/cache-xsc3l2.c index 464de893a98..5d180cb0bd9 100644 --- a/arch/arm/mm/cache-xsc3l2.c +++ b/arch/arm/mm/cache-xsc3l2.c @@ -17,12 +17,14 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <linux/init.h> -#include <linux/spinlock.h> -#include <linux/io.h> - #include <asm/system.h> #include <asm/cputype.h> #include <asm/cacheflush.h> +#include <asm/kmap_types.h> +#include <asm/fixmap.h> +#include <asm/pgtable.h> +#include <asm/tlbflush.h> +#include "mm.h" #define CR_L2 (1 << 26) @@ -47,21 +49,11 @@ static inline void xsc3_l2_clean_mva(unsigned long addr) __asm__("mcr p15, 1, %0, c7, c11, 1" : : "r" (addr)); } -static inline void xsc3_l2_clean_pa(unsigned long addr) -{ - xsc3_l2_clean_mva(__phys_to_virt(addr)); -} - static inline void xsc3_l2_inv_mva(unsigned long addr) { __asm__("mcr p15, 1, %0, c7, c7, 1" : : "r" (addr)); } -static inline void xsc3_l2_inv_pa(unsigned long addr) -{ - xsc3_l2_inv_mva(__phys_to_virt(addr)); -} - static inline void xsc3_l2_inv_all(void) { unsigned long l2ctype, set_way; @@ -79,50 +71,103 @@ static inline void xsc3_l2_inv_all(void) dsb(); } +#ifdef CONFIG_HIGHMEM +#define l2_map_save_flags(x) raw_local_save_flags(x) +#define l2_map_restore_flags(x) raw_local_irq_restore(x) +#else +#define l2_map_save_flags(x) ((x) = 0) +#define l2_map_restore_flags(x) ((void)(x)) +#endif + +static inline unsigned long l2_map_va(unsigned long pa, unsigned long prev_va, + unsigned long flags) +{ +#ifdef CONFIG_HIGHMEM + unsigned long va = prev_va & PAGE_MASK; + unsigned long pa_offset = pa << (32 - PAGE_SHIFT); + if (unlikely(pa_offset < (prev_va << (32 - PAGE_SHIFT)))) { + /* + * Switching to a new page. Because cache ops are + * using virtual addresses only, we must put a mapping + * in place for it. We also enable interrupts for a + * short while and disable them again to protect this + * mapping. + */ + unsigned long idx; + raw_local_irq_restore(flags); + idx = KM_L2_CACHE + KM_TYPE_NR * smp_processor_id(); + va = __fix_to_virt(FIX_KMAP_BEGIN + idx); + raw_local_irq_restore(flags | PSR_I_BIT); + set_pte_ext(TOP_PTE(va), pfn_pte(pa >> PAGE_SHIFT, PAGE_KERNEL), 0); + local_flush_tlb_kernel_page(va); + } + return va + (pa_offset >> (32 - PAGE_SHIFT)); +#else + return __phys_to_virt(pa); +#endif +} + static void xsc3_l2_inv_range(unsigned long start, unsigned long end) { + unsigned long vaddr, flags; + if (start == 0 && end == -1ul) { xsc3_l2_inv_all(); return; } + vaddr = -1; /* to force the first mapping */ + l2_map_save_flags(flags); + /* * Clean and invalidate partial first cache line. */ if (start & (CACHE_LINE_SIZE - 1)) { - xsc3_l2_clean_pa(start & ~(CACHE_LINE_SIZE - 1)); - xsc3_l2_inv_pa(start & ~(CACHE_LINE_SIZE - 1)); + vaddr = l2_map_va(start & ~(CACHE_LINE_SIZE - 1), vaddr, flags); + xsc3_l2_clean_mva(vaddr); + xsc3_l2_inv_mva(vaddr); start = (start | (CACHE_LINE_SIZE - 1)) + 1; } /* - * Clean and invalidate partial last cache line. + * Invalidate all full cache lines between 'start' and 'end'. */ - if (start < end && (end & (CACHE_LINE_SIZE - 1))) { - xsc3_l2_clean_pa(end & ~(CACHE_LINE_SIZE - 1)); - xsc3_l2_inv_pa(end & ~(CACHE_LINE_SIZE - 1)); - end &= ~(CACHE_LINE_SIZE - 1); + while (start < (end & ~(CACHE_LINE_SIZE - 1))) { + vaddr = l2_map_va(start, vaddr, flags); + xsc3_l2_inv_mva(vaddr); + start += CACHE_LINE_SIZE; } /* - * Invalidate all full cache lines between 'start' and 'end'. + * Clean and invalidate partial last cache line. */ - while (start < end) { - xsc3_l2_inv_pa(start); - start += CACHE_LINE_SIZE; + if (start < end) { + vaddr = l2_map_va(start, vaddr, flags); + xsc3_l2_clean_mva(vaddr); + xsc3_l2_inv_mva(vaddr); } + l2_map_restore_flags(flags); + dsb(); } static void xsc3_l2_clean_range(unsigned long start, unsigned long end) { + unsigned long vaddr, flags; + + vaddr = -1; /* to force the first mapping */ + l2_map_save_flags(flags); + start &= ~(CACHE_LINE_SIZE - 1); while (start < end) { - xsc3_l2_clean_pa(start); + vaddr = l2_map_va(start, vaddr, flags); + xsc3_l2_clean_mva(vaddr); start += CACHE_LINE_SIZE; } + l2_map_restore_flags(flags); + dsb(); } @@ -148,18 +193,26 @@ static inline void xsc3_l2_flush_all(void) static void xsc3_l2_flush_range(unsigned long start, unsigned long end) { + unsigned long vaddr, flags; + if (start == 0 && end == -1ul) { xsc3_l2_flush_all(); return; } + vaddr = -1; /* to force the first mapping */ + l2_map_save_flags(flags); + start &= ~(CACHE_LINE_SIZE - 1); while (start < end) { - xsc3_l2_clean_pa(start); - xsc3_l2_inv_pa(start); + vaddr = l2_map_va(start, vaddr, flags); + xsc3_l2_clean_mva(vaddr); + xsc3_l2_inv_mva(vaddr); start += CACHE_LINE_SIZE; } + l2_map_restore_flags(flags); + dsb(); } diff --git a/arch/arm/mm/copypage-fa.c b/arch/arm/mm/copypage-fa.c new file mode 100644 index 00000000000..b2a6008b011 --- /dev/null +++ b/arch/arm/mm/copypage-fa.c @@ -0,0 +1,86 @@ +/* + * linux/arch/arm/lib/copypage-fa.S + * + * Copyright (C) 2005 Faraday Corp. + * Copyright (C) 2008-2009 Paulius Zaleckas <paulius.zaleckas@teltonika.lt> + * + * Based on copypage-v4wb.S: + * Copyright (C) 1995-1999 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/init.h> +#include <linux/highmem.h> + +/* + * Faraday optimised copy_user_page + */ +static void __naked +fa_copy_user_page(void *kto, const void *kfrom) +{ + asm("\ + stmfd sp!, {r4, lr} @ 2\n\ + mov r2, %0 @ 1\n\ +1: ldmia r1!, {r3, r4, ip, lr} @ 4\n\ + stmia r0, {r3, r4, ip, lr} @ 4\n\ + mcr p15, 0, r0, c7, c14, 1 @ 1 clean and invalidate D line\n\ + add r0, r0, #16 @ 1\n\ + ldmia r1!, {r3, r4, ip, lr} @ 4\n\ + stmia r0, {r3, r4, ip, lr} @ 4\n\ + mcr p15, 0, r0, c7, c14, 1 @ 1 clean and invalidate D line\n\ + add r0, r0, #16 @ 1\n\ + subs r2, r2, #1 @ 1\n\ + bne 1b @ 1\n\ + mcr p15, 0, r2, c7, c10, 4 @ 1 drain WB\n\ + ldmfd sp!, {r4, pc} @ 3" + : + : "I" (PAGE_SIZE / 32)); +} + +void fa_copy_user_highpage(struct page *to, struct page *from, + unsigned long vaddr) +{ + void *kto, *kfrom; + + kto = kmap_atomic(to, KM_USER0); + kfrom = kmap_atomic(from, KM_USER1); + fa_copy_user_page(kto, kfrom); + kunmap_atomic(kfrom, KM_USER1); + kunmap_atomic(kto, KM_USER0); +} + +/* + * Faraday optimised clear_user_page + * + * Same story as above. + */ +void fa_clear_user_highpage(struct page *page, unsigned long vaddr) +{ + void *ptr, *kaddr = kmap_atomic(page, KM_USER0); + asm volatile("\ + mov r1, %2 @ 1\n\ + mov r2, #0 @ 1\n\ + mov r3, #0 @ 1\n\ + mov ip, #0 @ 1\n\ + mov lr, #0 @ 1\n\ +1: stmia %0, {r2, r3, ip, lr} @ 4\n\ + mcr p15, 0, %0, c7, c14, 1 @ 1 clean and invalidate D line\n\ + add %0, %0, #16 @ 1\n\ + stmia %0, {r2, r3, ip, lr} @ 4\n\ + mcr p15, 0, %0, c7, c14, 1 @ 1 clean and invalidate D line\n\ + add %0, %0, #16 @ 1\n\ + subs r1, r1, #1 @ 1\n\ + bne 1b @ 1\n\ + mcr p15, 0, r1, c7, c10, 4 @ 1 drain WB" + : "=r" (ptr) + : "0" (kaddr), "I" (PAGE_SIZE / 32) + : "r1", "r2", "r3", "ip", "lr"); + kunmap_atomic(kaddr, KM_USER0); +} + +struct cpu_user_fns fa_user_fns __initdata = { + .cpu_clear_user_highpage = fa_clear_user_highpage, + .cpu_copy_user_highpage = fa_copy_user_highpage, +}; diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index f1ef5613ccd..510c179b0ac 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -19,6 +19,7 @@ #include <linux/dma-mapping.h> #include <asm/memory.h> +#include <asm/highmem.h> #include <asm/cacheflush.h> #include <asm/tlbflush.h> #include <asm/sizes.h> @@ -517,6 +518,74 @@ void dma_cache_maint(const void *start, size_t size, int direction) } EXPORT_SYMBOL(dma_cache_maint); +static void dma_cache_maint_contiguous(struct page *page, unsigned long offset, + size_t size, int direction) +{ + void *vaddr; + unsigned long paddr; + void (*inner_op)(const void *, const void *); + void (*outer_op)(unsigned long, unsigned long); + + switch (direction) { + case DMA_FROM_DEVICE: /* invalidate only */ + inner_op = dmac_inv_range; + outer_op = outer_inv_range; + break; + case DMA_TO_DEVICE: /* writeback only */ + inner_op = dmac_clean_range; + outer_op = outer_clean_range; + break; + case DMA_BIDIRECTIONAL: /* writeback and invalidate */ + inner_op = dmac_flush_range; + outer_op = outer_flush_range; + break; + default: + BUG(); + } + + if (!PageHighMem(page)) { + vaddr = page_address(page) + offset; + inner_op(vaddr, vaddr + size); + } else { + vaddr = kmap_high_get(page); + if (vaddr) { + vaddr += offset; + inner_op(vaddr, vaddr + size); + kunmap_high(page); + } + } + + paddr = page_to_phys(page) + offset; + outer_op(paddr, paddr + size); +} + +void dma_cache_maint_page(struct page *page, unsigned long offset, + size_t size, int dir) +{ + /* + * A single sg entry may refer to multiple physically contiguous + * pages. But we still need to process highmem pages individually. + * If highmem is not configured then the bulk of this loop gets + * optimized out. + */ + size_t left = size; + do { + size_t len = left; + if (PageHighMem(page) && len + offset > PAGE_SIZE) { + if (offset >= PAGE_SIZE) { + page += offset / PAGE_SIZE; + offset %= PAGE_SIZE; + } + len = PAGE_SIZE - offset; + } + dma_cache_maint_contiguous(page, offset, len, dir); + offset = 0; + page++; + left -= len; + } while (left); +} +EXPORT_SYMBOL(dma_cache_maint_page); + /** * dma_map_sg - map a set of SG buffers for streaming mode DMA * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices @@ -614,7 +683,8 @@ void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, continue; if (!arch_is_coherent()) - dma_cache_maint(sg_virt(s), s->length, dir); + dma_cache_maint_page(sg_page(s), s->offset, + s->length, dir); } } EXPORT_SYMBOL(dma_sync_sg_for_device); diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c index 0fa9bf388f0..4e283481cee 100644 --- a/arch/arm/mm/flush.c +++ b/arch/arm/mm/flush.c @@ -192,7 +192,7 @@ void flush_dcache_page(struct page *page) struct address_space *mapping = page_mapping(page); #ifndef CONFIG_SMP - if (mapping && !mapping_mapped(mapping)) + if (!PageHighMem(page) && mapping && !mapping_mapped(mapping)) set_bit(PG_dcache_dirty, &page->flags); else #endif diff --git a/arch/arm/mm/highmem.c b/arch/arm/mm/highmem.c new file mode 100644 index 00000000000..a34954d9df7 --- /dev/null +++ b/arch/arm/mm/highmem.c @@ -0,0 +1,116 @@ +/* + * arch/arm/mm/highmem.c -- ARM highmem support + * + * Author: Nicolas Pitre + * Created: september 8, 2008 + * Copyright: Marvell Semiconductors Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/highmem.h> +#include <linux/interrupt.h> +#include <asm/fixmap.h> +#include <asm/cacheflush.h> +#include <asm/tlbflush.h> +#include "mm.h" + +void *kmap(struct page *page) +{ + might_sleep(); + if (!PageHighMem(page)) + return page_address(page); + return kmap_high(page); +} +EXPORT_SYMBOL(kmap); + +void kunmap(struct page *page) +{ + BUG_ON(in_interrupt()); + if (!PageHighMem(page)) + return; + kunmap_high(page); +} +EXPORT_SYMBOL(kunmap); + +void *kmap_atomic(struct page *page, enum km_type type) +{ + unsigned int idx; + unsigned long vaddr; + + pagefault_disable(); + if (!PageHighMem(page)) + return page_address(page); + + idx = type + KM_TYPE_NR * smp_processor_id(); + vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); +#ifdef CONFIG_DEBUG_HIGHMEM + /* + * With debugging enabled, kunmap_atomic forces that entry to 0. + * Make sure it was indeed properly unmapped. + */ + BUG_ON(!pte_none(*(TOP_PTE(vaddr)))); +#endif + set_pte_ext(TOP_PTE(vaddr), mk_pte(page, kmap_prot), 0); + /* + * When debugging is off, kunmap_atomic leaves the previous mapping + * in place, so this TLB flush ensures the TLB is updated with the + * new mapping. + */ + local_flush_tlb_kernel_page(vaddr); + + return (void *)vaddr; +} +EXPORT_SYMBOL(kmap_atomic); + +void kunmap_atomic(void *kvaddr, enum km_type type) +{ + unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; + unsigned int idx = type + KM_TYPE_NR * smp_processor_id(); + + if (kvaddr >= (void *)FIXADDR_START) { + __cpuc_flush_dcache_page((void *)vaddr); +#ifdef CONFIG_DEBUG_HIGHMEM + BUG_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx)); + set_pte_ext(TOP_PTE(vaddr), __pte(0), 0); + local_flush_tlb_kernel_page(vaddr); +#else + (void) idx; /* to kill a warning */ +#endif + } + pagefault_enable(); +} +EXPORT_SYMBOL(kunmap_atomic); + +void *kmap_atomic_pfn(unsigned long pfn, enum km_type type) +{ + unsigned int idx; + unsigned long vaddr; + + pagefault_disable(); + + idx = type + KM_TYPE_NR * smp_processor_id(); + vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); +#ifdef CONFIG_DEBUG_HIGHMEM + BUG_ON(!pte_none(*(TOP_PTE(vaddr)))); +#endif + set_pte_ext(TOP_PTE(vaddr), pfn_pte(pfn, kmap_prot), 0); + local_flush_tlb_kernel_page(vaddr); + + return (void *)vaddr; +} + +struct page *k |