diff options
Diffstat (limited to 'arch/sh/mm')
40 files changed, 1923 insertions, 1390 deletions
diff --git a/arch/sh/mm/Kconfig b/arch/sh/mm/Kconfig index 1445ca6257d..dba285e8680 100644 --- a/arch/sh/mm/Kconfig +++ b/arch/sh/mm/Kconfig @@ -79,11 +79,11 @@ config 29BIT config 32BIT bool - default y if CPU_SH5 + default y if CPU_SH5 || !MMU config PMB bool "Support 32-bit physical addressing through PMB" - depends on MMU && EXPERIMENTAL && CPU_SH4A && !CPU_SH4AL_DSP + depends on MMU && CPU_SH4A && !CPU_SH4AL_DSP select 32BIT select UNCACHED_MAPPING help @@ -110,7 +110,8 @@ config VSYSCALL config NUMA bool "Non Uniform Memory Access (NUMA) Support" - depends on MMU && SYS_SUPPORTS_NUMA && EXPERIMENTAL + depends on MMU && SYS_SUPPORTS_NUMA + select ARCH_WANT_NUMA_VARIABLE_LOCALITY default n help Some SH systems have many various memories scattered around @@ -136,16 +137,6 @@ config ARCH_SPARSEMEM_ENABLE config ARCH_SPARSEMEM_DEFAULT def_bool y -config MAX_ACTIVE_REGIONS - int - default "6" if (CPU_SUBTYPE_SHX3 && SPARSEMEM) - default "2" if SPARSEMEM && (CPU_SUBTYPE_SH7722 || \ - CPU_SUBTYPE_SH7785) - default "1" - -config ARCH_POPULATES_NODE_MAP - def_bool y - config ARCH_SELECT_MEMORY_MODEL def_bool y @@ -168,6 +159,10 @@ config IOREMAP_FIXED config UNCACHED_MAPPING bool +config HAVE_SRAM_POOL + bool + select GENERIC_ALLOCATOR + choice prompt "Kernel page size" default PAGE_SIZE_4KB diff --git a/arch/sh/mm/Makefile b/arch/sh/mm/Makefile index 3dc8a8a6382..cee6b9999d8 100644 --- a/arch/sh/mm/Makefile +++ b/arch/sh/mm/Makefile @@ -10,21 +10,23 @@ cacheops-$(CONFIG_CPU_SH3) := cache-sh3.o cacheops-$(CONFIG_CPU_SH4) := cache-sh4.o flush-sh4.o cacheops-$(CONFIG_CPU_SH5) := cache-sh5.o flush-sh4.o cacheops-$(CONFIG_SH7705_CACHE_32KB) += cache-sh7705.o +cacheops-$(CONFIG_CPU_SHX3) += cache-shx3.o obj-y += $(cacheops-y) mmu-y := nommu.o extable_32.o -mmu-$(CONFIG_MMU) := extable_$(BITS).o fault_$(BITS).o \ - ioremap.o kmap.o pgtable.o tlbflush_$(BITS).o +mmu-$(CONFIG_MMU) := extable_$(BITS).o fault.o gup.o ioremap.o kmap.o \ + pgtable.o tlbex_$(BITS).o tlbflush_$(BITS).o obj-y += $(mmu-y) -obj-$(CONFIG_DEBUG_FS) += asids-debugfs.o -ifdef CONFIG_DEBUG_FS -obj-$(CONFIG_CPU_SH4) += cache-debugfs.o +debugfs-y := asids-debugfs.o +ifndef CONFIG_CACHE_OFF +debugfs-$(CONFIG_CPU_SH4) += cache-debugfs.o endif ifdef CONFIG_MMU +debugfs-$(CONFIG_CPU_SH4) += tlb-debugfs.o tlb-$(CONFIG_CPU_SH3) := tlb-sh3.o tlb-$(CONFIG_CPU_SH4) := tlb-sh4.o tlb-urb.o tlb-$(CONFIG_CPU_SH5) := tlb-sh5.o @@ -32,13 +34,17 @@ tlb-$(CONFIG_CPU_HAS_PTEAEX) := tlb-pteaex.o tlb-urb.o obj-y += $(tlb-y) endif +obj-$(CONFIG_DEBUG_FS) += $(debugfs-y) obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_PMB) += pmb.o obj-$(CONFIG_NUMA) += numa.o obj-$(CONFIG_IOREMAP_FIXED) += ioremap_fixed.o obj-$(CONFIG_UNCACHED_MAPPING) += uncached.o +obj-$(CONFIG_HAVE_SRAM_POOL) += sram.o -# Special flags for fault_64.o. This puts restrictions on the number of +GCOV_PROFILE_pmb.o := n + +# Special flags for tlbex_64.o. This puts restrictions on the number of # caller-save registers that the compiler can target when building this file. # This is required because the code is called from a context in entry.S where # very few registers have been saved in the exception handler (for speed @@ -53,7 +59,7 @@ obj-$(CONFIG_UNCACHED_MAPPING) += uncached.o # The resources not listed below are callee save, i.e. the compiler is free to # use any of them and will spill them to the stack itself. -CFLAGS_fault_64.o += -ffixed-r7 \ +CFLAGS_tlbex_64.o += -ffixed-r7 \ -ffixed-r8 -ffixed-r9 -ffixed-r10 -ffixed-r11 -ffixed-r12 \ -ffixed-r13 -ffixed-r14 -ffixed-r16 -ffixed-r17 -ffixed-r19 \ -ffixed-r20 -ffixed-r21 -ffixed-r22 -ffixed-r23 \ @@ -63,4 +69,4 @@ CFLAGS_fault_64.o += -ffixed-r7 \ -ffixed-r60 -ffixed-r61 -ffixed-r62 \ -fomit-frame-pointer -EXTRA_CFLAGS += -Werror +ccflags-y := -Werror diff --git a/arch/sh/mm/alignment.c b/arch/sh/mm/alignment.c index b2595b8548e..ec2b2530242 100644 --- a/arch/sh/mm/alignment.c +++ b/arch/sh/mm/alignment.c @@ -13,6 +13,7 @@ #include <linux/seq_file.h> #include <linux/proc_fs.h> #include <linux/uaccess.h> +#include <linux/ratelimit.h> #include <asm/alignment.h> #include <asm/processor.h> @@ -95,13 +96,13 @@ int set_unalign_ctl(struct task_struct *tsk, unsigned int val) void unaligned_fixups_notify(struct task_struct *tsk, insn_size_t insn, struct pt_regs *regs) { - if (user_mode(regs) && (se_usermode & UM_WARN) && printk_ratelimit()) - pr_notice("Fixing up unaligned userspace access " + if (user_mode(regs) && (se_usermode & UM_WARN)) + pr_notice_ratelimited("Fixing up unaligned userspace access " "in \"%s\" pid=%d pc=0x%p ins=0x%04hx\n", tsk->comm, task_pid_nr(tsk), (void *)instruction_pointer(regs), insn); - else if (se_kernmode_warn && printk_ratelimit()) - pr_notice("Fixing up unaligned kernel access " + else if (se_kernmode_warn) + pr_notice_ratelimited("Fixing up unaligned kernel access " "in \"%s\" pid=%d pc=0x%p ins=0x%04hx\n", tsk->comm, task_pid_nr(tsk), (void *)instruction_pointer(regs), insn); @@ -139,7 +140,7 @@ static int alignment_proc_open(struct inode *inode, struct file *file) static ssize_t alignment_proc_write(struct file *file, const char __user *buffer, size_t count, loff_t *pos) { - int *data = PDE(file->f_path.dentry->d_inode)->data; + int *data = PDE_DATA(file_inode(file)); char mode; if (count > 0) { diff --git a/arch/sh/mm/asids-debugfs.c b/arch/sh/mm/asids-debugfs.c index cd8c3bf39b5..74c03ecc487 100644 --- a/arch/sh/mm/asids-debugfs.c +++ b/arch/sh/mm/asids-debugfs.c @@ -63,7 +63,7 @@ static int __init asids_debugfs_init(void) { struct dentry *asids_dentry; - asids_dentry = debugfs_create_file("asids", S_IRUSR, sh_debugfs_root, + asids_dentry = debugfs_create_file("asids", S_IRUSR, arch_debugfs_dir, NULL, &asids_debugfs_fops); if (!asids_dentry) return -ENOMEM; diff --git a/arch/sh/mm/cache-debugfs.c b/arch/sh/mm/cache-debugfs.c index 690ed010d00..777e50f33c0 100644 --- a/arch/sh/mm/cache-debugfs.c +++ b/arch/sh/mm/cache-debugfs.c @@ -26,9 +26,9 @@ static int cache_seq_show(struct seq_file *file, void *iter) { unsigned int cache_type = (unsigned int)file->private; struct cache_info *cache; - unsigned int waysize, way, cache_size; - unsigned long ccr, base; - static unsigned long addrstart = 0; + unsigned int waysize, way; + unsigned long ccr; + unsigned long addrstart = 0; /* * Go uncached immediately so we don't skew the results any @@ -36,7 +36,7 @@ static int cache_seq_show(struct seq_file *file, void *iter) */ jump_to_uncached(); - ccr = __raw_readl(CCR); + ccr = __raw_readl(SH_CCR); if ((ccr & CCR_CACHE_ENABLE) == 0) { back_to_cached(); @@ -45,28 +45,13 @@ static int cache_seq_show(struct seq_file *file, void *iter) } if (cache_type == CACHE_TYPE_DCACHE) { - base = CACHE_OC_ADDRESS_ARRAY; + addrstart = CACHE_OC_ADDRESS_ARRAY; cache = ¤t_cpu_data.dcache; } else { - base = CACHE_IC_ADDRESS_ARRAY; + addrstart = CACHE_IC_ADDRESS_ARRAY; cache = ¤t_cpu_data.icache; } - /* - * Due to the amount of data written out (depending on the cache size), - * we may be iterated over multiple times. In this case, keep track of - * the entry position in addrstart, and rewind it when we've hit the - * end of the cache. - * - * Likewise, the same code is used for multiple caches, so care must - * be taken for bouncing addrstart back and forth so the appropriate - * cache is hit. - */ - cache_size = cache->ways * cache->sets * cache->linesz; - if (((addrstart & 0xff000000) != base) || - (addrstart & 0x00ffffff) > cache_size) - addrstart = base; - waysize = cache->sets; /* @@ -126,25 +111,19 @@ static int __init cache_debugfs_init(void) { struct dentry *dcache_dentry, *icache_dentry; - dcache_dentry = debugfs_create_file("dcache", S_IRUSR, sh_debugfs_root, + dcache_dentry = debugfs_create_file("dcache", S_IRUSR, arch_debugfs_dir, (unsigned int *)CACHE_TYPE_DCACHE, &cache_debugfs_fops); if (!dcache_dentry) return -ENOMEM; - if (IS_ERR(dcache_dentry)) - return PTR_ERR(dcache_dentry); - icache_dentry = debugfs_create_file("icache", S_IRUSR, sh_debugfs_root, + icache_dentry = debugfs_create_file("icache", S_IRUSR, arch_debugfs_dir, (unsigned int *)CACHE_TYPE_ICACHE, &cache_debugfs_fops); if (!icache_dentry) { debugfs_remove(dcache_dentry); return -ENOMEM; } - if (IS_ERR(icache_dentry)) { - debugfs_remove(dcache_dentry); - return PTR_ERR(icache_dentry); - } return 0; } diff --git a/arch/sh/mm/cache-sh2.c b/arch/sh/mm/cache-sh2.c index defcf719f2e..a74259f2f98 100644 --- a/arch/sh/mm/cache-sh2.c +++ b/arch/sh/mm/cache-sh2.c @@ -63,9 +63,9 @@ static void sh2__flush_invalidate_region(void *start, int size) local_irq_save(flags); jump_to_uncached(); - ccr = __raw_readl(CCR); + ccr = __raw_readl(SH_CCR); ccr |= CCR_CACHE_INVALIDATE; - __raw_writel(ccr, CCR); + __raw_writel(ccr, SH_CCR); back_to_cached(); local_irq_restore(flags); diff --git a/arch/sh/mm/cache-sh2a.c b/arch/sh/mm/cache-sh2a.c index 1f51225426a..ee87d081259 100644 --- a/arch/sh/mm/cache-sh2a.c +++ b/arch/sh/mm/cache-sh2a.c @@ -15,35 +15,80 @@ #include <asm/cacheflush.h> #include <asm/io.h> +/* + * The maximum number of pages we support up to when doing ranged dcache + * flushing. Anything exceeding this will simply flush the dcache in its + * entirety. + */ +#define MAX_OCACHE_PAGES 32 +#define MAX_ICACHE_PAGES 32 + +#ifdef CONFIG_CACHE_WRITEBACK +static void sh2a_flush_oc_line(unsigned long v, int way) +{ + unsigned long addr = (v & 0x000007f0) | (way << 11); + unsigned long data; + + data = __raw_readl(CACHE_OC_ADDRESS_ARRAY | addr); + if ((data & CACHE_PHYSADDR_MASK) == (v & CACHE_PHYSADDR_MASK)) { + data &= ~SH_CACHE_UPDATED; + __raw_writel(data, CACHE_OC_ADDRESS_ARRAY | addr); + } +} +#endif + +static void sh2a_invalidate_line(unsigned long cache_addr, unsigned long v) +{ + /* Set associative bit to hit all ways */ + unsigned long addr = (v & 0x000007f0) | SH_CACHE_ASSOC; + __raw_writel((addr & CACHE_PHYSADDR_MASK), cache_addr | addr); +} + +/* + * Write back the dirty D-caches, but not invalidate them. + */ static void sh2a__flush_wback_region(void *start, int size) { +#ifdef CONFIG_CACHE_WRITEBACK unsigned long v; unsigned long begin, end; unsigned long flags; + int nr_ways; begin = (unsigned long)start & ~(L1_CACHE_BYTES-1); end = ((unsigned long)start + size + L1_CACHE_BYTES-1) & ~(L1_CACHE_BYTES-1); + nr_ways = current_cpu_data.dcache.ways; local_irq_save(flags); jump_to_uncached(); - for (v = begin; v < end; v+=L1_CACHE_BYTES) { - unsigned long addr = CACHE_OC_ADDRESS_ARRAY | (v & 0x000007f0); + /* If there are too many pages then flush the entire cache */ + if (((end - begin) >> PAGE_SHIFT) >= MAX_OCACHE_PAGES) { + begin = CACHE_OC_ADDRESS_ARRAY; + end = begin + (nr_ways * current_cpu_data.dcache.way_size); + + for (v = begin; v < end; v += L1_CACHE_BYTES) { + unsigned long data = __raw_readl(v); + if (data & SH_CACHE_UPDATED) + __raw_writel(data & ~SH_CACHE_UPDATED, v); + } + } else { int way; - for (way = 0; way < 4; way++) { - unsigned long data = __raw_readl(addr | (way << 11)); - if ((data & CACHE_PHYSADDR_MASK) == (v & CACHE_PHYSADDR_MASK)) { - data &= ~SH_CACHE_UPDATED; - __raw_writel(data, addr | (way << 11)); - } + for (way = 0; way < nr_ways; way++) { + for (v = begin; v < end; v += L1_CACHE_BYTES) + sh2a_flush_oc_line(v, way); } } back_to_cached(); local_irq_restore(flags); +#endif } +/* + * Write back the dirty D-caches and invalidate them. + */ static void sh2a__flush_purge_region(void *start, int size) { unsigned long v; @@ -58,13 +103,22 @@ static void sh2a__flush_purge_region(void *start, int size) jump_to_uncached(); for (v = begin; v < end; v+=L1_CACHE_BYTES) { - __raw_writel((v & CACHE_PHYSADDR_MASK), - CACHE_OC_ADDRESS_ARRAY | (v & 0x000007f0) | 0x00000008); +#ifdef CONFIG_CACHE_WRITEBACK + int way; + int nr_ways = current_cpu_data.dcache.ways; + for (way = 0; way < nr_ways; way++) + sh2a_flush_oc_line(v, way); +#endif + sh2a_invalidate_line(CACHE_OC_ADDRESS_ARRAY, v); } + back_to_cached(); local_irq_restore(flags); } +/* + * Invalidate the D-caches, but no write back please + */ static void sh2a__flush_invalidate_region(void *start, int size) { unsigned long v; @@ -74,29 +128,26 @@ static void sh2a__flush_invalidate_region(void *start, int size) begin = (unsigned long)start & ~(L1_CACHE_BYTES-1); end = ((unsigned long)start + size + L1_CACHE_BYTES-1) & ~(L1_CACHE_BYTES-1); + local_irq_save(flags); jump_to_uncached(); -#ifdef CONFIG_CACHE_WRITEBACK - __raw_writel(__raw_readl(CCR) | CCR_OCACHE_INVALIDATE, CCR); - /* I-cache invalidate */ - for (v = begin; v < end; v+=L1_CACHE_BYTES) { - __raw_writel((v & CACHE_PHYSADDR_MASK), - CACHE_IC_ADDRESS_ARRAY | (v & 0x000007f0) | 0x00000008); + /* If there are too many pages then just blow the cache */ + if (((end - begin) >> PAGE_SHIFT) >= MAX_OCACHE_PAGES) { + __raw_writel(__raw_readl(SH_CCR) | CCR_OCACHE_INVALIDATE, + SH_CCR); + } else { + for (v = begin; v < end; v += L1_CACHE_BYTES) + sh2a_invalidate_line(CACHE_OC_ADDRESS_ARRAY, v); } -#else - for (v = begin; v < end; v+=L1_CACHE_BYTES) { - __raw_writel((v & CACHE_PHYSADDR_MASK), - CACHE_IC_ADDRESS_ARRAY | (v & 0x000007f0) | 0x00000008); - __raw_writel((v & CACHE_PHYSADDR_MASK), - CACHE_OC_ADDRESS_ARRAY | (v & 0x000007f0) | 0x00000008); - } -#endif + back_to_cached(); local_irq_restore(flags); } -/* WBack O-Cache and flush I-Cache */ +/* + * Write back the range of D-cache, and purge the I-cache. + */ static void sh2a_flush_icache_range(void *args) { struct flusher_data *data = args; @@ -107,23 +158,21 @@ static void sh2a_flush_icache_range(void *args) start = data->addr1 & ~(L1_CACHE_BYTES-1); end = (data->addr2 + L1_CACHE_BYTES-1) & ~(L1_CACHE_BYTES-1); +#ifdef CONFIG_CACHE_WRITEBACK + sh2a__flush_wback_region((void *)start, end-start); +#endif + local_irq_save(flags); jump_to_uncached(); - for (v = start; v < end; v+=L1_CACHE_BYTES) { - unsigned long addr = (v & 0x000007f0); - int way; - /* O-Cache writeback */ - for (way = 0; way < 4; way++) { - unsigned long data = __raw_readl(CACHE_OC_ADDRESS_ARRAY | addr | (way << 11)); - if ((data & CACHE_PHYSADDR_MASK) == (v & CACHE_PHYSADDR_MASK)) { - data &= ~SH_CACHE_UPDATED; - __raw_writel(data, CACHE_OC_ADDRESS_ARRAY | addr | (way << 11)); - } - } - /* I-Cache invalidate */ - __raw_writel(addr, - CACHE_IC_ADDRESS_ARRAY | addr | 0x00000008); + /* I-Cache invalidate */ + /* If there are too many pages then just blow the cache */ + if (((end - start) >> PAGE_SHIFT) >= MAX_ICACHE_PAGES) { + __raw_writel(__raw_readl(SH_CCR) | CCR_ICACHE_INVALIDATE, + SH_CCR); + } else { + for (v = start; v < end; v += L1_CACHE_BYTES) + sh2a_invalidate_line(CACHE_IC_ADDRESS_ARRAY, v); } back_to_cached(); diff --git a/arch/sh/mm/cache-sh4.c b/arch/sh/mm/cache-sh4.c index 2cfae81914a..51d8f7f31d1 100644 --- a/arch/sh/mm/cache-sh4.c +++ b/arch/sh/mm/cache-sh4.c @@ -18,6 +18,7 @@ #include <linux/highmem.h> #include <asm/pgtable.h> #include <asm/mmu_context.h> +#include <asm/cache_insns.h> #include <asm/cacheflush.h> /* @@ -114,7 +115,7 @@ static void sh4_flush_dcache_page(void *arg) struct address_space *mapping = page_mapping(page); if (mapping && !mapping_mapped(mapping)) - set_bit(PG_dcache_dirty, &page->flags); + clear_bit(PG_dcache_clean, &page->flags); else #endif flush_cache_one(CACHE_OC_ADDRESS_ARRAY | @@ -132,9 +133,9 @@ static void flush_icache_all(void) jump_to_uncached(); /* Flush I-cache */ - ccr = __raw_readl(CCR); + ccr = __raw_readl(SH_CCR); ccr |= CCR_CACHE_ICI; - __raw_writel(ccr, CCR); + __raw_writel(ccr, SH_CCR); /* * back_to_cached() will take care of the barrier for us, don't add @@ -239,12 +240,12 @@ static void sh4_flush_cache_page(void *args) * another ASID than the current one. */ map_coherent = (current_cpu_data.dcache.n_aliases && - !test_bit(PG_dcache_dirty, &page->flags) && + test_bit(PG_dcache_clean, &page->flags) && page_mapped(page)); if (map_coherent) vaddr = kmap_coherent(page, address); else - vaddr = kmap_atomic(page, KM_USER0); + vaddr = kmap_atomic(page); address = (unsigned long)vaddr; } @@ -259,7 +260,7 @@ static void sh4_flush_cache_page(void *args) if (map_coherent) kunmap_coherent(vaddr); else - kunmap_atomic(vaddr, KM_USER0); + kunmap_atomic(vaddr); } } diff --git a/arch/sh/mm/cache-sh5.c b/arch/sh/mm/cache-sh5.c index eb4cc4ec795..d1bffbcd9d5 100644 --- a/arch/sh/mm/cache-sh5.c +++ b/arch/sh/mm/cache-sh5.c @@ -568,7 +568,7 @@ static void sh5_flush_dcache_page(void *page) } /* - * Flush the range [start,end] of kernel virtual adddress space from + * Flush the range [start,end] of kernel virtual address space from * the I-cache. The corresponding range must be purged from the * D-cache also because the SH-5 doesn't have cache snooping between * the caches. The addresses will be visible through the superpage diff --git a/arch/sh/mm/cache-sh7705.c b/arch/sh/mm/cache-sh7705.c index f498da1cce7..7729cca727e 100644 --- a/arch/sh/mm/cache-sh7705.c +++ b/arch/sh/mm/cache-sh7705.c @@ -139,7 +139,7 @@ static void sh7705_flush_dcache_page(void *arg) struct address_space *mapping = page_mapping(page); if (mapping && !mapping_mapped(mapping)) - set_bit(PG_dcache_dirty, &page->flags); + clear_bit(PG_dcache_clean, &page->flags); else __flush_dcache_page(__pa(page_address(page))); } diff --git a/arch/sh/mm/cache-shx3.c b/arch/sh/mm/cache-shx3.c new file mode 100644 index 00000000000..24c58b7dc02 --- /dev/null +++ b/arch/sh/mm/cache-shx3.c @@ -0,0 +1,44 @@ +/* + * arch/sh/mm/cache-shx3.c - SH-X3 optimized cache ops + * + * Copyright (C) 2010 Paul Mundt + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/io.h> +#include <asm/cache.h> + +#define CCR_CACHE_SNM 0x40000 /* Hardware-assisted synonym avoidance */ +#define CCR_CACHE_IBE 0x1000000 /* ICBI broadcast */ + +void __init shx3_cache_init(void) +{ + unsigned int ccr; + + ccr = __raw_readl(SH_CCR); + + /* + * If we've got cache aliases, resolve them in hardware. + */ + if (boot_cpu_data.dcache.n_aliases || boot_cpu_data.icache.n_aliases) { + ccr |= CCR_CACHE_SNM; + + boot_cpu_data.icache.n_aliases = 0; + boot_cpu_data.dcache.n_aliases = 0; + + pr_info("Enabling hardware synonym avoidance\n"); + } + +#ifdef CONFIG_SMP + /* + * Broadcast I-cache block invalidations by default. + */ + ccr |= CCR_CACHE_IBE; +#endif + + writel_uncached(ccr, SH_CCR); +} diff --git a/arch/sh/mm/cache.c b/arch/sh/mm/cache.c index 0f4095d7ac8..097c2cdd117 100644 --- a/arch/sh/mm/cache.c +++ b/arch/sh/mm/cache.c @@ -60,14 +60,14 @@ void copy_to_user_page(struct vm_area_struct *vma, struct page *page, unsigned long len) { if (boot_cpu_data.dcache.n_aliases && page_mapped(page) && - !test_bit(PG_dcache_dirty, &page->flags)) { + test_bit(PG_dcache_clean, &page->flags)) { void *vto = kmap_coherent(page, vaddr) + (vaddr & ~PAGE_MASK); memcpy(vto, src, len); kunmap_coherent(vto); } else { memcpy(dst, src, len); if (boot_cpu_data.dcache.n_aliases) - set_bit(PG_dcache_dirty, &page->flags); + clear_bit(PG_dcache_clean, &page->flags); } if (vma->vm_flags & VM_EXEC) @@ -79,14 +79,14 @@ void copy_from_user_page(struct vm_area_struct *vma, struct page *page, unsigned long len) { if (boot_cpu_data.dcache.n_aliases && page_mapped(page) && - !test_bit(PG_dcache_dirty, &page->flags)) { + test_bit(PG_dcache_clean, &page->flags)) { void *vfrom = kmap_coherent(page, vaddr) + (vaddr & ~PAGE_MASK); memcpy(dst, vfrom, len); kunmap_coherent(vfrom); } else { memcpy(dst, src, len); if (boot_cpu_data.dcache.n_aliases) - set_bit(PG_dcache_dirty, &page->flags); + clear_bit(PG_dcache_clean, &page->flags); } } @@ -95,23 +95,24 @@ void copy_user_highpage(struct page *to, struct page *from, { void *vfrom, *vto; - vto = kmap_atomic(to, KM_USER1); + vto = kmap_atomic(to); if (boot_cpu_data.dcache.n_aliases && page_mapped(from) && - !test_bit(PG_dcache_dirty, &from->flags)) { + test_bit(PG_dcache_clean, &from->flags)) { vfrom = kmap_coherent(from, vaddr); copy_page(vto, vfrom); kunmap_coherent(vfrom); } else { - vfrom = kmap_atomic(from, KM_USER0); + vfrom = kmap_atomic(from); copy_page(vto, vfrom); - kunmap_atomic(vfrom, KM_USER0); + kunmap_atomic(vfrom); } - if (pages_do_alias((unsigned long)vto, vaddr & PAGE_MASK)) + if (pages_do_alias((unsigned long)vto, vaddr & PAGE_MASK) || + (vma->vm_flags & VM_EXEC)) __flush_purge_region(vto, PAGE_SIZE); - kunmap_atomic(vto, KM_USER1); + kunmap_atomic(vto); /* Make sure this page is cleared on other CPU's too before using it */ smp_wmb(); } @@ -119,14 +120,14 @@ EXPORT_SYMBOL(copy_user_highpage); void clear_user_highpage(struct page *page, unsigned long vaddr) { - void *kaddr = kmap_atomic(page, KM_USER0); + void *kaddr = kmap_atomic(page); clear_page(kaddr); if (pages_do_alias((unsigned long)kaddr, vaddr & PAGE_MASK)) __flush_purge_region(kaddr, PAGE_SIZE); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); } EXPORT_SYMBOL(clear_user_highpage); @@ -141,7 +142,7 @@ void __update_cache(struct vm_area_struct *vma, page = pfn_to_page(pfn); if (pfn_valid(pfn)) { - int dirty = test_and_clear_bit(PG_dcache_dirty, &page->flags); + int dirty = !test_and_set_bit(PG_dcache_clean, &page->flags); if (dirty) __flush_purge_region(page_address(page), PAGE_SIZE); } @@ -153,7 +154,7 @@ void __flush_anon_page(struct page *page, unsigned long vmaddr) if (pages_do_alias(addr, vmaddr)) { if (boot_cpu_data.dcache.n_aliases && page_mapped(page) && - !test_bit(PG_dcache_dirty, &page->flags)) { + test_bit(PG_dcache_clean, &page->flags)) { void *kaddr; kaddr = kmap_coherent(page, vmaddr); @@ -284,8 +285,8 @@ void __init cpu_cache_init(void) { unsigned int cache_disabled = 0; -#ifdef CCR - cache_disabled = !(__raw_readl(CCR) & CCR_CACHE_ENABLE); +#ifdef SH_CCR + cache_disabled = !(__raw_readl(SH_CCR) & CCR_CACHE_ENABLE); #endif compute_alias(&boot_cpu_data.icache); @@ -334,6 +335,13 @@ void __init cpu_cache_init(void) extern void __weak sh4_cache_init(void); sh4_cache_init(); + + if ((boot_cpu_data.type == CPU_SH7786) || + (boot_cpu_data.type == CPU_SHX3)) { + extern void __weak shx3_cache_init(void); + + shx3_cache_init(); + } } if (boot_cpu_data.family == CPU_FAMILY_SH5) { diff --git a/arch/sh/mm/consistent.c b/arch/sh/mm/consistent.c index 902967e3f84..b81d9dbf9fe 100644 --- a/arch/sh/mm/consistent.c +++ b/arch/sh/mm/consistent.c @@ -16,6 +16,7 @@ #include <linux/dma-debug.h> #include <linux/io.h> #include <linux/module.h> +#include <linux/gfp.h> #include <asm/cacheflush.h> #include <asm/addrspace.h> @@ -32,16 +33,18 @@ static int __init dma_init(void) fs_initcall(dma_init); void *dma_generic_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp) + dma_addr_t *dma_handle, gfp_t gfp, + struct dma_attrs *attrs) { void *ret, *ret_nocache; int order = get_order(size); + gfp |= __GFP_ZERO; + ret = (void *)__get_free_pages(gfp, order); if (!ret) return NULL; - memset(ret, 0, size); /* * Pages from the page allocator may have data present in * cache. So flush the cache before using uncached memory. @@ -62,7 +65,8 @@ void *dma_generic_alloc_coherent(struct device *dev, size_t size, } void dma_generic_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_handle) + void *vaddr, dma_addr_t dma_handle, + struct dma_attrs *attrs) { int order = get_order(size); unsigned long pfn = dma_handle >> PAGE_SHIFT; @@ -77,21 +81,20 @@ void dma_generic_free_coherent(struct device *dev, size_t size, void dma_cache_sync(struct device *dev, void *vaddr, size_t size, enum dma_data_direction direction) { -#if defined(CONFIG_CPU_SH5) || defined(CONFIG_PMB) - void *p1addr = vaddr; -#else - void *p1addr = (void*) P1SEGADDR((unsigned long)vaddr); -#endif + void *addr; + + addr = __in_29bit_mode() ? + (void *)CAC_ADDR((unsigned long)vaddr) : vaddr; switch (direction) { case DMA_FROM_DEVICE: /* invalidate only */ - __flush_invalidate_region(p1addr, size); + __flush_invalidate_region(addr, size); break; case DMA_TO_DEVICE: /* writeback only */ - __flush_wback_region(p1addr, size); + __flush_wback_region(addr, size); break; case DMA_BIDIRECTIONAL: /* writeback and invalidate */ - __flush_purge_region(p1addr, size); + __flush_purge_region(addr, size); break; default: BUG(); diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c new file mode 100644 index 00000000000..541dc610150 --- /dev/null +++ b/arch/sh/mm/fault.c @@ -0,0 +1,517 @@ +/* + * Page fault handler for SH with an MMU. + * + * Copyright (C) 1999 Niibe Yutaka + * Copyright (C) 2003 - 2012 Paul Mundt + * + * Based on linux/arch/i386/mm/fault.c: + * Copyright (C) 1995 Linus Torvalds + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/hardirq.h> +#include <linux/kprobes.h> +#include <linux/perf_event.h> +#include <linux/kdebug.h> +#include <asm/io_trapped.h> +#include <asm/mmu_context.h> +#include <asm/tlbflush.h> +#include <asm/traps.h> + +static inline int notify_page_fault(struct pt_regs *regs, int trap) +{ + int ret = 0; + + if (kprobes_built_in() && !user_mode(regs)) { + preempt_disable(); + if (kprobe_running() && kprobe_fault_handler(regs, trap)) + ret = 1; + preempt_enable(); + } + + return ret; +} + +static void +force_sig_info_fault(int si_signo, int si_code, unsigned long address, + struct task_struct *tsk) +{ + siginfo_t info; + + info.si_signo = si_signo; + info.si_errno = 0; + info.si_code = si_code; + info.si_addr = (void __user *)address; + + force_sig_info(si_signo, &info, tsk); +} + +/* + * This is useful to dump out the page tables associated with + * 'addr' in mm 'mm'. + */ +static void show_pte(struct mm_struct *mm, unsigned long addr) +{ + pgd_t *pgd; + + if (mm) { + pgd = mm->pgd; + } else { + pgd = get_TTB(); + + if (unlikely(!pgd)) + pgd = swapper_pg_dir; + } + + printk(KERN_ALERT "pgd = %p\n", pgd); + pgd += pgd_index(addr); + printk(KERN_ALERT "[%08lx] *pgd=%0*Lx", addr, + (u32)(sizeof(*pgd) * 2), (u64)pgd_val(*pgd)); + + do { + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + if (pgd_none(*pgd)) + break; + + if (pgd_bad(*pgd)) { + printk("(bad)"); + break; + } + + pud = pud_offset(pgd, addr); + if (PTRS_PER_PUD != 1) + printk(", *pud=%0*Lx", (u32)(sizeof(*pud) * 2), + (u64)pud_val(*pud)); + + if (pud_none(*pud)) + break; + + if (pud_bad(*pud)) { + printk("(bad)"); + break; + } + + pmd = pmd_offset(pud, addr); + if (PTRS_PER_PMD != 1) + printk(", *pmd=%0*Lx", (u32)(sizeof(*pmd) * 2), + (u64)pmd_val(*pmd)); + + if (pmd_none(*pmd)) + break; + + if (pmd_bad(*pmd)) { + printk("(bad)"); + break; + } + + /* We must not map this if we have highmem enabled */ + if (PageHighMem(pfn_to_page(pmd_val(*pmd) >> PAGE_SHIFT))) + break; + + pte = pte_offset_kernel(pmd, addr); + printk(", *pte=%0*Lx", (u32)(sizeof(*pte) * 2), + (u64)pte_val(*pte)); + } while (0); + + printk("\n"); +} + +static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) +{ + unsigned index = pgd_index(address); + pgd_t *pgd_k; + pud_t *pud, *pud_k; + pmd_t *pmd, *pmd_k; + + pgd += index; + pgd_k = init_mm.pgd + index; + + if (!pgd_present(*pgd_k)) + return NULL; + + pud = pud_offset(pgd, address); + pud_k = pud_offset(pgd_k, address); + if (!pud_present(*pud_k)) + return NULL; + + if (!pud_present(*pud)) + set_pud(pud, *pud_k); + + pmd = pmd_offset(pud, address); + pmd_k = pmd_offset(pud_k, address); + if (!pmd_present(*pmd_k)) + return NULL; + + if (!pmd_present(*pmd)) + set_pmd(pmd, *pmd_k); + else { + /* + * The page tables are fully synchronised so there must + * be another reason for the fault. Return NULL here to + * signal that we have not taken care of the fault. + */ + BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k)); + return NULL; + } + + return pmd_k; +} + +#ifdef CONFIG_SH_STORE_QUEUES +#define __FAULT_ADDR_LIMIT P3_ADDR_MAX +#else +#define __FAULT_ADDR_LIMIT VMALLOC_END +#endif + +/* + * Handle a fault on the vmalloc or module mapping area + */ +static noinline int vmalloc_fault(unsigned long address) +{ + pgd_t *pgd_k; + pmd_t *pmd_k; + pte_t *pte_k; + + /* Make sure we are in vmalloc/module/P3 area: */ + if (!(address >= VMALLOC_START && address < __FAULT_ADDR_LIMIT)) + return -1; + + /* + * Synchronize this task's top level page-table + * with the 'reference' page table. + * + * Do _not_ use "current" here. We might be inside + * an interrupt in the middle of a task switch.. + */ + pgd_k = get_TTB(); + pmd_k = vmalloc_sync_one(pgd_k, address); + if (!pmd_k) + return -1; + + pte_k = pte_offset_kernel(pmd_k, address); + if (!pte_present(*pte_k)) + return -1; + + return 0; +} + +static void +show_fault_oops(struct pt_regs *regs, unsigned long address) +{ + if (!oops_may_print()) + return; + + printk(KERN_ALERT "BUG: unable to handle kernel "); + if (address < PAGE_SIZE) + printk(KERN_CONT "NULL pointer dereference"); + else + printk(KERN_CONT "paging request"); + + printk(KERN_CONT " at %08lx\n", address); + printk(KERN_ALERT "PC:"); + printk_address(regs->pc, 1); + + show_pte(NULL, address); +} + +static noinline void +no_context(struct pt_regs *regs, unsigned long error_code, + unsigned long address) +{ + /* Are we prepared to handle this kernel fault? */ + if (fixup_exception(regs)) + return; + + if (handle_trapped_io(regs, address)) + return; + + /* + * Oops. The kernel tried to access some bad page. We'll have to + * terminate things with extreme prejudice. + */ + bust_spinlocks(1); + + show_fault_oops(regs, address); + + die("Oops", regs, error_code); + bust_spinlocks(0); + do_exit(SIGKILL); +} + +static void +__bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, + unsigned long address, int si_code) +{ + struct task_struct *tsk = current; + + /* User mode accesses just cause a SIGSEGV */ + if (user_mode(regs)) { + /* + * It's possible to have interrupts off here: + */ + local_irq_enable(); + + force_sig_info_fault(SIGSEGV, si_code, address, tsk); + + return; + } + + no_context(regs, error_code, address); +} + +static noinline void +bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, + unsigned long address) +{ + __bad_area_nosemaphore(regs, error_code, address, SEGV_MAPERR); +} + +static void +__bad_area(struct pt_regs *regs, unsigned long error_code, + unsigned long address, int si_code) +{ + struct mm_struct *mm = current->mm; + + /* + * Something tried to access memory that isn't in our memory map.. + * Fix it, but check if it's kernel or user first.. + */ + up_read(&mm->mmap_sem); + + __bad_area_nosemaphore(regs, error_code, address, si_code); +} + +static noinline void +bad_area(struct pt_regs *regs, unsigned long error_code, unsigned long address) +{ + __bad_area(regs, error_code, address, SEGV_MAPERR); +} + +static noinline void +bad_area_access_error(struct pt_regs *regs, unsigned long error_code, + unsigned long address) +{ + __bad_area(regs, error_code, address, SEGV_ACCERR); +} + +static void +do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address) +{ + struct task_struct *tsk = current; + struct mm_struct *mm = tsk->mm; + + up_read(&mm->mmap_sem); + + /* Kernel mode? Handle exceptions or die: */ + if (!user_mode(regs)) + no_context(regs, error_code, address); + + force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk); +} + +static noinline int +mm_fault_error(struct pt_regs *regs, unsigned long error_code, + unsigned long address, unsigned int fault) +{ + /* + * Pagefault was interrupted by SIGKILL. We have no reason to + * continue pagefault. + */ + if (fatal_signal_pending(current)) { + if (!(fault & VM_FAULT_RETRY)) + up_read(¤t->mm->mmap_sem); + if (!user_mode(regs)) + no_context(regs, error_code, address); + return 1; + } + + if (!(fault & VM_FAULT_ERROR)) + return 0; + + if (fault & VM_FAULT_OOM) { + /* Kernel mode? Handle exceptions or die: */ + if (!user_mode(regs)) { + up_read(¤t->mm->mmap_sem); + no_context(regs, error_code, address); + return 1; + } + up_read(¤t->mm->mmap_sem); + + /* + * We ran out of memory, call the OOM killer, and return the + * userspace (which will retry the fault, or kill us if we got + * oom-killed): + */ + pagefault_out_of_memory(); + } else { + if (fault & VM_FAULT_SIGBUS) + do_sigbus(regs, error_code, address); + else + BUG(); + } + + return 1; +} + +static inline int access_error(int error_code, struct vm_area_struct *vma) +{ + if (error_code & FAULT_CODE_WRITE) { + /* write, present and write, not present: */ + if (unlikely(!(vma->vm_flags & VM_WRITE))) + return 1; + return 0; + } + + /* ITLB miss on NX page */ + if (unlikely((error_code & FAULT_CODE_ITLB) && + !(vma->vm_flags & VM_EXEC))) + return 1; + + /* read, not present: */ + if (unlikely(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))) + return 1; + + return 0; +} + +static int fault_in_kernel_space(unsigned long address) +{ + return address >= TASK_SIZE; +} + +/* + * This routine handles page faults. It determines the address, + * and the problem, and then passes it off to one of the appropriate + * routines. + */ +asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, + unsigned long error_code, + unsigned long address) +{ + unsigned long vec; + struct task_struct *tsk; + struct mm_struct *mm; + struct vm_area_struct * vma; + int fault; + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; + + tsk = current; + mm = tsk->mm; + vec = lookup_exception_vector(); + + /* + * We fault-in kernel-space virtual memory on-demand. The + * 'reference' page table is init_mm.pgd. + * + * NOTE! We MUST NOT take any locks for this case. We may + * be in an interrupt or a critical region, and should + * only copy the information from the master page table, + * nothing more. + */ + if (unlikely(fault_in_kernel_space(address))) { + if (vmalloc_fault(address) >= 0) + return; + if (notify_page_fault(regs, vec)) + return; + + bad_area_nosemaphore(regs, error_code, address); + return; + } + + if (unlikely(notify_page_fault(regs, vec))) + return; + + /* Only enable interrupts if they were on before the fault */ + if ((regs->sr & SR_IMASK) != SR_IMASK) + local_irq_enable(); + + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); + + /* + * If we're in an interrupt, have no user context or are running + * in an atomic region then we must not take the fault: + */ + if (unlikely(in_atomic() || !mm)) { + bad_area_nosemaphore(regs, error_code, address); + return; + } + +retry: + down_read(&mm->mmap_sem); + + vma = find_vma(mm, address); + if (unlikely(!vma)) { + bad_area(regs, error_code, address); + return; + } + if (likely(vma->vm_start <= address)) + goto good_area; + if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) { + bad_area(regs, error_code, address); + return; + } + if (unlikely(expand_stack(vma, address))) { + bad_area(regs, error_code, address); + return; + } + + /* + * Ok, we have a good vm_area for this memory access, so + * we can handle it.. + */ +good_area: + if (unlikely(access_error(error_code, vma))) { + bad_area_access_error(regs, error_code, address); + return; + } + + set_thread_fault_code(error_code); + + if (user_mode(regs)) + flags |= FAULT_FLAG_USER; + if (error_code & FAULT_CODE_WRITE) + flags |= FAULT_FLAG_WRITE; + + /* + * If for any reason at all we couldn't handle the fault, + * make sure we exit gracefully rather than endlessly redo + * the fault. + */ + fault = handle_mm_fault(mm, vma, address, flags); + + if (unlikely(fault & (VM_FAULT_RETRY | VM_FAULT_ERROR))) + if (mm_fault_error(regs, error_code, address, fault)) + return; + + if (flags & FAULT_FLAG_ALLOW_RETRY) { + if (fault & VM_FAULT_MAJOR) { + tsk->maj_flt++; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, + regs, address); + } else { + tsk->min_flt++; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, + regs, address); + } + if (fault & VM_FAULT_RETRY) { + flags &= ~FAULT_FLAG_ALLOW_RETRY; + flags |= FAULT_FLAG_TRIED; + + /* + * No need to up_read(&mm->mmap_sem) as we would + * have already released it in __lock_page_or_retry + * in mm/filemap.c. + */ + goto retry; + } + } + + up_read(&mm->mmap_sem); +} diff --git a/arch/sh/mm/fault_32.c b/arch/sh/mm/fault_32.c deleted file mode 100644 index 8bf79e3b7bd..00000000000 --- a/arch/sh/mm/fault_32.c +++ /dev/null @@ -1,380 +0,0 @@ -/* - * Page fault handler for SH with an MMU. - * - * Copyright (C) 1999 Niibe Yutaka - * Copyright (C) 2003 - 2009 Paul Mundt - * - * Based on linux/arch/i386/mm/fault.c: - * Copyright (C) 1995 Linus Torvalds - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - */ -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/hardirq.h> -#include <linux/kprobes.h> -#include <linux/perf_event.h> -#include <asm/io_trapped.h> -#include <asm/system.h> -#include <asm/mmu_context.h> -#include <asm/tlbflush.h> - -static inline int notify_page_fault(struct pt_regs *regs, int trap) -{ - int ret = 0; - - if (kprobes_built_in() && !user_mode(regs)) { - preempt_disable(); - if (kprobe_running() && kprobe_fault_handler(regs, trap)) - ret = 1; - preempt_enable(); - } - - return ret; -} - -static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) -{ - unsigned index = pgd_index(address); - pgd_t *pgd_k; - pud_t *pud, *pud_k; - pmd_t *pmd, *pmd_k; - - pgd += index; - pgd_k = init_mm.pgd + index; - - if (!pgd_present(*pgd_k)) - return NULL; - - pud = pud_offset(pgd, address); - pud_k = pud_offset(pgd_k, address); - if (!pud_present(*pud_k)) - return NULL; - - if (!pud_present(*pud)) - set_pud(pud, *pud_k); - - pmd = pmd_offset(pud, address); - pmd_k = pmd_offset(pud_k, address); - if (!pmd_present(*pmd_k)) - return NULL; - - if (!pmd_present(*pmd)) - set_pmd(pmd, *pmd_k); - else { - /* - * The page tables are fully synchronised so there must - * be another reason for the fault. Return NULL here to - * signal that we have not taken care of the fault. - */ - BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k)); - return NULL; - } - - return pmd_k; -} - -/* - * Handle a fault on the vmalloc or module mapping area - */ -static noinline int vmalloc_fault(unsigned long address) -{ - pgd_t *pgd_k; - pmd_t *pmd_k; - pte_t *pte_k; - - /* Make sure we are in vmalloc/module/P3 area: */ - if (!(address >= VMALLOC_START && address < P3_ADDR_MAX)) - return -1; - - /* - * Synchronize this task's top level page-table - * with the 'reference' page table. - * - * Do _not_ use "current" here. We might be inside - * an interrupt in the middle of a task switch.. - */ - pgd_k = get_TTB(); - pmd_k = vmalloc_sync_one(pgd_k, address); - if (!pmd_k) - return -1; - - pte_k = pte_offset_kernel(pmd_k, address); - if (!pte_present(*pte_k)) - return -1; - - return 0; -} - -static int fault_in_kernel_space(unsigned long address) -{ - return address >= TASK_SIZE; -} - -/* - * This routine handles page faults. It determines the address, - * and the problem, and then passes it off to one of the appropriate - * routines. - */ -asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, - unsigned long writeaccess, - unsigned long address) -{ - unsigned long vec; - struct task_struct *tsk; - struct mm_struct *mm; - struct vm_area_struct * vma; - int si_code; - int fault; - siginfo_t info; - - tsk = current; - mm = tsk->mm; - si_code = SEGV_MAPERR; - vec = lookup_exception_vector(); - - /* - * We fault-in kernel-space virtual memory on-demand. The - * 'reference' page table is init_mm.pgd. - * - * NOTE! We MUST NOT take any locks for this case. We may - * be in an interrupt or a critical region, and should - * only copy the information from the master page table, - * nothing more. - */ - if (unlikely(fault_in_kernel_space(address))) { - if (vmalloc_fault(address) >= 0) - return; - if (notify_page_fault(regs, vec)) - return; - - goto bad_area_nosemaphore; - } - - if (unlikely(notify_page_fault(regs, vec))) - return; - - /* Only enable interrupts if they were on before the fault */ - if ((regs->sr & SR_IMASK) != SR_IMASK) - local_irq_enable(); - - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); - - /* - * If we're in an interrupt, have no user context or are running - * in an atomic region then we must not take the fault: - */ - if (in_atomic() || !mm) - goto no_context; - - down_read(&mm->mmap_sem); - - vma = find_vma(mm, address); - if (!vma) - goto bad_area; - if (vma->vm_start <= address) - goto good_area; - if (!(vma->vm_flags & VM_GROWSDOWN)) - goto bad_area; - if (expand_stack(vma, address)) - goto bad_area; - - /* - * Ok, we have a good vm_area for this memory access, so - * we can handle it.. - */ -good_area: - si_code = SEGV_ACCERR; - if (writeaccess) { - if (!(vma->vm_flags & VM_WRITE)) - goto bad_area; - } else { - if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))) - goto bad_area; - } - - /* - * If for any reason at all we couldn't handle the fault, - * make sure we exit gracefully rather than endlessly redo - * the fault. - */ -survive: - fault = handle_mm_fault(mm, vma, address, writeaccess ? FAULT_FLAG_WRITE : 0); - if (unlikely(fault & VM_FAULT_ERROR)) { - if (fault & VM_FAULT_OOM) - goto out_of_memory; - else if (fault & VM_FAULT_SIGBUS) - goto do_sigbus; - BUG(); - } - if (fault & VM_FAULT_MAJOR) { - tsk->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, - regs, address); - } else { - tsk->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, - regs, address); - } - - up_read(&mm->mmap_sem); - return; - - /* - * Something tried to access memory that isn't in our memory map.. - * Fix it, but check if it's kernel or user first.. - */ -bad_area: - up_read(&mm->mmap_sem); - -bad_area_nosemaphore: - if (user_mode(regs)) { - info.si_signo = SIGSEGV; - info.si_errno = 0; - info.si_code = si_code; - info.si_addr = (void *) address; - force_sig_info(SIGSEGV, &info, tsk); - return; - } - -no_context: - /* Are we prepared to handle this kernel fault? */ - if (fixup_exception(regs)) - return; - - if (handle_trapped_io(regs, address)) - return; -/* - * Oops. The kernel tried to access some bad page. We'll have to - * terminate things with extreme prejudice. - * - */ - - bust_spinlocks(1); - - if (oops_may_print()) { - unsigned long page; - - if (address < PAGE_SIZE) - printk(KERN_ALERT "Unable to handle kernel NULL " - "pointer dereference"); - else - printk(KERN_ALERT "Unable to handle kernel paging " - "request"); - printk(" at virtual address %08lx\n", address); - printk(KERN_ALERT "pc = %08lx\n", regs->pc); - page = (unsigned long)get_TTB(); - if (page) { - page = ((__typeof__(page) *)page)[address >> PGDIR_SHIFT]; - printk(KERN_ALERT "*pde = %08lx\n", page); - if (page & _PAGE_PRESENT) { - page &= PAGE_MASK; - address &= 0x003ff000; - page = ((__typeof__(page) *) - __va(page))[address >> - PAGE_SHIFT]; - printk(KERN_ALERT "*pte = %08lx\n", page); - } - } - } - - die("Oops", regs, writeaccess); - bust_spinlocks(0); - do_exit(SIGKILL); - -/* - * We ran out of memory, or some other thing happened to us that made - * us unable to handle the page fault gracefully. - */ -out_of_memory: - up_read(&mm->mmap_sem); - if (is_global_init(current)) { - yield(); - down_read(&mm->mmap_sem); - goto survive; - } - printk("VM: killing process %s\n", tsk->comm); - if (user_mode(regs)) - do_group_exit(SIGKILL); - goto no_context; - -do_sigbus: - up_read(&mm->mmap_sem); - - /* - * Send a sigbus, regardless of whether we were in kernel - * or user mode. - */ - info.si_signo = SIGBUS; - info.si_errno = 0; - info.si_code = BUS_ADRERR; - info.si_addr = (void *)address; - force_sig_info(SIGBUS, &info, tsk); - - /* Kernel mode? Handle exceptions or die */ - if (!user_mode(regs)) - goto no_context; -} - -/* - * Called with interrupts disabled. - */ -asmlinkage int __kprobes -handle_tlbmiss(struct pt_regs *regs, unsigned long writeaccess, - unsigned long address) -{ - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - pte_t entry; - - /* - * We don't take page faults for P1, P2, and parts of P4, these - * are always mapped, whether it be due to legacy behaviour in - * 29-bit mode, or due to PMB configuration in 32-bit mode. - */ - if (address >= P3SEG && address < P3_ADDR_MAX) { - pgd = pgd_offset_k(address); - } else { - if (unlikely(address >= TASK_SIZE || !current->mm)) - return 1; - - pgd = pgd_offset(current->mm, address); - } - - pud = pud_offset(pgd, address); - if (pud_none_or_clear_bad(pud)) - return 1; - pmd = pmd_offset(pud, address); - if (pmd_none_or_clear_bad(pmd)) - return 1; - pte = pte_offset_kernel(pmd, address); - entry = *pte; - if (unlikely(pte_none(entry) || pte_not_present(entry))) - return 1; - if (unlikely(writeaccess && !pte_write(entry))) - return 1; - - if (writeaccess) - entry = pte_mkdirty(entry); - entry = pte_mkyoung(entry); - - set_pte(pte, entry); - -#if defined(CONFIG_CPU_SH4) && !defined(CONFIG_SMP) - /* - * SH-4 does not set MMUCR.RC to the corresponding TLB entry in - * the case of an initial page write exception, so we need to - * flush it in order to avoid potential TLB entry duplication. - */ - if (writeaccess == 2) - local_flush_tlb_one(get_asid(), address & PAGE_MASK); -#endif - - update_mmu_cache(NULL, address, pte); - - return 0; -} diff --git a/arch/sh/mm/fault_64.c b/arch/sh/mm/fault_64.c deleted file mode 100644 index 2b356cec248..00000000000 --- a/arch/sh/mm/fault_64.c +++ /dev/null @@ -1,266 +0,0 @@ -/* - * The SH64 TLB miss. - * - * Original code from fault.c - * Copyright (C) 2000, 2001 Paolo Alberelli - * - * Fast PTE->TLB refill path - * Copyright (C) 2003 Richard.Curnow@superh.com - * - * IMPORTANT NOTES : - * The do_fast_page_fault function is called from a context in entry.S - * where very few registers have been saved. In particular, the code in - * this file must be compiled not to use ANY caller-save registers that - * are not part of the restricted save set. Also, it means that code in - * this file must not make calls to functions elsewhere in the kernel, or - * else the excepting context will see corruption in its caller-save - * registers. Plus, the entry.S save area is non-reentrant, so this code - * has to run with SR.BL==1, i.e. no interrupts taken inside it and panic - * on any exception. - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - */ -#include <linux/signal.h> -#include <linux/sched.h> -#include <linux/kernel.h> -#include <linux/errno.h> -#include <linux/string.h> -#include <linux/types.h> -#include <linux/ptrace.h> -#include <linux/mman.h> -#include <linux/mm.h> -#include <linux/smp.h> -#include <linux/interrupt.h> -#include <asm/system.h> -#include <asm/tlb.h> -#include <asm/io.h> -#include <asm/uaccess.h> -#include <asm/pgalloc.h> -#include <asm/mmu_context.h> -#include <cpu/registers.h> - -/* Callable from fault.c, so not static */ -inline void __do_tlb_refill(unsigned long address, - unsigned long long is_text_not_data, pte_t *pte) -{ - unsigned long long ptel; - unsigned long long pteh=0; - struct tlb_info *tlbp; - unsigned long long next; - - /* Get PTEL first */ - ptel = pte_val(*pte); - - /* - * Set PTEH register - */ - pteh = neff_sign_extend(address & MMU_VPN_MASK); - - /* Set the ASID. */ - pteh |= get_asid() << PTEH_ASID_SHIFT; - pteh |= PTEH_VALID; - - /* Set PTEL register, set_pte has performed the sign extension */ - ptel &= _PAGE_FLAGS_HARDWARE_MASK; /* drop software flags */ - - tlbp = is_text_not_data ? &(cpu_data->itlb) : &(cpu_data->dtlb); - next = tlbp->next; - __flush_tlb_slot(next); - asm volatile ("putcfg %0,1,%2\n\n\t" - "putcfg %0,0,%1\n" - : : "r" (next), "r" (pteh), "r" (ptel) ); - - next += TLB_STEP; - if (next > tlbp->last) next = tlbp->first; - tlbp->next = next; - -} - -static int handle_vmalloc_fault(struct mm_struct *mm, - unsigned long protection_flags, - unsigned long long textaccess, - unsigned long address) -{ - pgd_t *dir; - pud_t *pud; - pmd_t *pmd; - static pte_t *pte; - pte_t entry; - - dir = pgd_offset_k(address); - - pud = pud_offset(dir, address); - if (pud_none_or_clear_bad(pud)) - return 0; - - pmd = pmd_offset(pud, address); - if (pmd_none_or_clear_bad(pmd)) - return 0; - - pte = pte_offset_kernel(pmd, address); - entry = *pte; - - if (pte_none(entry) || !pte_present(entry)) - return 0; - if ((pte_val(entry) & protection_flags) != protection_flags) - return 0; - - __do_tlb_refill(address, textaccess, pte); - - return 1; -} - -static int handle_tlbmiss(struct mm_struct *mm, - unsigned long long protection_flags, - unsigned long long textaccess, - unsigned long address) -{ - pgd_t *dir; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - pte_t entry; - - /* NB. The PGD currently only contains a single entry - there is no - page table tree stored for the top half of the address space since - virtual pages in that region should never be mapped in user mode. - (In kernel mode, the only things in that region are the 512Mb super - page (locked in), and vmalloc (modules) + I/O device pages (handled - by handle_vmalloc_fault), so no PGD for the upper half is required - by kernel mode either). - - See how mm->pgd is allocated and initialised in pgd_alloc to see why - the next test is necessary. - RPC */ - if (address >= (unsigned long) TASK_SIZE) - /* upper half - never has page table entries. */ - return 0; - - dir = pgd_offset(mm, address); - if (pgd_none(*dir) || !pgd_present(*dir)) - return 0; - if (!pgd_present(*dir)) - return 0; - - pud = pud_offset(dir, address); - if (pud_none(*pud) || !pud_present(*pud)) - return 0; - - pmd = pmd_offset(pud, address); - if (pmd_none(*pmd) || !pmd_present(*pmd)) - return 0; - - pte = pte_offset_kernel(pmd, address); - entry = *pte; - - if (pte_none(entry) || !pte_present(entry)) - return 0; - - /* - * If the page doesn't have sufficient protection bits set to - * service the kind of fault being handled, there's not much - * point doing the TLB refill. Punt the fault to the general - * handler. - */ - if ((pte_val(entry) & protection_flags) != protection_flags) - return 0; - - __do_tlb_refill(address, textaccess, pte); - - return 1; -} - -/* - * Put all this information into one structure so that everything is just - * arithmetic relative to a single base address. This reduces the number - * of movi/shori pairs needed just to load addresses of static data. - */ -struct expevt_lookup { - unsigned short protection_flags[8]; - unsigned char is_text_access[8]; - unsigned char is_write_access[8]; -}; - -#define PRU (1<<9) -#define PRW (1<<8) -#define PRX (1<<7) -#define PRR (1<<6) - -#define DIRTY (_PAGE_DIRTY | _PAGE_ACCESSED) -#define YOUNG (_PAGE_ACCESSED) - -/* Sized as 8 rather than 4 to allow checking the PTE's PRU bit against whether - the fault happened in user mode or privileged mode. */ -static struct expevt_lookup expevt_lookup_table = { - .protection_flags = {PRX, PRX, 0, 0, PRR, PRR, PRW, PRW}, - .is_text_access = {1, 1, 0, 0, 0, 0, 0, 0} -}; - -/* - This routine handles page faults that can be serviced just by refilling a - TLB entry from an existing page table entry. (This case represents a very - large majority of page faults.) Return 1 if the fault was successfully - handled. Return 0 if the fault could not be handled. (This leads into the - general fault handling in fault.c which deals with mapping file-backed - pages, stack growth, segmentation faults, swapping etc etc) - */ -asmlinkage int do_fast_page_fault(unsigned long long ssr_md, - unsigned long long expevt, - unsigned long address) -{ - struct task_struct *tsk; - struct mm_struct *mm; - unsigned long long textaccess; - unsigned long long protection_flags; - unsigned long long index; - unsigned long long expevt4; - - /* The next few lines implement a way of hashing EXPEVT into a - * small array index which can be used to lookup parameters - * specific to the type of TLBMISS being handled. - * - * Note: - * ITLBMISS has EXPEVT==0xa40 - * RTLBMISS has EXPEVT==0x040 - * WTLBMISS has EXPEVT==0x060 - */ - expevt4 = (expevt >> 4); - /* TODO : xor ssr_md into this expression too. Then we can check - * that PRU is set when it needs to be. */ - index = expevt4 ^ (expevt4 >> 5); - index &= 7; - protection_flags = expevt_lookup_table.protection_flags[index]; - textaccess = expevt_lookup_table.is_text_access[index]; - - /* SIM - * Note this is now called with interrupts still disabled - * This is to cope with being called for a missing IO port - * address with interrupts disabled. This should be fixed as - * soon as we have a better 'fast path' miss handler. - * - * Plus take care how you try and debug this stuff. - * For example, writing debug data to a port which you - * have just faulted on is not going to work. - */ - - tsk = current; - mm = tsk->mm; - - if ((address >= VMALLOC_START && address < VMALLOC_END) || - (address >= IOBASE_VADDR && address < IOBASE_END)) { - if (ssr_md) - /* - * Process-contexts can never have this address - * range mapped - */ - if (handle_vmalloc_fault(mm, protection_flags, - textaccess, address)) - return 1; - } else if (!in_interrupt() && mm) { - if (handle_tlbmiss(mm, protection_flags, textaccess, address)) - return 1; - } - - return 0; -} diff --git a/arch/sh/mm/flush-sh4.c b/arch/sh/mm/flush-sh4.c index cef402678f4..0b85dd9dd3a 100644 --- a/arch/sh/mm/flush-sh4.c +++ b/arch/sh/mm/flush-sh4.c @@ -1,6 +1,8 @@ #include <linux/mm.h> #include <asm/mmu_context.h> +#include <asm/cache_insns.h> #include <asm/cacheflush.h> +#include <asm/traps.h> /* * Write back the dirty D-caches, but not invalidate them. diff --git a/arch/sh/mm/gup.c b/arch/sh/mm/gup.c new file mode 100644 index 00000000000..bf8daf9d9c9 --- /dev/null +++ b/arch/sh/mm/gup.c @@ -0,0 +1,273 @@ +/* + * Lockless get_user_pages_fast for SuperH + * + * Copyright (C) 2009 - 2010 Paul Mundt + * + * Cloned from the x86 and PowerPC versions, by: + * + * Copyright (C) 2008 Nick Piggin + * Copyright (C) 2008 Novell Inc. + */ +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/vmstat.h> +#include <linux/highmem.h> +#include <asm/pgtable.h> + +static inline pte_t gup_get_pte(pte_t *ptep) +{ +#ifndef CONFIG_X2TLB + return ACCESS_ONCE(*ptep); +#else + /* + * With get_user_pages_fast, we walk down the pagetables without + * taking any locks. For this we would like to load the pointers + * atomically, but that is not possible with 64-bit PTEs. What + * we do have is the guarantee that a pte will only either go + * from not present to present, or present to not present or both + * -- it will not switch to a completely different present page + * without a TLB flush in between; something that we are blocking + * by holding interrupts off. + * + * Setting ptes from not present to present goes: + * ptep->pte_high = h; + * smp_wmb(); + * ptep->pte_low = l; + * + * And present to not present goes: + * ptep->pte_low = 0; + * smp_wmb(); + * ptep->pte_high = 0; + * + * We must ensure here that the load of pte_low sees l iff pte_high + * sees h. We load pte_high *after* loading pte_low, which ensures we + * don't see an older value of pte_high. *Then* we recheck pte_low, + * which ensures that we haven't picked up a changed pte high. We might + * have got rubbish values from pte_low and pte_high, but we are + * guaranteed that pte_low will not have the present bit set *unless* + * it is 'l'. And get_user_pages_fast only operates on present ptes, so + * we're safe. + * + * gup_get_pte should not be used or copied outside gup.c without being + * very careful -- it does not atomically load the pte or anything that + * is likely to be useful for you. + */ + pte_t pte; + +retry: + pte.pte_low = ptep->pte_low; + smp_rmb(); + pte.pte_high = ptep->pte_high; + smp_rmb(); + if (unlikely(pte.pte_low != ptep->pte_low)) + goto retry; + + return pte; +#endif +} + +/* + * The performance critical leaf functions are made noinline otherwise gcc + * inlines everything into a single function which results in too much + * register pressure. + */ +static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, + unsigned long end, int write, struct page **pages, int *nr) +{ + u64 mask, result; + pte_t *ptep; + +#ifdef CONFIG_X2TLB + result = _PAGE_PRESENT | _PAGE_EXT(_PAGE_EXT_KERN_READ | _PAGE_EXT_USER_READ); + if (write) + result |= _PAGE_EXT(_PAGE_EXT_KERN_WRITE | _PAGE_EXT_USER_WRITE); +#elif defined(CONFIG_SUPERH64) + result = _PAGE_PRESENT | _PAGE_USER | _PAGE_READ; + if (write) + result |= _PAGE_WRITE; +#else + result = _PAGE_PRESENT | _PAGE_USER; + if (write) + result |= _PAGE_RW; +#endif + + mask = result | _PAGE_SPECIAL; + + ptep = pte_offset_map(&pmd, addr); + do { + pte_t pte = gup_get_pte(ptep); + struct page *page; + + if ((pte_val(pte) & mask) != result) { + pte_unmap(ptep); + return 0; + } + VM_BUG_ON(!pfn_valid(pte_pfn(pte))); + page = pte_page(pte); + get_page(page); + pages[*nr] = page; + (*nr)++; + + } while (ptep++, addr += PAGE_SIZE, addr != end); + pte_unmap(ptep - 1); + + return 1; +} + +static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, + int write, struct page **pages, int *nr) +{ + unsigned long next; + pmd_t *pmdp; + + pmdp = pmd_offset(&pud, addr); + do { + pmd_t pmd = *pmdp; + + next = pmd_addr_end(addr, end); + if (pmd_none(pmd)) + return 0; + if (!gup_pte_range(pmd, addr, next, write, pages, nr)) + return 0; + } while (pmdp++, addr = next, addr != end); + + return 1; +} + +static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end, + int write, struct page **pages, int *nr) +{ + unsigned long next; + pud_t *pudp; + + pudp = pud_offset(&pgd, addr); + do { + pud_t pud = *pudp; + + next = pud_addr_end(addr, end); + if (pud_none(pud)) + return 0; + if (!gup_pmd_range(pud, addr, next, write, pages, nr)) + return 0; + } while (pudp++, addr = next, addr != end); + + return 1; +} + +/* + * Like get_user_pages_fast() except its IRQ-safe in that it won't fall + * back to the regular GUP. + */ +int __get_user_pages_fast(unsigned long start, int nr_pages, int write, + struct page **pages) +{ + struct mm_struct *mm = current->mm; + unsigned long addr, len, end; + unsigned long next; + unsigned long flags; + pgd_t *pgdp; + int nr = 0; + + start &= PAGE_MASK; + addr = start; + len = (unsigned long) nr_pages << PAGE_SHIFT; + end = start + len; + if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ, + (void __user *)start, len))) + return 0; + + /* + * This doesn't prevent pagetable teardown, but does prevent + * the pagetables and pages from being freed. + */ + local_irq_save(flags); + pgdp = pgd_offset(mm, addr); + do { + pgd_t pgd = *pgdp; + + next = pgd_addr_end(addr, end); + if (pgd_none(pgd)) + break; + if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) + break; + } while (pgdp++, addr = next, addr != end); + local_irq_restore(flags); + + return nr; +} + +/** + * get_user_pages_fast() - pin user pages in memory + * @start: starting user address + * @nr_pages: number of pages from start to pin + * @write: whether pages will be written to + * @pages: array that receives pointers to the pages pinned. + * Should be at least nr_pages long. + * + * Attempt to pin user pages in memory without taking mm->mmap_sem. + * If not successful, it will fall back to taking the lock and + * calling get_user_pages(). + * + * Returns number of pages pinned. This may be fewer than the number + * requested. If nr_pages is 0 or negative, returns 0. If no pages + * were pinned, returns -errno. + */ +int get_user_pages_fast(unsigned long start, int nr_pages, int write, + struct page **pages) +{ + struct mm_struct *mm = current->mm; + unsigned long addr, len, end; + unsigned long next; + pgd_t *pgdp; + int nr = 0; + + start &= PAGE_MASK; + addr = start; + len = (unsigned long) nr_pages << PAGE_SHIFT; + + end = start + len; + if (end < start) + goto slow_irqon; + + local_irq_disable(); + pgdp = pgd_offset(mm, addr); + do { + pgd_t pgd = *pgdp; + + next = pgd_addr_end(addr, end); + if (pgd_none(pgd)) + goto slow; + if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) + goto slow; + } while (pgdp++, addr = next, addr != end); + local_irq_enable(); + + VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT); + return nr; + + { + int ret; + +slow: + local_irq_enable(); +slow_irqon: + /* Try to get the remaining pages with get_user_pages */ + start += nr << PAGE_SHIFT; + pages += nr; + + down_read(&mm->mmap_sem); + ret = get_user_pages(current, mm, start, + (end - start) >> PAGE_SHIFT, write, 0, pages, NULL); + up_read(&mm->mmap_sem); + + /* Have to be a bit careful with return values */ + if (nr > 0) { + if (ret < 0) + ret = nr; + else + ret += nr; + } + + return ret; + } +} diff --git a/arch/sh/mm/hugetlbpage.c b/arch/sh/mm/hugetlbpage.c index 9304117039c..d7762349ea4 100644 --- a/arch/sh/mm/hugetlbpage.c +++ b/arch/sh/mm/hugetlbpage.c @@ -13,7 +13,6 @@ #include <linux/mm.h> #include <linux/hugetlb.h> #include <linux/pagemap.h> -#include <linux/slab.h> #include <linux/sysctl.h> #include <asm/mman.h> @@ -36,7 +35,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, if (pud) { pmd = pmd_alloc(mm, pud, addr); if (pmd) - pte = pte_alloc_map(mm, pmd, addr); + pte = pte_alloc_map(mm, NULL, pmd, addr); } } diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index 68028e8f26c..2d089fe2cba 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -2,7 +2,7 @@ * linux/arch/sh/mm/init.c * * Copyright (C) 1999 Niibe Yutaka - * Copyright (C) 2002 - 2007 Paul Mundt + * Copyright (C) 2002 - 2011 Paul Mundt * * Based on linux/arch/i386/mm/init.c: * Copyright (C) 1995 Linus Torvalds @@ -10,29 +10,43 @@ #include <linux/mm.h> #include <linux/swap.h> #include <linux/init.h> +#include <linux/gfp.h> #include <linux/bootmem.h> #include <linux/proc_fs.h> #include <linux/pagemap.h> #include <linux/percpu.h> #include <linux/io.h> +#include <linux/memblock.h> #include <linux/dma-mapping.h> +#include <linux/export.h> #include <asm/mmu_context.h> +#include <asm/mmzone.h> +#include <asm/kexec.h> #include <asm/tlb.h> #include <asm/cacheflush.h> #include <asm/sections.h> +#include <asm/setup.h> #include <asm/cache.h> #include <asm/sizes.h> -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); pgd_t swapper_pg_dir[PTRS_PER_PGD]; +void __init generic_mem_init(void) +{ + memblock_add(__MEMORY_START, __MEMORY_SIZE); +} + +void __init __weak plat_mem_setup(void) +{ + /* Nothing to see here, move along. */ +} + #ifdef CONFIG_MMU static pte_t *__get_pte_phys(unsigned long addr) { pgd_t *pgd; pud_t *pud; pmd_t *pmd; - pte_t *pte; pgd = pgd_offset_k(addr); if (pgd_none(*pgd)) { @@ -52,8 +66,7 @@ static pte_t *__get_pte_phys(unsigned long addr) return NULL; } - pte = pte_offset_kernel(pmd, addr); - return pte; + return pte_offset_kernel(pmd, addr); } static void set_pte_phys(unsigned long addr, unsigned long phys, pgprot_t prot) @@ -110,13 +123,45 @@ void __clear_fixmap(enum fixed_addresses idx, pgprot_t prot) clear_pte_phys(address, prot); } +static pmd_t * __init one_md_table_init(pud_t *pud) +{ + if (pud_none(*pud)) { + pmd_t *pmd; + + pmd = alloc_bootmem_pages(PAGE_SIZE); + pud_populate(&init_mm, pud, pmd); + BUG_ON(pmd != pmd_offset(pud, 0)); + } + + return pmd_offset(pud, 0); +} + +static pte_t * __init one_page_table_init(pmd_t *pmd) +{ + if (pmd_none(*pmd)) { + pte_t *pte; + + pte = alloc_bootmem_pages(PAGE_SIZE); + pmd_populate_kernel(&init_mm, pmd, pte); + BUG_ON(pte != pte_offset_kernel(pmd, 0)); + } + + return pte_offset_kernel(pmd, 0); +} + +static pte_t * __init page_table_kmap_check(pte_t *pte, pmd_t *pmd, + unsigned long vaddr, pte_t *lastpte) +{ + return pte; +} + void __init page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base) { pgd_t *pgd; pud_t *pud; pmd_t *pmd; - pte_t *pte; + pte_t *pte = NULL; int i, j, k; unsigned long vaddr; @@ -129,19 +174,13 @@ void __init page_table_range_init(unsigned long start, unsigned long end, for ( ; (i < PTRS_PER_PGD) && (vaddr != end); pgd++, i++) { pud = (pud_t *)pgd; for ( ; (j < PTRS_PER_PUD) && (vaddr != end); pud++, j++) { -#ifdef __PAGETABLE_PMD_FOLDED - pmd = (pmd_t *)pud; -#else - pmd = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE); - pud_populate(&init_mm, pud, pmd); + pmd = one_md_table_init(pud); +#ifndef __PAGETABLE_PMD_FOLDED pmd += k; #endif for (; (k < PTRS_PER_PMD) && (vaddr != end); pmd++, k++) { - if (pmd_none(*pmd)) { - pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); - pmd_populate_kernel(&init_mm, pmd, pte); - BUG_ON(pte != pte_offset_kernel(pmd, 0)); - } + pte = page_table_kmap_check(one_page_table_init(pmd), + pmd, vaddr, pte); vaddr += PMD_SIZE; } k = 0; @@ -151,15 +190,172 @@ void __init page_table_range_init(unsigned long start, unsigned long end, } #endif /* CONFIG_MMU */ -/* - * paging_init() sets up the page tables - */ +void __init allocate_pgdat(unsigned int nid) +{ + unsigned long start_pfn, end_pfn; +#ifdef CONFIG_NEED_MULTIPLE_NODES + unsigned long phys; +#endif + + get_pfn_range_for_nid(nid, &start_pfn, &end_pfn); + +#ifdef CONFIG_NEED_MULTIPLE_NODES + phys = __memblock_alloc_base(sizeof(struct pglist_data), + SMP_CACHE_BYTES, end_pfn << PAGE_SHIFT); + /* Retry with all of system memory */ + if (!phys) + phys = __memblock_alloc_base(sizeof(struct pglist_data), + SMP_CACHE_BYTES, memblock_end_of_DRAM()); + if (!phys) + panic("Can't allocate pgdat for node %d\n", nid); + + NODE_DATA(nid) = __va(phys); + memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); + + NODE_DATA(nid)->bdata = &bootmem_node_data[nid]; +#endif + + NODE_DATA(nid)->node_start_pfn = start_pfn; + NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn; +} + +static void __init bootmem_init_one_node(unsigned int nid) +{ + unsigned long total_pages, paddr; + unsigned long end_pfn; + struct pglist_data *p; + + p = NODE_DATA(nid); + + /* Nothing to do.. */ + if (!p->node_spanned_pages) + return; + + end_pfn = pgdat_end_pfn(p); + + total_pages = bootmem_bootmap_pages(p->node_spanned_pages); + + paddr = memblock_alloc(total_pages << PAGE_SHIFT, PAGE_SIZE); + if (!paddr) + panic("Can't allocate bootmap for nid[%d]\n", nid); + + init_bootmem_node(p, paddr >> PAGE_SHIFT, p->node_start_pfn, end_pfn); + + free_bootmem_with_active_regions(nid, end_pfn); + + /* + * XXX Handle initial reservations for the system memory node + * only for the moment, we'll refactor this later for handling + * reservations in other nodes. + */ + if (nid == 0) { + struct memblock_region *reg; + + /* Reserve the sections we're already using. */ + for_each_memblock(reserved, reg) { + reserve_bootmem(reg->base, reg->size, BOOTMEM_DEFAULT); + } + } + + sparse_memory_present_with_active_regions(nid); +} + +static void __init do_init_bootmem(void) +{ + struct memblock_region *reg; + int i; + + /* Add active regions with valid PFNs. */ + for_each_memblock(memory, reg) { + unsigned long start_pfn, end_pfn; + start_pfn = memblock_region_memory_base_pfn(reg); + end_pfn = memblock_region_memory_end_pfn(reg); + __add_active_range(0, start_pfn, end_pfn); + } + + /* All of system RAM sits in node 0 for the non-NUMA case */ + allocate_pgdat(0); + node_set_online(0); + + plat_mem_setup(); + + for_each_online_node(i) + bootmem_init_one_node(i); + + sparse_init(); +} + +static void __init early_reserve_mem(void) +{ + unsigned long start_pfn; + u32 zero_base = (u32)__MEMORY_START + (u32)PHYSICAL_OFFSET; + u32 start = zero_base + (u32)CONFIG_ZERO_PAGE_OFFSET; + + /* + * Partially used pages are not usable - thus + * we are rounding upwards: + */ + start_pfn = PFN_UP(__pa(_end)); + + /* + * Reserve the kernel text and Reserve the bootmem bitmap. We do + * this in two steps (first step was init_bootmem()), because + * this catches the (definitely buggy) case of us accidentally + * initializing the bootmem allocator with an invalid RAM area. + */ + memblock_reserve(start, (PFN_PHYS(start_pfn) + PAGE_SIZE - 1) - start); + + /* + * Reserve physical pages below CONFIG_ZERO_PAGE_OFFSET. + */ + if (CONFIG_ZERO_PAGE_OFFSET != 0) + memblock_reserve(zero_base, CONFIG_ZERO_PAGE_OFFSET); + + /* + * Handle additional early reservations + */ + check_for_initrd(); + reserve_crashkernel(); +} + void __init paging_init(void) { unsigned long max_zone_pfns[MAX_NR_ZONES]; unsigned long vaddr, end; int nid; + sh_mv.mv_mem_init(); + + early_reserve_mem(); + + /* + * Once the early reservations are out of the way, give the + * platforms a chance to kick out some memory. + */ + if (sh_mv.mv_mem_reserve) + sh_mv.mv_mem_reserve(); + + memblock_enforce_memory_limit(memory_limit); + memblock_allow_resize(); + + memblock_dump_all(); + + /* + * Determine low and high memory ranges: + */ + max_low_pfn = max_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT; + min_low_pfn = __MEMORY_START >> PAGE_SHIFT; + + nodes_clear(node_online_map); + + memory_start = (unsigned long)__va(__MEMORY_START); + memory_end = memory_start + (memory_limit ?: memblock_phys_mem_size()); + + uncached_init(); + pmb_init(); + do_init_bootmem(); + ioremap_fixed_init(); + /* We don't need to map the kernel through the TLB, as * it is permanatly mapped using P1. So clear the * entire pgd. */ @@ -211,32 +407,16 @@ unsigned int mem_init_done = 0; void __init mem_init(void) { - int codesize, datasize, initsize; - int nid; + pg_data_t *pgdat; iommu_init(); - num_physpages = 0; high_memory = NULL; + for_each_online_pgdat(pgdat) + high_memory = max_t(void *, high_memory, + __va(pgdat_end_pfn(pgdat) << PAGE_SHIFT)); - for_each_online_node(nid) { - pg_data_t *pgdat = NODE_DATA(nid); - unsigned long node_pages = 0; - void *node_high_memory; - - num_physpages += pgdat->node_present_pages; - - if (pgdat->node_spanned_pages) - node_pages = free_all_bootmem_node(pgdat); - - totalram_pages += node_pages; - - node_high_memory = (void *)__va((pgdat->node_start_pfn + - pgdat->node_spanned_pages) << - PAGE_SHIFT); - if (node_high_memory > high_memory) - high_memory = node_high_memory; - } + free_all_bootmem(); /* Set this up early, so we can take care of the zero page */ cpu_cache_init(); @@ -247,19 +427,8 @@ void __init mem_init(void) vsyscall_init(); - codesize = (unsigned long) &_etext - (unsigned long) &_text; - datasize = (unsigned long) &_edata - (unsigned long) &_etext; - initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; - - printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, " - "%dk data, %dk init)\n", - nr_free_pages() << (PAGE_SHIFT-10), - num_physpages << (PAGE_SHIFT-10), - codesize >> 10, - datasize >> 10, - initsize >> 10); - - printk(KERN_INFO "virtual kernel memory layout:\n" + mem_init_print_info(NULL); + pr_info("virtual kernel memory layout:\n" " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n" #ifdef CONFIG_HIGHMEM " pkmap : 0x%08lx - 0x%08lx (%4ld kB)\n" @@ -305,31 +474,13 @@ void __init mem_init(void) void free_initmem(void) { - unsigned long addr; - - addr = (unsigned long)(&__init_begin); - for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) { - ClearPageReserved(virt_to_page(addr)); - init_page_count(virt_to_page(addr)); - free_page(addr); - totalram_pages++; - } - printk("Freeing unused kernel memory: %ldk freed\n", - ((unsigned long)&__init_end - - (unsigned long)&__init_begin) >> 10); + free_initmem_default(-1); } #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - unsigned long p; - for (p = start; p < end; p += PAGE_SIZE) { - ClearPageReserved(virt_to_page(p)); - init_page_count(virt_to_page(p)); - free_page(p); - totalram_pages++; - } - printk("Freeing initrd memory: %ldk freed\n", (end - start) >> 10); + free_reserved_area((void *)start, (void *)end, -1, "initrd"); } #endif @@ -362,4 +513,21 @@ int memory_add_physaddr_to_nid(u64 addr) EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); #endif +#ifdef CONFIG_MEMORY_HOTREMOVE +int arch_remove_memory(u64 start, u64 size) +{ + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long nr_pages = size >> PAGE_SHIFT; + struct zone *zone; + int ret; + + zone = page_zone(pfn_to_page(start_pfn)); + ret = __remove_pages(zone, start_pfn, nr_pages); + if (unlikely(ret)) + pr_warn("%s: Failed, __remove_pages() == %d\n", __func__, + ret); + + return ret; +} +#endif #endif /* CONFIG_MEMORY_HOTPLUG */ diff --git a/arch/sh/mm/ioremap.c b/arch/sh/mm/ioremap.c index 1ab2385ecef..0c99ec2e7ed 100644 --- a/arch/sh/mm/ioremap.c +++ b/arch/sh/mm/ioremap.c @@ -14,6 +14,7 @@ */ #include <linux/vmalloc.h> #include <linux/module.h> +#include <linux/slab.h> #include <linux/mm.h> #include <linux/pci.h> #include <linux/io.h> diff --git a/arch/sh/mm/ioremap_fixed.c b/arch/sh/mm/ioremap_fixed.c index 7f682e5dafc..efbe84af998 100644 --- a/arch/sh/mm/ioremap_fixed.c +++ b/arch/sh/mm/ioremap_fixed.c @@ -15,7 +15,6 @@ #include <linux/io.h> #include <linux/bootmem.h> #include <linux/proc_fs.h> -#include <linux/slab.h> #include <asm/fixmap.h> #include <asm/page.h> #include <asm/pgalloc.h> diff --git a/arch/sh/mm/kmap.c b/arch/sh/mm/kmap.c index 15d74ea4209..ec29e14ec5a 100644 --- a/arch/sh/mm/kmap.c +++ b/arch/sh/mm/kmap.c @@ -34,7 +34,7 @@ void *kmap_coherent(struct page *page, unsigned long addr) enum fixed_addresses idx; unsigned long vaddr; - BUG_ON(test_bit(PG_dcache_dirty, &page->flags)); + BUG_ON(!test_bit(PG_dcache_clean, &page->flags)); pagefault_disable(); diff --git a/arch/sh/mm/mmap.c b/arch/sh/mm/mmap.c index afeb710ec5c..6777177807c 100644 --- a/arch/sh/mm/mmap.c +++ b/arch/sh/mm/mmap.c @@ -30,25 +30,13 @@ static inline unsigned long COLOUR_ALIGN(unsigned long addr, return base + off; } -static inline unsigned long COLOUR_ALIGN_DOWN(unsigned long addr, - unsigned long pgoff) -{ - unsigned long base = addr & ~shm_align_mask; - unsigned long off = (pgoff << PAGE_SHIFT) & shm_align_mask; - - if (base + off <= addr) - return base + off; - - return base - off; -} - unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma; - unsigned long start_addr; int do_colour_align; + struct vm_unmapped_area_info info; if (flags & MAP_FIXED) { /* We do not accept a shared mapping if it would violate @@ -79,47 +67,13 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, return addr; } - if (len > mm->cached_hole_size) { - start_addr = addr = mm->free_area_cache; - } else { - mm->cached_hole_size = 0; - start_addr = addr = TASK_UNMAPPED_BASE; - } - -full_search: - if (do_colour_align) - addr = COLOUR_ALIGN(addr, pgoff); - else - addr = PAGE_ALIGN(mm->free_area_cache); - - for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { - /* At this point: (!vma || addr < vma->vm_end). */ - if (unlikely(TASK_SIZE - len < addr)) { - /* - * Start a new search - just in case we missed - * some holes. - */ - if (start_addr != TASK_UNMAPPED_BASE) { - start_addr = addr = TASK_UNMAPPED_BASE; - mm->cached_hole_size = 0; - goto full_search; - } - return -ENOMEM; - } - if (likely(!vma || addr + len <= vma->vm_start)) { - /* - * Remember the place where we stopped the search: - */ - mm->free_area_cache = addr + len; - return addr; - } - if (addr + mm->cached_hole_size < vma->vm_start) - mm->cached_hole_size = vma->vm_start - addr; - - addr = vma->vm_end; - if (do_colour_align) - addr = COLOUR_ALIGN(addr, pgoff); - } + info.flags = 0; + info.length = len; + info.low_limit = TASK_UNMAPPED_BASE; + info.high_limit = TASK_SIZE; + info.align_mask = do_colour_align ? (PAGE_MASK & shm_align_mask) : 0; + info.align_offset = pgoff << PAGE_SHIFT; + return vm_unmapped_area(&info); } unsigned long @@ -131,6 +85,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, struct mm_struct *mm = current->mm; unsigned long addr = addr0; int do_colour_align; + struct vm_unmapped_area_info info; if (flags & MAP_FIXED) { /* We do not accept a shared mapping if it would violate @@ -162,73 +117,27 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, return addr; } - /* check if free_area_cache is useful for us */ - if (len <= mm->cached_hole_size) { - mm->cached_hole_size = 0; - mm->free_area_cache = mm->mmap_base; - } - - /* either no address requested or can't fit in requested address hole */ - addr = mm->free_area_cache; - if (do_colour_align) { - unsigned long base = COLOUR_ALIGN_DOWN(addr-len, pgoff); + info.flags = VM_UNMAPPED_AREA_TOPDOWN; + info.length = len; + info.low_limit = PAGE_SIZE; + info.high_limit = mm->mmap_base; + info.align_mask = do_colour_align ? (PAGE_MASK & shm_align_mask) : 0; + info.align_offset = pgoff << PAGE_SHIFT; + addr = vm_unmapped_area(&info); - addr = base + len; - } - - /* make sure it can fit in the remaining address space */ - if (likely(addr > len)) { - vma = find_vma(mm, addr-len); - if (!vma || addr <= vma->vm_start) { - /* remember the address as a hint for next time */ - return (mm->free_area_cache = addr-len); - } - } - - if (unlikely(mm->mmap_base < len)) - goto bottomup; - - addr = mm->mmap_base-len; - if (do_colour_align) - addr = COLOUR_ALIGN_DOWN(addr, pgoff); - - do { - /* - * Lookup failure means no vma is above this address, - * else if new region fits below vma->vm_start, - * return with success: - */ - vma = find_vma(mm, addr); - if (likely(!vma || addr+len <= vma->vm_start)) { - /* remember the address as a hint for next time */ - return (mm->free_area_cache = addr); - } - - /* remember the largest hole we saw so far */ - if (addr + mm->cached_hole_size < vma->vm_start) - mm->cached_hole_size = vma->vm_start - addr; - - /* try just below the current vma->vm_start */ - addr = vma->vm_start-len; - if (do_colour_align) - addr = COLOUR_ALIGN_DOWN(addr, pgoff); - } while (likely(len < vma->vm_start)); - -bottomup: /* * A failed mmap() very likely causes application failure, * so fall back to the bottom-up function here. This scenario * can happen with large stack limits and large mmap() * allocations. */ - mm->cached_hole_size = ~0UL; - mm->free_area_cache = TASK_UNMAPPED_BASE; - addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags); - /* - * Restore the topdown base: - */ - mm->free_area_cache = mm->mmap_base; - mm->cached_hole_size = ~0UL; + if (addr & ~PAGE_MASK) { + VM_BUG_ON(addr != -ENOMEM); + info.flags = 0; + info.low_limit = TASK_UNMAPPED_BASE; + info.high_limit = TASK_SIZE; + addr = vm_unmapped_area(&info); + } return addr; } @@ -238,7 +147,7 @@ bottomup: * You really shouldn't be using read() or write() on /dev/mem. This * might go away in the future. */ -int valid_phys_addr_range(unsigned long addr, size_t count) +int valid_phys_addr_range(phys_addr_t addr, size_t count) { if (addr < __MEMORY_START) return 0; diff --git a/arch/sh/mm/nommu.c b/arch/sh/mm/nommu.c index 7694f50c903..36312d254fa 100644 --- a/arch/sh/mm/nommu.c +++ b/arch/sh/mm/nommu.c @@ -67,6 +67,10 @@ void local_flush_tlb_kernel_range(unsigned long start, unsigned long end) BUG(); } +void __flush_tlb_global(void) +{ +} + void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t pte) { } diff --git a/arch/sh/mm/numa.c b/arch/sh/mm/numa.c index 961b34085e3..3d85225b9e9 100644 --- a/arch/sh/mm/numa.c +++ b/arch/sh/mm/numa.c @@ -9,7 +9,7 @@ */ #include <linux/module.h> #include <linux/bootmem.h> -#include <linux/lmb.h> +#include <linux/memblock.h> #include <linux/mm.h> #include <linux/numa.h> #include <linux/pfn.h> @@ -24,44 +24,6 @@ EXPORT_SYMBOL_GPL(node_data); * latency. Each node's pgdat is node-local at the beginning of the node, * immediately followed by the node mem map. */ -void __init setup_memory(void) -{ - unsigned long free_pfn = PFN_UP(__pa(_end)); - u64 base = min_low_pfn << PAGE_SHIFT; - u64 size = (max_low_pfn << PAGE_SHIFT) - base; - - lmb_add(base, size); - - /* Reserve the LMB regions used by the kernel, initrd, etc.. */ - lmb_reserve(__MEMORY_START + CONFIG_ZERO_PAGE_OFFSET, - (PFN_PHYS(free_pfn) + PAGE_SIZE - 1) - - (__MEMORY_START + CONFIG_ZERO_PAGE_OFFSET)); - - /* - * Reserve physical pages below CONFIG_ZERO_PAGE_OFFSET. - */ - if (CONFIG_ZERO_PAGE_OFFSET != 0) - lmb_reserve(__MEMORY_START, CONFIG_ZERO_PAGE_OFFSET); - - lmb_analyze(); - lmb_dump_all(); - - /* - * Node 0 sets up its pgdat at the first available pfn, - * and bumps it up before setting up the bootmem allocator. - */ - NODE_DATA(0) = pfn_to_kaddr(free_pfn); - memset(NODE_DATA(0), 0, sizeof(struct pglist_data)); - free_pfn += PFN_UP(sizeof(struct pglist_data)); - NODE_DATA(0)->bdata = &bootmem_node_data[0]; - - /* Set up node 0 */ - setup_bootmem_allocator(free_pfn); - - /* Give the platforms a chance to hook up their nodes */ - plat_mem_setup(); -} - void __init setup_bootmem_node(int nid, unsigned long start, unsigned long end) { unsigned long bootmap_pages; @@ -77,12 +39,12 @@ void __init setup_bootmem_node(int nid, unsigned long start, unsigned long end) pmb_bolt_mapping((unsigned long)__va(start), start, end - start, PAGE_KERNEL); - lmb_add(start, end - start); + memblock_add(start, end - start); __add_active_range(nid, start_pfn, end_pfn); /* Node-local pgdat */ - NODE_DATA(nid) = __va(lmb_alloc_base(sizeof(struct pglist_data), + NODE_DATA(nid) = __va(memblock_alloc_base(sizeof(struct pglist_data), SMP_CACHE_BYTES, end)); memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); @@ -92,7 +54,7 @@ void __init setup_bootmem_node(int nid, unsigned long start, unsigned long end) /* Node-local bootmap */ bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn); - bootmem_paddr = lmb_alloc_base(bootmap_pages << PAGE_SHIFT, + bootmem_paddr = memblock_alloc_base(bootmap_pages << PAGE_SHIFT, PAGE_SIZE, end); init_bootmem_node(NODE_DATA(nid), bootmem_paddr >> PAGE_SHIFT, start_pfn, end_pfn); diff --git a/arch/sh/mm/pgtable.c b/arch/sh/mm/pgtable.c index 6f21fb1d872..26e03a1f7ca 100644 --- a/arch/sh/mm/pgtable.c +++ b/arch/sh/mm/pgtable.c @@ -1,4 +1,5 @@ #include <linux/mm.h> +#include <linux/slab.h> #define PGALLOC_GFP GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO diff --git a/arch/sh/mm/pmb.c b/arch/sh/mm/pmb.c index a4662e2782c..7160c9fd6fe 100644 --- a/arch/sh/mm/pmb.c +++ b/arch/sh/mm/pmb.c @@ -3,7 +3,7 @@ * * Privileged Space Mapping Buffer (PMB) Support. * - * Copyright (C) 2005 - 2010 Paul Mundt + * Copyright (C) 2005 - 2011 Paul Mundt * Copyright (C) 2010 Matt Fleming * * This file is subject to the terms and conditions of the GNU General Public @@ -12,10 +12,9 @@ */ #include <linux/init.h> #include <linux/kernel.h> -#include <linux/sysdev.h> +#include <linux/syscore_ops.h> #include <linux/cpu.h> #include <linux/module.h> -#include <linux/slab.h> #include <linux/bitops.h> #include <linux/debugfs.h> #include <linux/fs.h> @@ -26,7 +25,6 @@ #include <linux/vmalloc.h> #include <asm/cacheflush.h> #include <asm/sizes.h> -#include <asm/system.h> #include <asm/uaccess.h> #include <asm/pgtable.h> #include <asm/page.h> @@ -41,7 +39,7 @@ struct pmb_entry { unsigned long flags; unsigned long size; - spinlock_t lock; + raw_spinlock_t lock; /* * 0 .. NR_PMB_ENTRIES for specific entry selection, or @@ -266,7 +264,7 @@ static struct pmb_entry *pmb_alloc(unsigned long vpn, unsigned long ppn, memset(pmbe, 0, sizeof(struct pmb_entry)); - spin_lock_init(&pmbe->lock); + raw_spin_lock_init(&pmbe->lock); pmbe->vpn = vpn; pmbe->ppn = ppn; @@ -323,14 +321,16 @@ static void __clear_pmb_entry(struct pmb_entry *pmbe) writel_uncached(data_val & ~PMB_V, data); } +#ifdef CONFIG_PM static void set_pmb_entry(struct pmb_entry *pmbe) { unsigned long flags; - spin_lock_irqsave(&pmbe->lock, flags); + raw_spin_lock_irqsave(&pmbe->lock, flags); __set_pmb_entry(pmbe); - spin_unlock_irqrestore(&pmbe->lock, flags); + raw_spin_unlock_irqrestore(&pmbe->lock, flags); } +#endif /* CONFIG_PM */ int pmb_bolt_mapping(unsigned long vaddr, phys_addr_t phys, unsigned long size, pgprot_t prot) @@ -340,6 +340,8 @@ int pmb_bolt_mapping(unsigned long vaddr, phys_addr_t phys, unsigned long flags, pmb_flags; int i, mapped; + if (size < SZ_16M) + return -EINVAL; if (!pmb_addr_valid(vaddr, size)) return -EFAULT; if (pmb_mapping_exists(vaddr, phys, size)) @@ -365,7 +367,7 @@ int pmb_bolt_mapping(unsigned long vaddr, phys_addr_t phys, return PTR_ERR(pmbe); } - spin_lock_irqsave(&pmbe->lock, flags); + raw_spin_lock_irqsave(&pmbe->lock, flags); pmbe->size = pmb_sizes[i].size; @@ -380,9 +382,10 @@ int pmb_bolt_mapping(unsigned long vaddr, phys_addr_t phys, * entries for easier tear-down. */ if (likely(pmbp)) { - spin_lock(&pmbp->lock); + raw_spin_lock_nested(&pmbp->lock, + SINGLE_DEPTH_NESTING); pmbp->link = pmbe; - spin_unlock(&pmbp->lock); + raw_spin_unlock(&pmbp->lock); } pmbp = pmbe; @@ -395,7 +398,7 @@ int pmb_bolt_mapping(unsigned long vaddr, phys_addr_t phys, i--; mapped++; - spin_unlock_irqrestore(&pmbe->lock, flags); + raw_spin_unlock_irqrestore(&pmbe->lock, flags); } } while (size >= SZ_16M); @@ -624,15 +627,14 @@ static void __init pmb_synchronize(void) continue; } - spin_lock_irqsave(&pmbe->lock, irqflags); + raw_spin_lock_irqsave(&pmbe->lock, irqflags); for (j = 0; j < ARRAY_SIZE(pmb_sizes); j++) if (pmb_sizes[j].flag == size) pmbe->size = pmb_sizes[j].size; if (pmbp) { - spin_lock(&pmbp->lock); - + raw_spin_lock_nested(&pmbp->lock, SINGLE_DEPTH_NESTING); /* * Compare the previous entry against the current one to * see if the entries span a contiguous mapping. If so, @@ -641,13 +643,12 @@ static void __init pmb_synchronize(void) */ if (pmb_can_merge(pmbp, pmbe)) pmbp->link = pmbe; - - spin_unlock(&pmbp->lock); + raw_spin_unlock(&pmbp->lock); } pmbp = pmbe; - spin_unlock_irqrestore(&pmbe->lock, irqflags); + raw_spin_unlock_irqrestore(&pmbe->lock, irqflags); } } @@ -679,7 +680,7 @@ static void __init pmb_merge(struct pmb_entry *head) /* * The merged page size must be valid. */ - if (!pmb_size_valid(newsize)) + if (!depth || !pmb_size_valid(newsize)) return; head->flags &= ~PMB_SZ_MASK; @@ -754,7 +755,7 @@ static void __init pmb_resize(void) /* * Found it, now resize it. */ - spin_lock_irqsave(&pmbe->lock, flags); + raw_spin_lock_irqsave(&pmbe->lock, flags); pmbe->size = SZ_16M; pmbe->flags &= ~PMB_SZ_MASK; @@ -764,10 +765,10 @@ static void __init pmb_resize(void) __set_pmb_entry(pmbe); - spin_unlock_irqrestore(&pmbe->lock, flags); + raw_spin_unlock_irqrestore(&pmbe->lock, flags); } - read_lock(&pmb_rwlock); + read_unlock(&pmb_rwlock); } #endif @@ -802,7 +803,7 @@ void __init pmb_init(void) writel_uncached(0, PMB_IRMCR); /* Flush out the TLB */ - __raw_writel(__raw_readl(MMUCR) | MMUCR_TI, MMUCR); + local_flush_tlb_all(); ctrl_barrier(); } @@ -863,57 +864,40 @@ static int __init pmb_debugfs_init(void) struct dentry *dentry; dentry = debugfs_create_file("pmb", S_IFREG | S_IRUGO, - sh_debugfs_root, NULL, &pmb_debugfs_fops); + arch_debugfs_dir, NULL, &pmb_debugfs_fops); if (!dentry) return -ENOMEM; - if (IS_ERR(dentry)) - return PTR_ERR(dentry); return 0; } subsys_initcall(pmb_debugfs_init); #ifdef CONFIG_PM -static int pmb_sysdev_suspend(struct sys_device *dev, pm_message_t state) +static void pmb_syscore_resume(void) { - static pm_message_t prev_state; + struct pmb_entry *pmbe; int i; - /* Restore the PMB after a resume from hibernation */ - if (state.event == PM_EVENT_ON && - prev_state.event == PM_EVENT_FREEZE) { - struct pmb_entry *pmbe; - - read_lock(&pmb_rwlock); + read_lock(&pmb_rwlock); - for (i = 0; i < ARRAY_SIZE(pmb_entry_list); i++) { - if (test_bit(i, pmb_map)) { - pmbe = &pmb_entry_list[i]; - set_pmb_entry(pmbe); - } + for (i = 0; i < ARRAY_SIZE(pmb_entry_list); i++) { + if (test_bit(i, pmb_map)) { + pmbe = &pmb_entry_list[i]; + set_pmb_entry(pmbe); } - - read_unlock(&pmb_rwlock); } - prev_state = state; - - return 0; -} - -static int pmb_sysdev_resume(struct sys_device *dev) -{ - return pmb_sysdev_suspend(dev, PMSG_ON); + read_unlock(&pmb_rwlock); } -static struct sysdev_driver pmb_sysdev_driver = { - .suspend = pmb_sysdev_suspend, - .resume = pmb_sysdev_resume, +static struct syscore_ops pmb_syscore_ops = { + .resume = pmb_syscore_resume, }; static int __init pmb_sysdev_init(void) { - return sysdev_driver_register(&cpu_sysdev_class, &pmb_sysdev_driver); + register_syscore_ops(&pmb_syscore_ops); + return 0; } subsys_initcall(pmb_sysdev_init); #endif diff --git a/arch/sh/mm/sram.c b/arch/sh/mm/sram.c new file mode 100644 index 00000000000..2d8fa718d55 --- /dev/null +++ b/arch/sh/mm/sram.c @@ -0,0 +1,35 @@ +/* + * SRAM pool for tiny memories not otherwise managed. + * + * Copyright (C) 2010 Paul Mundt + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <asm/sram.h> + +/* + * This provides a standard SRAM pool for tiny memories that can be + * added either by the CPU or the platform code. Typical SRAM sizes + * to be inserted in to the pool will generally be less than the page + * size, with anything more reasonably sized handled as a NUMA memory + * node. + */ +struct gen_pool *sram_pool; + +static int __init sram_pool_init(void) +{ + /* + * This is a global pool, we don't care about node locality. + */ + sram_pool = gen_pool_create(1, -1); + if (unlikely(!sram_pool)) + return -ENOMEM; + + return 0; +} +core_initcall(sram_pool_init); diff --git a/arch/sh/mm/tlb-debugfs.c b/arch/sh/mm/tlb-debugfs.c new file mode 100644 index 00000000000..dea637a0924 --- /dev/null +++ b/arch/sh/mm/tlb-debugfs.c @@ -0,0 +1,172 @@ +/* + * arch/sh/mm/tlb-debugfs.c + * + * debugfs ops for SH-4 ITLB/UTLBs. + * + * Copyright (C) 2010 Matt Fleming + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ +#include <linux/init.h> +#include <linux/module.h> +#include <linux/debugfs.h> +#include <linux/seq_file.h> +#include <asm/processor.h> +#include <asm/mmu_context.h> +#include <asm/tlbflush.h> + +enum tlb_type { + TLB_TYPE_ITLB, + TLB_TYPE_UTLB, +}; + +static struct { + int bits; + const char *size; +} tlb_sizes[] = { + { 0x0, " 1KB" }, + { 0x1, " 4KB" }, + { 0x2, " 8KB" }, + { 0x4, " 64KB" }, + { 0x5, "256KB" }, + { 0x7, " 1MB" }, + { 0x8, " 4MB" }, + { 0xc, " 64MB" }, +}; + +static int tlb_seq_show(struct seq_file *file, void *iter) +{ + unsigned int tlb_type = (unsigned int)file->private; + unsigned long addr1, addr2, data1, data2; + unsigned long flags; + unsigned long mmucr; + unsigned int nentries, entry; + unsigned int urb; + + mmucr = __raw_readl(MMUCR); + if ((mmucr & 0x1) == 0) { + seq_printf(file, "address translation disabled\n"); + return 0; + } + + if (tlb_type == TLB_TYPE_ITLB) { + addr1 = MMU_ITLB_ADDRESS_ARRAY; + addr2 = MMU_ITLB_ADDRESS_ARRAY2; + data1 = MMU_ITLB_DATA_ARRAY; + data2 = MMU_ITLB_DATA_ARRAY2; + nentries = 4; + } else { + addr1 = MMU_UTLB_ADDRESS_ARRAY; + addr2 = MMU_UTLB_ADDRESS_ARRAY2; + data1 = MMU_UTLB_DATA_ARRAY; + data2 = MMU_UTLB_DATA_ARRAY2; + nentries = 64; + } + + local_irq_save(flags); + jump_to_uncached(); + + urb = (mmucr & MMUCR_URB) >> MMUCR_URB_SHIFT; + + /* Make the "entry >= urb" test fail. */ + if (urb == 0) + urb = MMUCR_URB_NENTRIES + 1; + + if (tlb_type == TLB_TYPE_ITLB) { + addr1 = MMU_ITLB_ADDRESS_ARRAY; + addr2 = MMU_ITLB_ADDRESS_ARRAY2; + data1 = MMU_ITLB_DATA_ARRAY; + data2 = MMU_ITLB_DATA_ARRAY2; + nentries = 4; + } else { + addr1 = MMU_UTLB_ADDRESS_ARRAY; + addr2 = MMU_UTLB_ADDRESS_ARRAY2; + data1 = MMU_UTLB_DATA_ARRAY; + data2 = MMU_UTLB_DATA_ARRAY2; + nentries = 64; + } + + seq_printf(file, "entry: vpn ppn asid size valid wired\n"); + + for (entry = 0; entry < nentries; entry++) { + unsigned long vpn, ppn, asid, size; + unsigned long valid; + unsigned long val; + const char *sz = " ?"; + int i; + + val = __raw_readl(addr1 | (entry << MMU_TLB_ENTRY_SHIFT)); + ctrl_barrier(); + vpn = val & 0xfffffc00; + valid = val & 0x100; + + val = __raw_readl(addr2 | (entry << MMU_TLB_ENTRY_SHIFT)); + ctrl_barrier(); + asid = val & MMU_CONTEXT_ASID_MASK; + + val = __raw_readl(data1 | (entry << MMU_TLB_ENTRY_SHIFT)); + ctrl_barrier(); + ppn = (val & 0x0ffffc00) << 4; + + val = __raw_readl(data2 | (entry << MMU_TLB_ENTRY_SHIFT)); + ctrl_barrier(); + size = (val & 0xf0) >> 4; + + for (i = 0; i < ARRAY_SIZE(tlb_sizes); i++) { + if (tlb_sizes[i].bits == size) + break; + } + + if (i != ARRAY_SIZE(tlb_sizes)) + sz = tlb_sizes[i].size; + + seq_printf(file, "%2d: 0x%08lx 0x%08lx %5lu %s %s %s\n", + entry, vpn, ppn, asid, + sz, valid ? "V" : "-", + (urb <= entry) ? "W" : "-"); + } + + back_to_cached(); + local_irq_restore(flags); + + return 0; +} + +static int tlb_debugfs_open(struct inode *inode, struct file *file) +{ + return single_open(file, tlb_seq_show, inode->i_private); +} + +static const struct file_operations tlb_debugfs_fops = { + .owner = THIS_MODULE, + .open = tlb_debugfs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int __init tlb_debugfs_init(void) +{ + struct dentry *itlb, *utlb; + + itlb = debugfs_create_file("itlb", S_IRUSR, arch_debugfs_dir, + (unsigned int *)TLB_TYPE_ITLB, + &tlb_debugfs_fops); + if (unlikely(!itlb)) + return -ENOMEM; + + utlb = debugfs_create_file("utlb", S_IRUSR, arch_debugfs_dir, + (unsigned int *)TLB_TYPE_UTLB, + &tlb_debugfs_fops); + if (unlikely(!utlb)) { + debugfs_remove(itlb); + return -ENOMEM; + } + + return 0; +} +module_init(tlb_debugfs_init); + +MODULE_LICENSE("GPL v2"); diff --git a/arch/sh/mm/tlb-pteaex.c b/arch/sh/mm/tlb-pteaex.c index 32dc674c550..4db21adfe5d 100644 --- a/arch/sh/mm/tlb-pteaex.c +++ b/arch/sh/mm/tlb-pteaex.c @@ -12,7 +12,6 @@ #include <linux/kernel.h> #include <linux/mm.h> #include <linux/io.h> -#include <asm/system.h> #include <asm/mmu_context.h> #include <asm/cacheflush.h> @@ -73,5 +72,35 @@ void local_flush_tlb_one(unsigned long asid, unsigned long page) jump_to_uncached(); __raw_writel(page, MMU_UTLB_ADDRESS_ARRAY | MMU_PAGE_ASSOC_BIT); __raw_writel(asid, MMU_UTLB_ADDRESS_ARRAY2 | MMU_PAGE_ASSOC_BIT); + __raw_writel(page, MMU_ITLB_ADDRESS_ARRAY | MMU_PAGE_ASSOC_BIT); + __raw_writel(asid, MMU_ITLB_ADDRESS_ARRAY2 | MMU_PAGE_ASSOC_BIT); back_to_cached(); } + +void local_flush_tlb_all(void) +{ + unsigned long flags, status; + int i; + + /* + * Flush all the TLB. + */ + local_irq_save(flags); + jump_to_uncached(); + + status = __raw_readl(MMUCR); + status = ((status & MMUCR_URB) >> MMUCR_URB_SHIFT); + + if (status == 0) + status = MMUCR_URB_NENTRIES; + + for (i = 0; i < status; i++) + __raw_writel(0x0, MMU_UTLB_ADDRESS_ARRAY | (i << 8)); + + for (i = 0; i < 4; i++) + __raw_writel(0x0, MMU_ITLB_ADDRESS_ARRAY | (i << 8)); + + back_to_cached(); + ctrl_barrier(); + local_irq_restore(flags); +} diff --git a/arch/sh/mm/tlb-sh3.c b/arch/sh/mm/tlb-sh3.c index 4f5f7cbdd50..6554fb439f0 100644 --- a/arch/sh/mm/tlb-sh3.c +++ b/arch/sh/mm/tlb-sh3.c @@ -20,7 +20,6 @@ #include <linux/smp.h> #include <linux/interrupt.h> -#include <asm/system.h> #include <asm/io.h> #include <asm/uaccess.h> #include <asm/pgalloc.h> @@ -77,3 +76,22 @@ void local_flush_tlb_one(unsigned long asid, unsigned long page) for (i = 0; i < ways; i++) __raw_writel(data, addr + (i << 8)); } + +void local_flush_tlb_all(void) +{ + unsigned long flags, status; + + /* + * Flush all the TLB. + * + * Write to the MMU control register's bit: + * TF-bit for SH-3, TI-bit for SH-4. + * It's same position, bit #2. + */ + local_irq_save(flags); + status = __raw_readl(MMUCR); + status |= 0x04; + __raw_writel(status, MMUCR); + ctrl_barrier(); + local_irq_restore(flags); +} diff --git a/arch/sh/mm/tlb-sh4.c b/arch/sh/mm/tlb-sh4.c index ccac77f504a..d42dd7e443d 100644 --- a/arch/sh/mm/tlb-sh4.c +++ b/arch/sh/mm/tlb-sh4.c @@ -11,7 +11,6 @@ #include <linux/kernel.h> #include <linux/mm.h> #include <linux/io.h> -#include <asm/system.h> #include <asm/mmu_context.h> #include <asm/cacheflush.h> @@ -80,3 +79,31 @@ void local_flush_tlb_one(unsigned long asid, unsigned long page) __raw_writel(data, addr); back_to_cached(); } + +void local_flush_tlb_all(void) +{ + unsigned long flags, status; + int i; + + /* + * Flush all the TLB. + */ + local_irq_save(flags); + jump_to_uncached(); + + status = __raw_readl(MMUCR); + status = ((status & MMUCR_URB) >> MMUCR_URB_SHIFT); + + if (status == 0) + status = MMUCR_URB_NENTRIES; + + for (i = 0; i < status; i++) + __raw_writel(0x0, MMU_UTLB_ADDRESS_ARRAY | (i << 8)); + + for (i = 0; i < 4; i++) + __raw_writel(0x0, MMU_ITLB_ADDRESS_ARRAY | (i << 8)); + + back_to_cached(); + ctrl_barrier(); + local_irq_restore(flags); +} diff --git a/arch/sh/mm/tlb-sh5.c b/arch/sh/mm/tlb-sh5.c index f27dbe1c159..e4bb2a8e0a6 100644 --- a/arch/sh/mm/tlb-sh5.c +++ b/arch/sh/mm/tlb-sh5.c @@ -17,7 +17,7 @@ /** * sh64_tlb_init - Perform initial setup for the DTLB and ITLB. */ -int __init sh64_tlb_init(void) +int sh64_tlb_init(void) { /* Assign some sane DTLB defaults */ cpu_data->dtlb.entries = 64; @@ -182,3 +182,43 @@ void tlb_unwire_entry(void) local_irq_restore(flags); } + +void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t pte) +{ + unsigned long long ptel; + unsigned long long pteh=0; + struct tlb_info *tlbp; + unsigned long long next; + unsigned int fault_code = get_thread_fault_code(); + + /* Get PTEL first */ + ptel = pte.pte_low; + + /* + * Set PTEH register + */ + pteh = neff_sign_extend(address & MMU_VPN_MASK); + + /* Set the ASID. */ + pteh |= get_asid() << PTEH_ASID_SHIFT; + pteh |= PTEH_VALID; + + /* Set PTEL register, set_pte has performed the sign extension */ + ptel &= _PAGE_FLAGS_HARDWARE_MASK; /* drop software flags */ + + if (fault_code & FAULT_CODE_ITLB) + tlbp = &cpu_data->itlb; + else + tlbp = &cpu_data->dtlb; + + next = tlbp->next; + __flush_tlb_slot(next); + asm volatile ("putcfg %0,1,%2\n\n\t" + "putcfg %0,0,%1\n" + : : "r" (next), "r" (pteh), "r" (ptel) ); + + next += TLB_STEP; + if (next > tlbp->last) + next = tlbp->first; + tlbp->next = next; +} diff --git a/arch/sh/mm/tlb-urb.c b/arch/sh/mm/tlb-urb.c index bb5b9098956..c92ce20db39 100644 --- a/arch/sh/mm/tlb-urb.c +++ b/arch/sh/mm/tlb-urb.c @@ -24,13 +24,9 @@ void tlb_wire_entry(struct vm_area_struct *vma, unsigned long addr, pte_t pte) local_irq_save(flags); - /* Load the entry into the TLB */ - __update_tlb(vma, addr, pte); - - /* ... and wire it up. */ status = __raw_readl(MMUCR); urb = (status & MMUCR_URB) >> MMUCR_URB_SHIFT; - status &= ~MMUCR_URB; + status &= ~MMUCR_URC; /* * Make sure we're not trying to wire the last TLB entry slot. @@ -39,7 +35,23 @@ void tlb_wire_entry(struct vm_area_struct *vma, unsigned long addr, pte_t pte) urb = urb % MMUCR_URB_NENTRIES; + /* + * Insert this entry into the highest non-wired TLB slot (via + * the URC field). + */ + status |= (urb << MMUCR_URC_SHIFT); + __raw_writel(status, MMUCR); + ctrl_barrier(); + + /* Load the entry into the TLB */ + __update_tlb(vma, addr, pte); + + /* ... and wire it up. */ + status = __raw_readl(MMUCR); + + status &= ~MMUCR_URB; status |= (urb << MMUCR_URB_SHIFT); + __raw_writel(status, MMUCR); ctrl_barrier(); diff --git a/arch/sh/mm/tlbex_32.c b/arch/sh/mm/tlbex_32.c new file mode 100644 index 00000000000..382262dc0c4 --- /dev/null +++ b/arch/sh/mm/tlbex_32.c @@ -0,0 +1,78 @@ +/* + * TLB miss handler for SH with an MMU. + * + * Copyright (C) 1999 Niibe Yutaka + * Copyright (C) 2003 - 2012 Paul Mundt + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/kprobes.h> +#include <linux/kdebug.h> +#include <asm/mmu_context.h> +#include <asm/thread_info.h> + +/* + * Called with interrupts disabled. + */ +asmlinkage int __kprobes +handle_tlbmiss(struct pt_regs *regs, unsigned long error_code, + unsigned long address) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + pte_t entry; + + /* + * We don't take page faults for P1, P2, and parts of P4, these + * are always mapped, whether it be due to legacy behaviour in + * 29-bit mode, or due to PMB configuration in 32-bit mode. + */ + if (address >= P3SEG && address < P3_ADDR_MAX) { + pgd = pgd_offset_k(address); + } else { + if (unlikely(address >= TASK_SIZE || !current->mm)) + return 1; + + pgd = pgd_offset(current->mm, address); + } + + pud = pud_offset(pgd, address); + if (pud_none_or_clear_bad(pud)) + return 1; + pmd = pmd_offset(pud, address); + if (pmd_none_or_clear_bad(pmd)) + return 1; + pte = pte_offset_kernel(pmd, address); + entry = *pte; + if (unlikely(pte_none(entry) || pte_not_present(entry))) + return 1; + if (unlikely(error_code && !pte_write(entry))) + return 1; + + if (error_code) + entry = pte_mkdirty(entry); + entry = pte_mkyoung(entry); + + set_pte(pte, entry); + +#if defined(CONFIG_CPU_SH4) && !defined(CONFIG_SMP) + /* + * SH-4 does not set MMUCR.RC to the corresponding TLB entry in + * the case of an initial page write exception, so we need to + * flush it in order to avoid potential TLB entry duplication. + */ + if (error_code == FAULT_CODE_INITIAL) + local_flush_tlb_one(get_asid(), address & PAGE_MASK); +#endif + + set_thread_fault_code(error_code); + update_mmu_cache(NULL, address, pte); + + return 0; +} diff --git a/arch/sh/mm/tlbex_64.c b/arch/sh/mm/tlbex_64.c new file mode 100644 index 00000000000..8557548fc53 --- /dev/null +++ b/arch/sh/mm/tlbex_64.c @@ -0,0 +1,166 @@ +/* + * The SH64 TLB miss. + * + * Original code from fault.c + * Copyright (C) 2000, 2001 Paolo Alberelli + * + * Fast PTE->TLB refill path + * Copyright (C) 2003 Richard.Curnow@superh.com + * + * IMPORTANT NOTES : + * The do_fast_page_fault function is called from a context in entry.S + * where very few registers have been saved. In particular, the code in + * this file must be compiled not to use ANY caller-save registers that + * are not part of the restricted save set. Also, it means that code in + * this file must not make calls to functions elsewhere in the kernel, or + * else the excepting context will see corruption in its caller-save + * registers. Plus, the entry.S save area is non-reentrant, so this code + * has to run with SR.BL==1, i.e. no interrupts taken inside it and panic + * on any exception. + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ +#include <linux/signal.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/ptrace.h> +#include <linux/mman.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/interrupt.h> +#include <linux/kprobes.h> +#include <asm/tlb.h> +#include <asm/io.h> +#include <asm/uaccess.h> +#include <asm/pgalloc.h> +#include <asm/mmu_context.h> + +static int handle_tlbmiss(unsigned long long protection_flags, + unsigned long address) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + pte_t entry; + + if (is_vmalloc_addr((void *)address)) { + pgd = pgd_offset_k(address); + } else { + if (unlikely(address >= TASK_SIZE || !current->mm)) + return 1; + + pgd = pgd_offset(current->mm, address); + } + + pud = pud_offset(pgd, address); + if (pud_none(*pud) || !pud_present(*pud)) + return 1; + + pmd = pmd_offset(pud, address); + if (pmd_none(*pmd) || !pmd_present(*pmd)) + return 1; + + pte = pte_offset_kernel(pmd, address); + entry = *pte; + if (pte_none(entry) || !pte_present(entry)) + return 1; + + /* + * If the page doesn't have sufficient protection bits set to + * service the kind of fault being handled, there's not much + * point doing the TLB refill. Punt the fault to the general + * handler. + */ + if ((pte_val(entry) & protection_flags) != protection_flags) + return 1; + + update_mmu_cache(NULL, address, pte); + + return 0; +} + +/* + * Put all this information into one structure so that everything is just + * arithmetic relative to a single base address. This reduces the number + * of movi/shori pairs needed just to load addresses of static data. + */ +struct expevt_lookup { + unsigned short protection_flags[8]; + unsigned char is_text_access[8]; + unsigned char is_write_access[8]; +}; + +#define PRU (1<<9) +#define PRW (1<<8) +#define PRX (1<<7) +#define PRR (1<<6) + +/* Sized as 8 rather than 4 to allow checking the PTE's PRU bit against whether + the fault happened in user mode or privileged mode. */ +static struct expevt_lookup expevt_lookup_table = { + .protection_flags = {PRX, PRX, 0, 0, PRR, PRR, PRW, PRW}, + .is_text_access = {1, 1, 0, 0, 0, 0, 0, 0} +}; + +static inline unsigned int +expevt_to_fault_code(unsigned long expevt) +{ + if (expevt == 0xa40) + return FAULT_CODE_ITLB; + else if (expevt == 0x060) + return FAULT_CODE_WRITE; + + return 0; +} + +/* + This routine handles page faults that can be serviced just by refilling a + TLB entry from an existing page table entry. (This case represents a very + large majority of page faults.) Return 1 if the fault was successfully + handled. Return 0 if the fault could not be handled. (This leads into the + general fault handling in fault.c which deals with mapping file-backed + pages, stack growth, segmentation faults, swapping etc etc) + */ +asmlinkage int __kprobes +do_fast_page_fault(unsigned long long ssr_md, unsigned long long expevt, + unsigned long address) +{ + unsigned long long protection_flags; + unsigned long long index; + unsigned long long expevt4; + unsigned int fault_code; + + /* The next few lines implement a way of hashing EXPEVT into a + * small array index which can be used to lookup parameters + * specific to the type of TLBMISS being handled. + * + * Note: + * ITLBMISS has EXPEVT==0xa40 + * RTLBMISS has EXPEVT==0x040 + * WTLBMISS has EXPEVT==0x060 + */ + expevt4 = (expevt >> 4); + /* TODO : xor ssr_md into this expression too. Then we can check + * that PRU is set when it needs to be. */ + index = expevt4 ^ (expevt4 >> 5); + index &= 7; + + fault_code = expevt_to_fault_code(expevt); + + protection_flags = expevt_lookup_table.protection_flags[index]; + + if (expevt_lookup_table.is_text_access[index]) + fault_code |= FAULT_CODE_ITLB; + if (!ssr_md) + fault_code |= FAULT_CODE_USER; + + set_thread_fault_code(fault_code); + + return handle_tlbmiss(protection_flags, address); +} diff --git a/arch/sh/mm/tlbflush_32.c b/arch/sh/mm/tlbflush_32.c index 004bb3f25b5..a6a20d6de4c 100644 --- a/arch/sh/mm/tlbflush_32.c +++ b/arch/sh/mm/tlbflush_32.c @@ -120,21 +120,18 @@ void local_flush_tlb_mm(struct mm_struct *mm) } } -void local_flush_tlb_all(void) +void __flush_tlb_global(void) { - unsigned long flags, status; + unsigned long flags; + + local_irq_save(flags); /* - * Flush all the TLB. - * - * Write to the MMU control register's bit: - * TF-bit for SH-3, TI-bit for SH-4. - * It's same position, bit #2. + * This is the most destructive of the TLB flushing options, + * and will tear down all of the UTLB/ITLB mappings, including + * wired entries. */ - local_irq_save(flags); - status = __raw_readl(MMUCR); - status |= 0x04; - __raw_writel(status, MMUCR); - ctrl_barrier(); + __raw_writel(__raw_readl(MMUCR) | MMUCR_TI, MMUCR); + local_irq_restore(flags); } diff --git a/arch/sh/mm/tlbflush_64.c b/arch/sh/mm/tlbflush_64.c index 706da1d3a67..f33fdd2558e 100644 --- a/arch/sh/mm/tlbflush_64.c +++ b/arch/sh/mm/tlbflush_64.c @@ -3,7 +3,7 @@ * * Copyright (C) 2000, 2001 Paolo Alberelli * Copyright (C) 2003 Richard Curnow (/proc/tlb, bug fixes) - * Copyright (C) 2003 - 2009 Paul Mundt + * Copyright (C) 2003 - 2012 Paul Mundt * * This file is subject to the terms and conditions of the GNU General Public * License. See the file "COPYING" in the main directory of this archive @@ -22,313 +22,12 @@ #include <linux/smp.h> #include <linux/perf_event.h> #include <linux/interrupt.h> -#include <asm/system.h> #include <asm/io.h> #include <asm/tlb.h> #include <asm/uaccess.h> #include <asm/pgalloc.h> #include <asm/mmu_context.h> -extern void die(const char *,struct pt_regs *,long); - -#define PFLAG(val,flag) (( (val) & (flag) ) ? #flag : "" ) -#define PPROT(flag) PFLAG(pgprot_val(prot),flag) - -static inline void print_prots(pgprot_t prot) -{ - printk("prot is 0x%016llx\n",pgprot_val(prot)); - - printk("%s %s %s %s %s\n",PPROT(_PAGE_SHARED),PPROT(_PAGE_READ), - PPROT(_PAGE_EXECUTE),PPROT(_PAGE_WRITE),PPROT(_PAGE_USER)); -} - -static inline void print_vma(struct vm_area_struct *vma) -{ - printk("vma start 0x%08lx\n", vma->vm_start); - printk("vma end 0x%08lx\n", vma->vm_end); - - print_prots(vma->vm_page_prot); - printk("vm_flags 0x%08lx\n", vma->vm_flags); -} - -static inline void print_task(struct task_struct *tsk) -{ - printk("Task pid %d\n", task_pid_nr(tsk)); -} - -static pte_t *lookup_pte(struct mm_struct *mm, unsigned long address) -{ - pgd_t *dir; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - pte_t entry; - - dir = pgd_offset(mm, address); - if (pgd_none(*dir)) - return NULL; - - pud = pud_offset(dir, address); - if (pud_none(*pud)) - return NULL; - - pmd = pmd_offset(pud, address); - if (pmd_none(*pmd)) - return NULL; - - pte = pte_offset_kernel(pmd, address); - entry = *pte; - if (pte_none(entry) || !pte_present(entry)) - return NULL; - - return pte; -} - -/* - * This routine handles page faults. It determines the address, - * and the problem, and then passes it off to one of the appropriate - * routines. - */ -asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long writeaccess, - unsigned long textaccess, unsigned long address) -{ - struct task_struct *tsk; - struct mm_struct *mm; - struct vm_area_struct * vma; - const struct exception_table_entry *fixup; - pte_t *pte; - int fault; - - /* SIM - * Note this is now called with interrupts still disabled - * This is to cope with being called for a missing IO port - * address with interrupts disabled. This should be fixed as - * soon as we have a better 'fast path' miss handler. - * - * Plus take care how you try and debug this stuff. - * For example, writing debug data to a port which you - * have just faulted on is not going to work. - */ - - tsk = current; - mm = tsk->mm; - - /* Not an IO address, so reenable interrupts */ - local_irq_enable(); - - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); - - /* - * If we're in an interrupt or have no user - * context, we must not take the fault.. - */ - if (in_atomic() || !mm) - goto no_context; - - /* TLB misses upon some cache flushes get done under cli() */ - down_read(&mm->mmap_sem); - - vma = find_vma(mm, address); - - if (!vma) { -#ifdef DEBUG_FAULT - print_task(tsk); - printk("%s:%d fault, address is 0x%08x PC %016Lx textaccess %d writeaccess %d\n", - __func__, __LINE__, - address,regs->pc,textaccess,writeaccess); - show_regs(regs); -#endif - goto bad_area; - } - if (vma->vm_start <= address) { - goto good_area; - } - - if (!(vma->vm_flags & VM_GROWSDOWN)) { -#ifdef DEBUG_FAULT - print_task(tsk); - printk("%s:%d fault, address is 0x%08x PC %016Lx textaccess %d writeaccess %d\n", - __func__, __LINE__, - address,regs->pc,textaccess,writeaccess); - show_regs(regs); - - print_vma(vma); -#endif - goto bad_area; - } - if (expand_stack(vma, address)) { -#ifdef DEBUG_FAULT - print_task(tsk); - printk("%s:%d fault, address is 0x%08x PC %016Lx textaccess %d writeaccess %d\n", - __func__, __LINE__, - address,regs->pc,textaccess,writeaccess); - show_regs(regs); -#endif - goto bad_area; - } -/* - * Ok, we have a good vm_area for this memory access, so - * we can handle it.. - */ -good_area: - if (textaccess) { - if (!(vma->vm_flags & VM_EXEC)) - goto bad_area; - } else { - if (writeaccess) { - if (!(vma->vm_flags & VM_WRITE)) - goto bad_area; - } else { - if (!(vma->vm_flags & VM_READ)) - goto bad_area; - } - } - - /* - * If for any reason at all we couldn't handle the fault, - * make sure we exit gracefully rather than endlessly redo - * the fault. - */ -survive: - fault = handle_mm_fault(mm, vma, address, writeaccess ? FAULT_FLAG_WRITE : 0); - if (unlikely(fault & VM_FAULT_ERROR)) { - if (fault & VM_FAULT_OOM) - goto out_of_memory; - else if (fault & VM_FAULT_SIGBUS) - goto do_sigbus; - BUG(); - } - - if (fault & VM_FAULT_MAJOR) { - tsk->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, - regs, address); - } else { - tsk->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, - regs, address); - } - - /* If we get here, the page fault has been handled. Do the TLB refill - now from the newly-setup PTE, to avoid having to fault again right - away on the same instruction. */ - pte = lookup_pte (mm, address); - if (!pte) { - /* From empirical evidence, we can get here, due to - !pte_present(pte). (e.g. if a swap-in occurs, and the page - is swapped back out again before the process that wanted it - gets rescheduled?) */ - goto no_pte; - } - - __do_tlb_refill(address, textaccess, pte); - -no_pte: - - up_read(&mm->mmap_sem); - return; - -/* - * Something tried to access memory that isn't in our memory map.. - * Fix it, but check if it's kernel or user first.. - */ -bad_area: -#ifdef DEBUG_FAULT - printk("fault:bad area\n"); -#endif - up_read(&mm->mmap_sem); - - if (user_mode(regs)) { - static int count=0; - siginfo_t info; - if (count < 4) { - /* This is really to help debug faults when starting - * usermode, so only need a few */ - count++; - printk("user mode bad_area address=%08lx pid=%d (%s) pc=%08lx\n", - address, task_pid_nr(current), current->comm, - (unsigned long) regs->pc); -#if 0 - show_regs(regs); -#endif - } - if (is_global_init(tsk)) { - panic("INIT had user mode bad_area\n"); - } - tsk->thread.address = address; - tsk->thread.error_code = writeaccess; - info.si_signo = SIGSEGV; - info.si_errno = 0; - info.si_addr = (void *) address; - force_sig_info(SIGSEGV, &info, tsk); - return; - } - -no_context: -#ifdef DEBUG_FAULT - printk("fault:No context\n"); -#endif - /* Are we prepared to handle this kernel fault? */ - fixup = search_exception_tables(regs->pc); - if (fixup) { - regs->pc = fixup->fixup; - return; - } - -/* - * Oops. The kernel tried to access some bad page. We'll have to - * terminate things with extreme prejudice. - * - */ - if (address < PAGE_SIZE) - printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference"); - else - printk(KERN_ALERT "Unable to handle kernel paging request"); - printk(" at virtual address %08lx\n", address); - printk(KERN_ALERT "pc = %08Lx%08Lx\n", regs->pc >> 32, regs->pc & 0xffffffff); - die("Oops", regs, writeaccess); - do_exit(SIGKILL); - -/* - * We ran out of memory, or some other thing happened to us that made - * us unable to handle the page fault gracefully. - */ -out_of_memory: - if (is_global_init(current)) { - panic("INIT out of memory\n"); - yield(); - goto survive; - } - printk("fault:Out of memory\n"); - up_read(&mm->mmap_sem); - if (is_global_init(current)) { - yield(); - down_read(&mm->mmap_sem); - goto survive; - } - printk("VM: killing process %s\n", tsk->comm); - if (user_mode(regs)) - do_group_exit(SIGKILL); - goto no_context; - -do_sigbus: - printk("fault:Do sigbus\n"); - up_read(&mm->mmap_sem); - - /* - * Send a sigbus, regardless of whether we were in kernel - * or user mode. - */ - tsk->thread.address = address; - tsk->thread.error_code = writeaccess; - tsk->thread.trap_no = 14; - force_sig(SIGBUS, tsk); - - /* Kernel mode? Handle exceptions or die */ - if (!user_mode(regs)) - goto no_context; -} - void local_flush_tlb_one(unsigned long asid, unsigned long page) { unsigned long long match, pteh=0, lpage; @@ -467,6 +166,7 @@ void local_flush_tlb_kernel_range(unsigned long start, unsigned long end) flush_tlb_all(); } -void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t pte) +void __flush_tlb_global(void) { + flush_tlb_all(); } diff --git a/arch/sh/mm/uncached.c b/arch/sh/mm/uncached.c index 8a4eca551fc..a7767da815e 100644 --- a/arch/sh/mm/uncached.c +++ b/arch/sh/mm/uncached.c @@ -28,7 +28,7 @@ EXPORT_SYMBOL(virt_addr_uncached); void __init uncached_init(void) { -#ifdef CONFIG_29BIT +#if defined(CONFIG_29BIT) || !defined(CONFIG_MMU) uncached_start = P2SEG; #else uncached_start = memory_end; |
