40 files changed, 1923 insertions, 1390 deletions
diff --git a/arch/sh/mm/Kconfig b/arch/sh/mm/Kconfig
index 1445ca6257d..dba285e8680 100644
--- a/arch/sh/mm/Kconfig
+++ b/arch/sh/mm/Kconfig
@@ -79,11 +79,11 @@ config 29BIT
 
 config 32BIT
 	bool
-	default y if CPU_SH5
+	default y if CPU_SH5 || !MMU
 
 config PMB
 	bool "Support 32-bit physical addressing through PMB"
-	depends on MMU && EXPERIMENTAL && CPU_SH4A && !CPU_SH4AL_DSP
+	depends on MMU && CPU_SH4A && !CPU_SH4AL_DSP
 	select 32BIT
 	select UNCACHED_MAPPING
 	help
@@ -110,7 +110,8 @@ config VSYSCALL
 
 config NUMA
 	bool "Non Uniform Memory Access (NUMA) Support"
-	depends on MMU && SYS_SUPPORTS_NUMA && EXPERIMENTAL
+	depends on MMU && SYS_SUPPORTS_NUMA
+	select ARCH_WANT_NUMA_VARIABLE_LOCALITY
 	default n
 	help
 	  Some SH systems have many various memories scattered around
@@ -136,16 +137,6 @@ config ARCH_SPARSEMEM_ENABLE
 config ARCH_SPARSEMEM_DEFAULT
 	def_bool y
 
-config MAX_ACTIVE_REGIONS
-	int
-	default "6" if (CPU_SUBTYPE_SHX3 && SPARSEMEM)
-	default "2" if SPARSEMEM && (CPU_SUBTYPE_SH7722 || \
-		       CPU_SUBTYPE_SH7785)
-	default "1"
-
-config ARCH_POPULATES_NODE_MAP
-	def_bool y
-
 config ARCH_SELECT_MEMORY_MODEL
 	def_bool y
 
@@ -168,6 +159,10 @@ config IOREMAP_FIXED
 config UNCACHED_MAPPING
 	bool
 
+config HAVE_SRAM_POOL
+	bool
+	select GENERIC_ALLOCATOR
+
 choice
 	prompt "Kernel page size"
 	default PAGE_SIZE_4KB
diff --git a/arch/sh/mm/Makefile b/arch/sh/mm/Makefile
index 3dc8a8a6382..cee6b9999d8 100644
--- a/arch/sh/mm/Makefile
+++ b/arch/sh/mm/Makefile
@@ -10,21 +10,23 @@ cacheops-$(CONFIG_CPU_SH3)		:= cache-sh3.o
 cacheops-$(CONFIG_CPU_SH4)		:= cache-sh4.o flush-sh4.o
 cacheops-$(CONFIG_CPU_SH5)		:= cache-sh5.o flush-sh4.o
 cacheops-$(CONFIG_SH7705_CACHE_32KB)	+= cache-sh7705.o
+cacheops-$(CONFIG_CPU_SHX3)		+= cache-shx3.o
 
 obj-y			+= $(cacheops-y)
 
 mmu-y			:= nommu.o extable_32.o
-mmu-$(CONFIG_MMU)	:= extable_$(BITS).o fault_$(BITS).o \
-			   ioremap.o kmap.o pgtable.o tlbflush_$(BITS).o
+mmu-$(CONFIG_MMU)	:= extable_$(BITS).o fault.o gup.o ioremap.o kmap.o \
+			   pgtable.o tlbex_$(BITS).o tlbflush_$(BITS).o
 
 obj-y			+= $(mmu-y)
-obj-$(CONFIG_DEBUG_FS)	+= asids-debugfs.o
 
-ifdef CONFIG_DEBUG_FS
-obj-$(CONFIG_CPU_SH4)	+= cache-debugfs.o
+debugfs-y			:= asids-debugfs.o
+ifndef CONFIG_CACHE_OFF
+debugfs-$(CONFIG_CPU_SH4)	+= cache-debugfs.o
 endif
 
 ifdef CONFIG_MMU
+debugfs-$(CONFIG_CPU_SH4)	+= tlb-debugfs.o
 tlb-$(CONFIG_CPU_SH3)		:= tlb-sh3.o
 tlb-$(CONFIG_CPU_SH4)		:= tlb-sh4.o tlb-urb.o
 tlb-$(CONFIG_CPU_SH5)		:= tlb-sh5.o
@@ -32,13 +34,17 @@ tlb-$(CONFIG_CPU_HAS_PTEAEX)	:= tlb-pteaex.o tlb-urb.o
 obj-y				+= $(tlb-y)
 endif
 
+obj-$(CONFIG_DEBUG_FS)		+= $(debugfs-y)
 obj-$(CONFIG_HUGETLB_PAGE)	+= hugetlbpage.o
 obj-$(CONFIG_PMB)		+= pmb.o
 obj-$(CONFIG_NUMA)		+= numa.o
 obj-$(CONFIG_IOREMAP_FIXED)	+= ioremap_fixed.o
 obj-$(CONFIG_UNCACHED_MAPPING)	+= uncached.o
+obj-$(CONFIG_HAVE_SRAM_POOL)	+= sram.o
 
-# Special flags for fault_64.o.  This puts restrictions on the number of
+GCOV_PROFILE_pmb.o := n
+
+# Special flags for tlbex_64.o.  This puts restrictions on the number of
 # caller-save registers that the compiler can target when building this file.
 # This is required because the code is called from a context in entry.S where
 # very few registers have been saved in the exception handler (for speed
@@ -53,7 +59,7 @@ obj-$(CONFIG_UNCACHED_MAPPING)	+= uncached.o
 # The resources not listed below are callee save, i.e. the compiler is free to
 # use any of them and will spill them to the stack itself.
 
-CFLAGS_fault_64.o += -ffixed-r7 \
+CFLAGS_tlbex_64.o += -ffixed-r7 \
 	-ffixed-r8 -ffixed-r9 -ffixed-r10 -ffixed-r11 -ffixed-r12 \
 	-ffixed-r13 -ffixed-r14 -ffixed-r16 -ffixed-r17 -ffixed-r19 \
 	-ffixed-r20 -ffixed-r21 -ffixed-r22 -ffixed-r23 \
@@ -63,4 +69,4 @@ CFLAGS_fault_64.o += -ffixed-r7 \
 	-ffixed-r60 -ffixed-r61 -ffixed-r62 \
 	-fomit-frame-pointer
 
-EXTRA_CFLAGS += -Werror
+ccflags-y := -Werror
diff --git a/arch/sh/mm/alignment.c b/arch/sh/mm/alignment.c
index b2595b8548e..ec2b2530242 100644
--- a/arch/sh/mm/alignment.c
+++ b/arch/sh/mm/alignment.c
@@ -13,6 +13,7 @@
 #include <linux/seq_file.h>
 #include <linux/proc_fs.h>
 #include <linux/uaccess.h>
+#include <linux/ratelimit.h>
 #include <asm/alignment.h>
 #include <asm/processor.h>
 
@@ -95,13 +96,13 @@ int set_unalign_ctl(struct task_struct *tsk, unsigned int val)
 void unaligned_fixups_notify(struct task_struct *tsk, insn_size_t insn,
 			     struct pt_regs *regs)
 {
-	if (user_mode(regs) && (se_usermode & UM_WARN) && printk_ratelimit())
-		pr_notice("Fixing up unaligned userspace access "
+	if (user_mode(regs) && (se_usermode & UM_WARN))
+		pr_notice_ratelimited("Fixing up unaligned userspace access "
 			  "in \"%s\" pid=%d pc=0x%p ins=0x%04hx\n",
 			  tsk->comm, task_pid_nr(tsk),
 			  (void *)instruction_pointer(regs), insn);
-	else if (se_kernmode_warn && printk_ratelimit())
-		pr_notice("Fixing up unaligned kernel access "
+	else if (se_kernmode_warn)
+		pr_notice_ratelimited("Fixing up unaligned kernel access "
 			  "in \"%s\" pid=%d pc=0x%p ins=0x%04hx\n",
 			  tsk->comm, task_pid_nr(tsk),
 			  (void *)instruction_pointer(regs), insn);
@@ -139,7 +140,7 @@ static int alignment_proc_open(struct inode *inode, struct file *file)
 static ssize_t alignment_proc_write(struct file *file,
 		const char __user *buffer, size_t count, loff_t *pos)
 {
-	int *data = PDE(file->f_path.dentry->d_inode)->data;
+	int *data = PDE_DATA(file_inode(file));
 	char mode;
 
 	if (count > 0) {
diff --git a/arch/sh/mm/asids-debugfs.c b/arch/sh/mm/asids-debugfs.c
index cd8c3bf39b5..74c03ecc487 100644
--- a/arch/sh/mm/asids-debugfs.c
+++ b/arch/sh/mm/asids-debugfs.c
@@ -63,7 +63,7 @@ static int __init asids_debugfs_init(void)
 {
 	struct dentry *asids_dentry;
 
-	asids_dentry = debugfs_create_file("asids", S_IRUSR, sh_debugfs_root,
+	asids_dentry = debugfs_create_file("asids", S_IRUSR, arch_debugfs_dir,
 					   NULL, &asids_debugfs_fops);
 	if (!asids_dentry)
 		return -ENOMEM;
diff --git a/arch/sh/mm/cache-debugfs.c b/arch/sh/mm/cache-debugfs.c
index 690ed010d00..777e50f33c0 100644
--- a/arch/sh/mm/cache-debugfs.c
+++ b/arch/sh/mm/cache-debugfs.c
@@ -26,9 +26,9 @@ static int cache_seq_show(struct seq_file *file, void *iter)
 {
 	unsigned int cache_type = (unsigned int)file->private;
 	struct cache_info *cache;
-	unsigned int waysize, way, cache_size;
-	unsigned long ccr, base;
-	static unsigned long addrstart = 0;
+	unsigned int waysize, way;
+	unsigned long ccr;
+	unsigned long addrstart = 0;
 
 	/*
 	 * Go uncached immediately so we don't skew the results any
@@ -36,7 +36,7 @@ static int cache_seq_show(struct seq_file *file, void *iter)
 	 */
 	jump_to_uncached();
 
-	ccr = __raw_readl(CCR);
+	ccr = __raw_readl(SH_CCR);
 	if ((ccr & CCR_CACHE_ENABLE) == 0) {
 		back_to_cached();
 
@@ -45,28 +45,13 @@ static int cache_seq_show(struct seq_file *file, void *iter)
 	}
 
 	if (cache_type == CACHE_TYPE_DCACHE) {
-		base = CACHE_OC_ADDRESS_ARRAY;
+		addrstart = CACHE_OC_ADDRESS_ARRAY;
 		cache = &current_cpu_data.dcache;
 	} else {
-		base = CACHE_IC_ADDRESS_ARRAY;
+		addrstart = CACHE_IC_ADDRESS_ARRAY;
 		cache = &current_cpu_data.icache;
 	}
 
-	/*
-	 * Due to the amount of data written out (depending on the cache size),
-	 * we may be iterated over multiple times. In this case, keep track of
-	 * the entry position in addrstart, and rewind it when we've hit the
-	 * end of the cache.
-	 *
-	 * Likewise, the same code is used for multiple caches, so care must
-	 * be taken for bouncing addrstart back and forth so the appropriate
-	 * cache is hit.
-	 */
-	cache_size = cache->ways * cache->sets * cache->linesz;
-	if (((addrstart & 0xff000000) != base) ||
-	     (addrstart & 0x00ffffff) > cache_size)
-		addrstart = base;
-
 	waysize = cache->sets;
 
 	/*
@@ -126,25 +111,19 @@ static int __init cache_debugfs_init(void)
 {
 	struct dentry *dcache_dentry, *icache_dentry;
 
-	dcache_dentry = debugfs_create_file("dcache", S_IRUSR, sh_debugfs_root,
+	dcache_dentry = debugfs_create_file("dcache", S_IRUSR, arch_debugfs_dir,
 					    (unsigned int *)CACHE_TYPE_DCACHE,
 					    &cache_debugfs_fops);
 	if (!dcache_dentry)
 		return -ENOMEM;
-	if (IS_ERR(dcache_dentry))
-		return PTR_ERR(dcache_dentry);
 
-	icache_dentry = debugfs_create_file("icache", S_IRUSR, sh_debugfs_root,
+	icache_dentry = debugfs_create_file("icache", S_IRUSR, arch_debugfs_dir,
 					    (unsigned int *)CACHE_TYPE_ICACHE,
 					    &cache_debugfs_fops);
 	if (!icache_dentry) {
 		debugfs_remove(dcache_dentry);
 		return -ENOMEM;
 	}
-	if (IS_ERR(icache_dentry)) {
-		debugfs_remove(dcache_dentry);
-		return PTR_ERR(icache_dentry);
-	}
 
 	return 0;
 }
diff --git a/arch/sh/mm/cache-sh2.c b/arch/sh/mm/cache-sh2.c
index defcf719f2e..a74259f2f98 100644
--- a/arch/sh/mm/cache-sh2.c
+++ b/arch/sh/mm/cache-sh2.c
@@ -63,9 +63,9 @@ static void sh2__flush_invalidate_region(void *start, int size)
 	local_irq_save(flags);
 	jump_to_uncached();
 
-	ccr = __raw_readl(CCR);
+	ccr = __raw_readl(SH_CCR);
 	ccr |= CCR_CACHE_INVALIDATE;
-	__raw_writel(ccr, CCR);
+	__raw_writel(ccr, SH_CCR);
 
 	back_to_cached();
 	local_irq_restore(flags);
diff --git a/arch/sh/mm/cache-sh2a.c b/arch/sh/mm/cache-sh2a.c
index 1f51225426a..ee87d081259 100644
--- a/arch/sh/mm/cache-sh2a.c
+++ b/arch/sh/mm/cache-sh2a.c
@@ -15,35 +15,80 @@
 #include <asm/cacheflush.h>
 #include <asm/io.h>
 
+/*
+ * The maximum number of pages we support up to when doing ranged dcache
+ * flushing. Anything exceeding this will simply flush the dcache in its
+ * entirety.
+ */
+#define MAX_OCACHE_PAGES	32
+#define MAX_ICACHE_PAGES	32
+
+#ifdef CONFIG_CACHE_WRITEBACK
+static void sh2a_flush_oc_line(unsigned long v, int way)
+{
+	unsigned long addr = (v & 0x000007f0) | (way << 11);
+	unsigned long data;
+
+	data = __raw_readl(CACHE_OC_ADDRESS_ARRAY | addr);
+	if ((data & CACHE_PHYSADDR_MASK) == (v & CACHE_PHYSADDR_MASK)) {
+		data &= ~SH_CACHE_UPDATED;
+		__raw_writel(data, CACHE_OC_ADDRESS_ARRAY | addr);
+	}
+}
+#endif
+
+static void sh2a_invalidate_line(unsigned long cache_addr, unsigned long v)
+{
+	/* Set associative bit to hit all ways */
+	unsigned long addr = (v & 0x000007f0) | SH_CACHE_ASSOC;
+	__raw_writel((addr & CACHE_PHYSADDR_MASK), cache_addr | addr);
+}
+
+/*
+ * Write back the dirty D-caches, but not invalidate them.
+ */
 static void sh2a__flush_wback_region(void *start, int size)
 {
+#ifdef CONFIG_CACHE_WRITEBACK
 	unsigned long v;
 	unsigned long begin, end;
 	unsigned long flags;
+	int nr_ways;
 
 	begin = (unsigned long)start & ~(L1_CACHE_BYTES-1);
 	end = ((unsigned long)start + size + L1_CACHE_BYTES-1)
 		& ~(L1_CACHE_BYTES-1);
+	nr_ways = current_cpu_data.dcache.ways;
 
 	local_irq_save(flags);
 	jump_to_uncached();
 
-	for (v = begin; v < end; v+=L1_CACHE_BYTES) {
-		unsigned long addr = CACHE_OC_ADDRESS_ARRAY | (v & 0x000007f0);
+	/* If there are too many pages then flush the entire cache */
+	if (((end - begin) >> PAGE_SHIFT) >= MAX_OCACHE_PAGES) {
+		begin = CACHE_OC_ADDRESS_ARRAY;
+		end = begin + (nr_ways * current_cpu_data.dcache.way_size);
+
+		for (v = begin; v < end; v += L1_CACHE_BYTES) {
+			unsigned long data = __raw_readl(v);
+			if (data & SH_CACHE_UPDATED)
+				__raw_writel(data & ~SH_CACHE_UPDATED, v);
+		}
+	} else {
 		int way;
-		for (way = 0; way < 4; way++) {
-			unsigned long data =  __raw_readl(addr | (way << 11));
-			if ((data & CACHE_PHYSADDR_MASK) == (v & CACHE_PHYSADDR_MASK)) {
-				data &= ~SH_CACHE_UPDATED;
-				__raw_writel(data, addr | (way << 11));
-			}
+		for (way = 0; way < nr_ways; way++) {
+			for (v = begin; v < end; v += L1_CACHE_BYTES)
+				sh2a_flush_oc_line(v, way);
 		}
 	}
 
 	back_to_cached();
 	local_irq_restore(flags);
+#endif
 }
 
+/*
+ * Write back the dirty D-caches and invalidate them.
+ */
 static void sh2a__flush_purge_region(void *start, int size)
 {
 	unsigned long v;
@@ -58,13 +103,22 @@ static void sh2a__flush_purge_region(void *start, int size)
 	jump_to_uncached();
 
 	for (v = begin; v < end; v+=L1_CACHE_BYTES) {
-		__raw_writel((v & CACHE_PHYSADDR_MASK),
-			  CACHE_OC_ADDRESS_ARRAY | (v & 0x000007f0) | 0x00000008);
+#ifdef CONFIG_CACHE_WRITEBACK
+		int way;
+		int nr_ways = current_cpu_data.dcache.ways;
+		for (way = 0; way < nr_ways; way++)
+			sh2a_flush_oc_line(v, way);
+#endif
+		sh2a_invalidate_line(CACHE_OC_ADDRESS_ARRAY, v);
 	}
+
 	back_to_cached();
 	local_irq_restore(flags);
 }
 
+/*
+ * Invalidate the D-caches, but no write back please
+ */
 static void sh2a__flush_invalidate_region(void *start, int size)
 {
 	unsigned long v;
@@ -74,29 +128,26 @@ static void sh2a__flush_invalidate_region(void *start, int size)
 	begin = (unsigned long)start & ~(L1_CACHE_BYTES-1);
 	end = ((unsigned long)start + size + L1_CACHE_BYTES-1)
 		& ~(L1_CACHE_BYTES-1);
+
 	local_irq_save(flags);
 	jump_to_uncached();
 
-#ifdef CONFIG_CACHE_WRITEBACK
-	__raw_writel(__raw_readl(CCR) | CCR_OCACHE_INVALIDATE, CCR);
-	/* I-cache invalidate */
-	for (v = begin; v < end; v+=L1_CACHE_BYTES) {
-		__raw_writel((v & CACHE_PHYSADDR_MASK),
-			  CACHE_IC_ADDRESS_ARRAY | (v & 0x000007f0) | 0x00000008);
+	/* If there are too many pages then just blow the cache */
+	if (((end - begin) >> PAGE_SHIFT) >= MAX_OCACHE_PAGES) {
+		__raw_writel(__raw_readl(SH_CCR) | CCR_OCACHE_INVALIDATE,
+			     SH_CCR);
+	} else {
+		for (v = begin; v < end; v += L1_CACHE_BYTES)
+			sh2a_invalidate_line(CACHE_OC_ADDRESS_ARRAY, v);
 	}
-#else
-	for (v = begin; v < end; v+=L1_CACHE_BYTES) {
-		__raw_writel((v & CACHE_PHYSADDR_MASK),
-			  CACHE_IC_ADDRESS_ARRAY | (v & 0x000007f0) | 0x00000008);
-		__raw_writel((v & CACHE_PHYSADDR_MASK),
-			  CACHE_OC_ADDRESS_ARRAY | (v & 0x000007f0) | 0x00000008);
-	}
-#endif
+
 	back_to_cached();
 	local_irq_restore(flags);
 }
 
-/* WBack O-Cache and flush I-Cache */
+/*
+ * Write back the range of D-cache, and purge the I-cache.
+ */
 static void sh2a_flush_icache_range(void *args)
 {
 	struct flusher_data *data = args;
@@ -107,23 +158,21 @@ static void sh2a_flush_icache_range(void *args)
 	start = data->addr1 & ~(L1_CACHE_BYTES-1);
 	end = (data->addr2 + L1_CACHE_BYTES-1) & ~(L1_CACHE_BYTES-1);
 
+#ifdef CONFIG_CACHE_WRITEBACK
+	sh2a__flush_wback_region((void *)start, end-start);
+#endif
+
 	local_irq_save(flags);
 	jump_to_uncached();
 
-	for (v = start; v < end; v+=L1_CACHE_BYTES) {
-		unsigned long addr = (v & 0x000007f0);
-		int way;
-		/* O-Cache writeback */
-		for (way = 0; way < 4; way++) {
-			unsigned long data =  __raw_readl(CACHE_OC_ADDRESS_ARRAY | addr | (way << 11));
-			if ((data & CACHE_PHYSADDR_MASK) == (v & CACHE_PHYSADDR_MASK)) {
-				data &= ~SH_CACHE_UPDATED;
-				__raw_writel(data, CACHE_OC_ADDRESS_ARRAY | addr | (way << 11));
-			}
-		}
-		/* I-Cache invalidate */
-		__raw_writel(addr,
-			  CACHE_IC_ADDRESS_ARRAY | addr | 0x00000008);
+	/* I-Cache invalidate */
+	/* If there are too many pages then just blow the cache */
+	if (((end - start) >> PAGE_SHIFT) >= MAX_ICACHE_PAGES) {
+		__raw_writel(__raw_readl(SH_CCR) | CCR_ICACHE_INVALIDATE,
+			     SH_CCR);
+	} else {
+		for (v = start; v < end; v += L1_CACHE_BYTES)
+			sh2a_invalidate_line(CACHE_IC_ADDRESS_ARRAY, v);
 	}
 
 	back_to_cached();
diff --git a/arch/sh/mm/cache-sh4.c b/arch/sh/mm/cache-sh4.c
index 2cfae81914a..51d8f7f31d1 100644
--- a/arch/sh/mm/cache-sh4.c
+++ b/arch/sh/mm/cache-sh4.c
@@ -18,6 +18,7 @@
 #include <linux/highmem.h>
 #include <asm/pgtable.h>
 #include <asm/mmu_context.h>
+#include <asm/cache_insns.h>
 #include <asm/cacheflush.h>
 
 /*
@@ -114,7 +115,7 @@ static void sh4_flush_dcache_page(void *arg)
 	struct address_space *mapping = page_mapping(page);
 
 	if (mapping && !mapping_mapped(mapping))
-		set_bit(PG_dcache_dirty, &page->flags);
+		clear_bit(PG_dcache_clean, &page->flags);
 	else
 #endif
 		flush_cache_one(CACHE_OC_ADDRESS_ARRAY |
@@ -132,9 +133,9 @@ static void flush_icache_all(void)
 	jump_to_uncached();
 
 	/* Flush I-cache */
-	ccr = __raw_readl(CCR);
+	ccr = __raw_readl(SH_CCR);
 	ccr |= CCR_CACHE_ICI;
-	__raw_writel(ccr, CCR);
+	__raw_writel(ccr, SH_CCR);
 
 	/*
 	 * back_to_cached() will take care of the barrier for us, don't add
@@ -239,12 +240,12 @@ static void sh4_flush_cache_page(void *args)
 		 * another ASID than the current one.
 		 */
 		map_coherent = (current_cpu_data.dcache.n_aliases &&
-			!test_bit(PG_dcache_dirty, &page->flags) &&
+			test_bit(PG_dcache_clean, &page->flags) &&
 			page_mapped(page));
 		if (map_coherent)
 			vaddr = kmap_coherent(page, address);
 		else
-			vaddr = kmap_atomic(page, KM_USER0);
+			vaddr = kmap_atomic(page);
 
 		address = (unsigned long)vaddr;
 	}
@@ -259,7 +260,7 @@ static void sh4_flush_cache_page(void *args)
 		if (map_coherent)
 			kunmap_coherent(vaddr);
 		else
-			kunmap_atomic(vaddr, KM_USER0);
+			kunmap_atomic(vaddr);
 	}
 }
 
diff --git a/arch/sh/mm/cache-sh5.c b/arch/sh/mm/cache-sh5.c
index eb4cc4ec795..d1bffbcd9d5 100644
--- a/arch/sh/mm/cache-sh5.c
+++ b/arch/sh/mm/cache-sh5.c
@@ -568,7 +568,7 @@ static void sh5_flush_dcache_page(void *page)
 }
 
 /*
- * Flush the range [start,end] of kernel virtual adddress space from
+ * Flush the range [start,end] of kernel virtual address space from
  * the I-cache.  The corresponding range must be purged from the
  * D-cache also because the SH-5 doesn't have cache snooping between
  * the caches.  The addresses will be visible through the superpage
diff --git a/arch/sh/mm/cache-sh7705.c b/arch/sh/mm/cache-sh7705.c
index f498da1cce7..7729cca727e 100644
--- a/arch/sh/mm/cache-sh7705.c
+++ b/arch/sh/mm/cache-sh7705.c
@@ -139,7 +139,7 @@ static void sh7705_flush_dcache_page(void *arg)
 	struct address_space *mapping = page_mapping(page);
 
 	if (mapping && !mapping_mapped(mapping))
-		set_bit(PG_dcache_dirty, &page->flags);
+		clear_bit(PG_dcache_clean, &page->flags);
 	else
 		__flush_dcache_page(__pa(page_address(page)));
 }
diff --git a/arch/sh/mm/cache-shx3.c b/arch/sh/mm/cache-shx3.c
new file mode 100644
index 00000000000..24c58b7dc02
--- /dev/null
+++ b/arch/sh/mm/cache-shx3.c
@@ -0,0 +1,44 @@
+/*
+ * arch/sh/mm/cache-shx3.c - SH-X3 optimized cache ops
+ *
+ * Copyright (C) 2010  Paul Mundt
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/io.h>
+#include <asm/cache.h>
+
+#define CCR_CACHE_SNM	0x40000		/* Hardware-assisted synonym avoidance */
+#define CCR_CACHE_IBE	0x1000000	/* ICBI broadcast */
+
+void __init shx3_cache_init(void)
+{
+	unsigned int ccr;
+
+	ccr = __raw_readl(SH_CCR);
+
+	/*
+	 * If we've got cache aliases, resolve them in hardware.
+	 */
+	if (boot_cpu_data.dcache.n_aliases || boot_cpu_data.icache.n_aliases) {
+		ccr |= CCR_CACHE_SNM;
+
+		boot_cpu_data.icache.n_aliases = 0;
+		boot_cpu_data.dcache.n_aliases = 0;
+
+		pr_info("Enabling hardware synonym avoidance\n");
+	}
+
+#ifdef CONFIG_SMP
+	/*
+	 * Broadcast I-cache block invalidations by default.
+	 */
+	ccr |= CCR_CACHE_IBE;
+#endif
+
+	writel_uncached(ccr, SH_CCR);
+}
diff --git a/arch/sh/mm/cache.c b/arch/sh/mm/cache.c
index 0f4095d7ac8..097c2cdd117 100644
--- a/arch/sh/mm/cache.c
+++ b/arch/sh/mm/cache.c
@@ -60,14 +60,14 @@ void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
 		       unsigned long len)
 {
 	if (boot_cpu_data.dcache.n_aliases && page_mapped(page) &&
-	    !test_bit(PG_dcache_dirty, &page->flags)) {
+	    test_bit(PG_dcache_clean, &page->flags)) {
 		void *vto = kmap_coherent(page, vaddr) + (vaddr & ~PAGE_MASK);
 		memcpy(vto, src, len);
 		kunmap_coherent(vto);
 	} else {
 		memcpy(dst, src, len);
 		if (boot_cpu_data.dcache.n_aliases)
-			set_bit(PG_dcache_dirty, &page->flags);
+			clear_bit(PG_dcache_clean, &page->flags);
 	}
 
 	if (vma->vm_flags & VM_EXEC)
@@ -79,14 +79,14 @@ void copy_from_user_page(struct vm_area_struct *vma, struct page *page,
 			 unsigned long len)
 {
 	if (boot_cpu_data.dcache.n_aliases && page_mapped(page) &&
-	    !test_bit(PG_dcache_dirty, &page->flags)) {
+	    test_bit(PG_dcache_clean, &page->flags)) {
 		void *vfrom = kmap_coherent(page, vaddr) + (vaddr & ~PAGE_MASK);
 		memcpy(dst, vfrom, len);
 		kunmap_coherent(vfrom);
 	} else {
 		memcpy(dst, src, len);
 		if (boot_cpu_data.dcache.n_aliases)
-			set_bit(PG_dcache_dirty, &page->flags);
+			clear_bit(PG_dcache_clean, &page->flags);
 	}
 }
 
@@ -95,23 +95,24 @@ void copy_user_highpage(struct page *to, struct page *from,
 {
 	void *vfrom, *vto;
 
-	vto = kmap_atomic(to, KM_USER1);
+	vto = kmap_atomic(to);
 
 	if (boot_cpu_data.dcache.n_aliases && page_mapped(from) &&
-	    !test_bit(PG_dcache_dirty, &from->flags)) {
+	    test_bit(PG_dcache_clean, &from->flags)) {
 		vfrom = kmap_coherent(from, vaddr);
 		copy_page(vto, vfrom);
 		kunmap_coherent(vfrom);
 	} else {
-		vfrom = kmap_atomic(from, KM_USER0);
+		vfrom = kmap_atomic(from);
 		copy_page(vto, vfrom);
-		kunmap_atomic(vfrom, KM_USER0);
+		kunmap_atomic(vfrom);
 	}
 
-	if (pages_do_alias((unsigned long)vto, vaddr & PAGE_MASK))
+	if (pages_do_alias((unsigned long)vto, vaddr & PAGE_MASK) ||
+	    (vma->vm_flags & VM_EXEC))
 		__flush_purge_region(vto, PAGE_SIZE);
 
-	kunmap_atomic(vto, KM_USER1);
+	kunmap_atomic(vto);
 	/* Make sure this page is cleared on other CPU's too before using it */
 	smp_wmb();
 }
@@ -119,14 +120,14 @@ EXPORT_SYMBOL(copy_user_highpage);
 
 void clear_user_highpage(struct page *page, unsigned long vaddr)
 {
-	void *kaddr = kmap_atomic(page, KM_USER0);
+	void *kaddr = kmap_atomic(page);
 
 	clear_page(kaddr);
 
 	if (pages_do_alias((unsigned long)kaddr, vaddr & PAGE_MASK))
 		__flush_purge_region(kaddr, PAGE_SIZE);
 
-	kunmap_atomic(kaddr, KM_USER0);
+	kunmap_atomic(kaddr);
 }
 EXPORT_SYMBOL(clear_user_highpage);
 
@@ -141,7 +142,7 @@ void __update_cache(struct vm_area_struct *vma,
 
 	page = pfn_to_page(pfn);
 	if (pfn_valid(pfn)) {
-		int dirty = test_and_clear_bit(PG_dcache_dirty, &page->flags);
+		int dirty = !test_and_set_bit(PG_dcache_clean, &page->flags);
 		if (dirty)
 			__flush_purge_region(page_address(page), PAGE_SIZE);
 	}
@@ -153,7 +154,7 @@ void __flush_anon_page(struct page *page, unsigned long vmaddr)
 
 	if (pages_do_alias(addr, vmaddr)) {
 		if (boot_cpu_data.dcache.n_aliases && page_mapped(page) &&
-		    !test_bit(PG_dcache_dirty, &page->flags)) {
+		    test_bit(PG_dcache_clean, &page->flags)) {
 			void *kaddr;
 
 			kaddr = kmap_coherent(page, vmaddr);
@@ -284,8 +285,8 @@ void __init cpu_cache_init(void)
 {
 	unsigned int cache_disabled = 0;
 
-#ifdef CCR
-	cache_disabled = !(__raw_readl(CCR) & CCR_CACHE_ENABLE);
+#ifdef SH_CCR
+	cache_disabled = !(__raw_readl(SH_CCR) & CCR_CACHE_ENABLE);
 #endif
 
 	compute_alias(&boot_cpu_data.icache);
@@ -334,6 +335,13 @@ void __init cpu_cache_init(void)
 		extern void __weak sh4_cache_init(void);
 
 		sh4_cache_init();
+
+		if ((boot_cpu_data.type == CPU_SH7786) ||
+		    (boot_cpu_data.type == CPU_SHX3)) {
+			extern void __weak shx3_cache_init(void);
+
+			shx3_cache_init();
+		}
 	}
 
 	if (boot_cpu_data.family == CPU_FAMILY_SH5) {
diff --git a/arch/sh/mm/consistent.c b/arch/sh/mm/consistent.c
index 902967e3f84..b81d9dbf9fe 100644
--- a/arch/sh/mm/consistent.c
+++ b/arch/sh/mm/consistent.c
@@ -16,6 +16,7 @@
 #include <linux/dma-debug.h>
 #include <linux/io.h>
 #include <linux/module.h>
+#include <linux/gfp.h>
 #include <asm/cacheflush.h>
 #include <asm/addrspace.h>
 
@@ -32,16 +33,18 @@ static int __init dma_init(void)
 fs_initcall(dma_init);
 
 void *dma_generic_alloc_coherent(struct device *dev, size_t size,
-				 dma_addr_t *dma_handle, gfp_t gfp)
+				 dma_addr_t *dma_handle, gfp_t gfp,
+				 struct dma_attrs *attrs)
 {
 	void *ret, *ret_nocache;
 	int order = get_order(size);
 
+	gfp |= __GFP_ZERO;
+
 	ret = (void *)__get_free_pages(gfp, order);
 	if (!ret)
 		return NULL;
 
-	memset(ret, 0, size);
 	/*
 	 * Pages from the page allocator may have data present in
 	 * cache. So flush the cache before using uncached memory.
@@ -62,7 +65,8 @@ void *dma_generic_alloc_coherent(struct device *dev, size_t size,
 }
 
 void dma_generic_free_coherent(struct device *dev, size_t size,
-			       void *vaddr, dma_addr_t dma_handle)
+			       void *vaddr, dma_addr_t dma_handle,
+			       struct dma_attrs *attrs)
 {
 	int order = get_order(size);
 	unsigned long pfn = dma_handle >> PAGE_SHIFT;
@@ -77,21 +81,20 @@ void dma_generic_free_coherent(struct device *dev, size_t size,
 void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
 		    enum dma_data_direction direction)
 {
-#if defined(CONFIG_CPU_SH5) || defined(CONFIG_PMB)
-	void *p1addr = vaddr;
-#else
-	void *p1addr = (void*) P1SEGADDR((unsigned long)vaddr);
-#endif
+	void *addr;
+
+	addr = __in_29bit_mode() ?
+	       (void *)CAC_ADDR((unsigned long)vaddr) : vaddr;
 
 	switch (direction) {
 	case DMA_FROM_DEVICE:		/* invalidate only */
-		__flush_invalidate_region(p1addr, size);
+		__flush_invalidate_region(addr, size);
 		break;
 	case DMA_TO_DEVICE:		/* writeback only */
-		__flush_wback_region(p1addr, size);
+		__flush_wback_region(addr, size);
 		break;
 	case DMA_BIDIRECTIONAL:		/* writeback and invalidate */
-		__flush_purge_region(p1addr, size);
+		__flush_purge_region(addr, size);
 		break;
 	default:
 		BUG();
diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c
new file mode 100644
index 00000000000..541dc610150
--- /dev/null
+++ b/arch/sh/mm/fault.c
@@ -0,0 +1,517 @@
+/*
+ * Page fault handler for SH with an MMU.
+ *
+ *  Copyright (C) 1999  Niibe Yutaka
+ *  Copyright (C) 2003 - 2012  Paul Mundt
+ *
+ *  Based on linux/arch/i386/mm/fault.c:
+ *   Copyright (C) 1995  Linus Torvalds
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/hardirq.h>
+#include <linux/kprobes.h>
+#include <linux/perf_event.h>
+#include <linux/kdebug.h>
+#include <asm/io_trapped.h>
+#include <asm/mmu_context.h>
+#include <asm/tlbflush.h>
+#include <asm/traps.h>
+
+static inline int notify_page_fault(struct pt_regs *regs, int trap)
+{
+	int ret = 0;
+
+	if (kprobes_built_in() && !user_mode(regs)) {
+		preempt_disable();
+		if (kprobe_running() && kprobe_fault_handler(regs, trap))
+			ret = 1;
+		preempt_enable();
+	}
+
+	return ret;
+}
+
+static void
+force_sig_info_fault(int si_signo, int si_code, unsigned long address,
+		     struct task_struct *tsk)
+{
+	siginfo_t info;
+
+	info.si_signo	= si_signo;
+	info.si_errno	= 0;
+	info.si_code	= si_code;
+	info.si_addr	= (void __user *)address;
+
+	force_sig_info(si_signo, &info, tsk);
+}
+
+/*
+ * This is useful to dump out the page tables associated with
+ * 'addr' in mm 'mm'.
+ */
+static void show_pte(struct mm_struct *mm, unsigned long addr)
+{
+	pgd_t *pgd;
+
+	if (mm) {
+		pgd = mm->pgd;
+	} else {
+		pgd = get_TTB();
+
+		if (unlikely(!pgd))
+			pgd = swapper_pg_dir;
+	}
+
+	printk(KERN_ALERT "pgd = %p\n", pgd);
+	pgd += pgd_index(addr);
+	printk(KERN_ALERT "[%08lx] *pgd=%0*Lx", addr,
+	       (u32)(sizeof(*pgd) * 2), (u64)pgd_val(*pgd));
+
+	do {
+		pud_t *pud;
+		pmd_t *pmd;
+		pte_t *pte;
+
+		if (pgd_none(*pgd))
+			break;
+
+		if (pgd_bad(*pgd)) {
+			printk("(bad)");
+			break;
+		}
+
+		pud = pud_offset(pgd, addr);
+		if (PTRS_PER_PUD != 1)
+			printk(", *pud=%0*Lx", (u32)(sizeof(*pud) * 2),
+			       (u64)pud_val(*pud));
+
+		if (pud_none(*pud))
+			break;
+
+		if (pud_bad(*pud)) {
+			printk("(bad)");
+			break;
+		}
+
+		pmd = pmd_offset(pud, addr);
+		if (PTRS_PER_PMD != 1)
+			printk(", *pmd=%0*Lx", (u32)(sizeof(*pmd) * 2),
+			       (u64)pmd_val(*pmd));
+
+		if (pmd_none(*pmd))
+			break;
+
+		if (pmd_bad(*pmd)) {
+			printk("(bad)");
+			break;
+		}
+
+		/* We must not map this if we have highmem enabled */
+		if (PageHighMem(pfn_to_page(pmd_val(*pmd) >> PAGE_SHIFT)))
+			break;
+
+		pte = pte_offset_kernel(pmd, addr);
+		printk(", *pte=%0*Lx", (u32)(sizeof(*pte) * 2),
+		       (u64)pte_val(*pte));
+	} while (0);
+
+	printk("\n");
+}
+
+static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
+{
+	unsigned index = pgd_index(address);
+	pgd_t *pgd_k;
+	pud_t *pud, *pud_k;
+	pmd_t *pmd, *pmd_k;
+
+	pgd += index;
+	pgd_k = init_mm.pgd + index;
+
+	if (!pgd_present(*pgd_k))
+		return NULL;
+
+	pud = pud_offset(pgd, address);
+	pud_k = pud_offset(pgd_k, address);
+	if (!pud_present(*pud_k))
+		return NULL;
+
+	if (!pud_present(*pud))
+	    set_pud(pud, *pud_k);
+
+	pmd = pmd_offset(pud, address);
+	pmd_k = pmd_offset(pud_k, address);
+	if (!pmd_present(*pmd_k))
+		return NULL;
+
+	if (!pmd_present(*pmd))
+		set_pmd(pmd, *pmd_k);
+	else {
+		/*
+		 * The page tables are fully synchronised so there must
+		 * be another reason for the fault. Return NULL here to
+		 * signal that we have not taken care of the fault.
+		 */
+		BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
+		return NULL;
+	}
+
+	return pmd_k;
+}
+
+#ifdef CONFIG_SH_STORE_QUEUES
+#define __FAULT_ADDR_LIMIT	P3_ADDR_MAX
+#else
+#define __FAULT_ADDR_LIMIT	VMALLOC_END
+#endif
+
+/*
+ * Handle a fault on the vmalloc or module mapping area
+ */
+static noinline int vmalloc_fault(unsigned long address)
+{
+	pgd_t *pgd_k;
+	pmd_t *pmd_k;
+	pte_t *pte_k;
+
+	/* Make sure we are in vmalloc/module/P3 area: */
+	if (!(address >= VMALLOC_START && address < __FAULT_ADDR_LIMIT))
+		return -1;
+
+	/*
+	 * Synchronize this task's top level page-table
+	 * with the 'reference' page table.
+	 *
+	 * Do _not_ use "current" here. We might be inside
+	 * an interrupt in the middle of a task switch..
+	 */
+	pgd_k = get_TTB();
+	pmd_k = vmalloc_sync_one(pgd_k, address);
+	if (!pmd_k)
+		return -1;
+
+	pte_k = pte_offset_kernel(pmd_k, address);
+	if (!pte_present(*pte_k))
+		return -1;
+
+	return 0;
+}
+
+static void
+show_fault_oops(struct pt_regs *regs, unsigned long address)
+{
+	if (!oops_may_print())
+		return;
+
+	printk(KERN_ALERT "BUG: unable to handle kernel ");
+	if (address < PAGE_SIZE)
+		printk(KERN_CONT "NULL pointer dereference");
+	else
+		printk(KERN_CONT "paging request");
+
+	printk(KERN_CONT " at %08lx\n", address);
+	printk(KERN_ALERT "PC:");
+	printk_address(regs->pc, 1);
+
+	show_pte(NULL, address);
+}
+
+static noinline void
+no_context(struct pt_regs *regs, unsigned long error_code,
+	   unsigned long address)
+{
+	/* Are we prepared to handle this kernel fault?  */
+	if (fixup_exception(regs))
+		return;
+
+	if (handle_trapped_io(regs, address))
+		return;
+
+	/*
+	 * Oops. The kernel tried to access some bad page. We'll have to
+	 * terminate things with extreme prejudice.
+	 */
+	bust_spinlocks(1);
+
+	show_fault_oops(regs, address);
+
+	die("Oops", regs, error_code);
+	bust_spinlocks(0);
+	do_exit(SIGKILL);
+}
+
+static void
+__bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
+		       unsigned long address, int si_code)
+{
+	struct task_struct *tsk = current;
+
+	/* User mode accesses just cause a SIGSEGV */
+	if (user_mode(regs)) {
+		/*
+		 * It's possible to have interrupts off here:
+		 */
+		local_irq_enable();
+
+		force_sig_info_fault(SIGSEGV, si_code, address, tsk);
+
+		return;
+	}
+
+	no_context(regs, error_code, address);
+}
+
+static noinline void
+bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
+		     unsigned long address)
+{
+	__bad_area_nosemaphore(regs, error_code, address, SEGV_MAPERR);
+}
+
+static void
+__bad_area(struct pt_regs *regs, unsigned long error_code,
+	   unsigned long address, int si_code)
+{
+	struct mm_struct *mm = current->mm;
+
+	/*
+	 * Something tried to access memory that isn't in our memory map..
+	 * Fix it, but check if it's kernel or user first..
+	 */
+	up_read(&mm->mmap_sem);
+
+	__bad_area_nosemaphore(regs, error_code, address, si_code);
+}
+
+static noinline void
+bad_area(struct pt_regs *regs, unsigned long error_code, unsigned long address)
+{
+	__bad_area(regs, error_code, address, SEGV_MAPERR);
+}
+
+static noinline void
+bad_area_access_error(struct pt_regs *regs, unsigned long error_code,
+		      unsigned long address)
+{
+	__bad_area(regs, error_code, address, SEGV_ACCERR);
+}
+
+static void
+do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address)
+{
+	struct task_struct *tsk = current;
+	struct mm_struct *mm = tsk->mm;
+
+	up_read(&mm->mmap_sem);
+
+	/* Kernel mode? Handle exceptions or die: */
+	if (!user_mode(regs))
+		no_context(regs, error_code, address);
+
+	force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
+}
+
+static noinline int
+mm_fault_error(struct pt_regs *regs, unsigned long error_code,
+	       unsigned long address, unsigned int fault)
+{
+	/*
+	 * Pagefault was interrupted by SIGKILL. We have no reason to
+	 * continue pagefault.
+	 */
+	if (fatal_signal_pending(current)) {
+		if (!(fault & VM_FAULT_RETRY))
+			up_read(&current->mm->mmap_sem);
+		if (!user_mode(regs))
+			no_context(regs, error_code, address);
+		return 1;
+	}
+
+	if (!(fault & VM_FAULT_ERROR))
+		return 0;
+
+	if (fault & VM_FAULT_OOM) {
+		/* Kernel mode? Handle exceptions or die: */
+		if (!user_mode(regs)) {
+			up_read(&current->mm->mmap_sem);
+			no_context(regs, error_code, address);
+			return 1;
+		}
+		up_read(&current->mm->mmap_sem);
+
+		/*
+		 * We ran out of memory, call the OOM killer, and return the
+		 * userspace (which will retry the fault, or kill us if we got
+		 * oom-killed):
+		 */
+		pagefault_out_of_memory();
+	} else {
+		if (fault & VM_FAULT_SIGBUS)
+			do_sigbus(regs, error_code, address);
+		else
+			BUG();
+	}
+
+	return 1;
+}
+
+static inline int access_error(int error_code, struct vm_area_struct *vma)
+{
+	if (error_code & FAULT_CODE_WRITE) {
+		/* write, present and write, not present: */
+		if (unlikely(!(vma->vm_flags & VM_WRITE)))
+			return 1;
+		return 0;
+	}
+
+	/* ITLB miss on NX page */
+	if (unlikely((error_code & FAULT_CODE_ITLB) &&
+		     !(vma->vm_flags & VM_EXEC)))
+		return 1;
+
+	/* read, not present: */
+	if (unlikely(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))))
+		return 1;
+
+	return 0;
+}
+
+static int fault_in_kernel_space(unsigned long address)
+{
+	return address >= TASK_SIZE;
+}
+
+/*
+ * This routine handles page faults.  It determines the address,
+ * and the problem, and then passes it off to one of the appropriate
+ * routines.
+ */
+asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
+					unsigned long error_code,
+					unsigned long address)
+{
+	unsigned long vec;
+	struct task_struct *tsk;
+	struct mm_struct *mm;
+	struct vm_area_struct * vma;
+	int fault;
+	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
+
+	tsk = current;
+	mm = tsk->mm;
+	vec = lookup_exception_vector();
+
+	/*
+	 * We fault-in kernel-space virtual memory on-demand. The
+	 * 'reference' page table is init_mm.pgd.
+	 *
+	 * NOTE! We MUST NOT take any locks for this case. We may
+	 * be in an interrupt or a critical region, and should
+	 * only copy the information from the master page table,
+	 * nothing more.
+	 */
+	if (unlikely(fault_in_kernel_space(address))) {
+		if (vmalloc_fault(address) >= 0)
+			return;
+		if (notify_page_fault(regs, vec))
+			return;
+
+		bad_area_nosemaphore(regs, error_code, address);
+		return;
+	}
+
+	if (unlikely(notify_page_fault(regs, vec)))
+		return;
+
+	/* Only enable interrupts if they were on before the fault */
+	if ((regs->sr & SR_IMASK) != SR_IMASK)
+		local_irq_enable();
+
+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
+
+	/*
+	 * If we're in an interrupt, have no user context or are running
+	 * in an atomic region then we must not take the fault:
+	 */
+	if (unlikely(in_atomic() || !mm)) {
+		bad_area_nosemaphore(regs, error_code, address);
+		return;
+	}
+
+retry:
+	down_read(&mm->mmap_sem);
+
+	vma = find_vma(mm, address);
+	if (unlikely(!vma)) {
+		bad_area(regs, error_code, address);
+		return;
+	}
+	if (likely(vma->vm_start <= address))
+		goto good_area;
+	if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) {
+		bad_area(regs, error_code, address);
+		return;
+	}
+	if (unlikely(expand_stack(vma, address))) {
+		bad_area(regs, error_code, address);
+		return;
+	}
+
+	/*
+	 * Ok, we have a good vm_area for this memory access, so
+	 * we can handle it..
+	 */
+good_area:
+	if (unlikely(access_error(error_code, vma))) {
+		bad_area_access_error(regs, error_code, address);
+		return;
+	}
+
+	set_thread_fault_code(error_code);
+
+	if (user_mode(regs))
+		flags |= FAULT_FLAG_USER;
+	if (error_code & FAULT_CODE_WRITE)
+		flags |= FAULT_FLAG_WRITE;
+
+	/*
+	 * If for any reason at all we couldn't handle the fault,
+	 * make sure we exit gracefully rather than endlessly redo
+	 * the fault.
+	 */
+	fault = handle_mm_fault(mm, vma, address, flags);
+
+	if (unlikely(fault & (VM_FAULT_RETRY | VM_FAULT_ERROR)))
+		if (mm_fault_error(regs, error_code, address, fault))
+			return;
+
+	if (flags & FAULT_FLAG_ALLOW_RETRY) {
+		if (fault & VM_FAULT_MAJOR) {
+			tsk->maj_flt++;
+			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
+				      regs, address);
+		} else {
+			tsk->min_flt++;
+			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
+				      regs, address);
+		}
+		if (fault & VM_FAULT_RETRY) {
+			flags &= ~FAULT_FLAG_ALLOW_RETRY;
+			flags |= FAULT_FLAG_TRIED;
+
+			/*
+			 * No need to up_read(&mm->mmap_sem) as we would
+			 * have already released it in __lock_page_or_retry
+			 * in mm/filemap.c.
+			 */
+			goto retry;
+		}
+	}
+
+	up_read(&mm->mmap_sem);
+}
diff --git a/arch/sh/mm/fault_32.c b/arch/sh/mm/fault_32.c
deleted file mode 100644
index 8bf79e3b7bd..00000000000
--- a/arch/sh/mm/fault_32.c
+++ /dev/null
@@ -1,380 +0,0 @@
-/*
- * Page fault handler for SH with an MMU.
- *
- *  Copyright (C) 1999  Niibe Yutaka
- *  Copyright (C) 2003 - 2009  Paul Mundt
- *
- *  Based on linux/arch/i386/mm/fault.c:
- *   Copyright (C) 1995  Linus Torvalds
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/hardirq.h>
-#include <linux/kprobes.h>
-#include <linux/perf_event.h>
-#include <asm/io_trapped.h>
-#include <asm/system.h>
-#include <asm/mmu_context.h>
-#include <asm/tlbflush.h>
-
-static inline int notify_page_fault(struct pt_regs *regs, int trap)
-{
-	int ret = 0;
-
-	if (kprobes_built_in() && !user_mode(regs)) {
-		preempt_disable();
-		if (kprobe_running() && kprobe_fault_handler(regs, trap))
-			ret = 1;
-		preempt_enable();
-	}
-
-	return ret;
-}
-
-static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
-{
-	unsigned index = pgd_index(address);
-	pgd_t *pgd_k;
-	pud_t *pud, *pud_k;
-	pmd_t *pmd, *pmd_k;
-
-	pgd += index;
-	pgd_k = init_mm.pgd + index;
-
-	if (!pgd_present(*pgd_k))
-		return NULL;
-
-	pud = pud_offset(pgd, address);
-	pud_k = pud_offset(pgd_k, address);
-	if (!pud_present(*pud_k))
-		return NULL;
-
-	if (!pud_present(*pud))
-	    set_pud(pud, *pud_k);
-
-	pmd = pmd_offset(pud, address);
-	pmd_k = pmd_offset(pud_k, address);
-	if (!pmd_present(*pmd_k))
-		return NULL;
-
-	if (!pmd_present(*pmd))
-		set_pmd(pmd, *pmd_k);
-	else {
-		/*
-		 * The page tables are fully synchronised so there must
-		 * be another reason for the fault. Return NULL here to
-		 * signal that we have not taken care of the fault.
-		 */
-		BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
-		return NULL;
-	}
-
-	return pmd_k;
-}
-
-/*
- * Handle a fault on the vmalloc or module mapping area
- */
-static noinline int vmalloc_fault(unsigned long address)
-{
-	pgd_t *pgd_k;
-	pmd_t *pmd_k;
-	pte_t *pte_k;
-
-	/* Make sure we are in vmalloc/module/P3 area: */
-	if (!(address >= VMALLOC_START && address < P3_ADDR_MAX))
-		return -1;
-
-	/*
-	 * Synchronize this task's top level page-table
-	 * with the 'reference' page table.
-	 *
-	 * Do _not_ use "current" here. We might be inside
-	 * an interrupt in the middle of a task switch..
-	 */
-	pgd_k = get_TTB();
-	pmd_k = vmalloc_sync_one(pgd_k, address);
-	if (!pmd_k)
-		return -1;
-
-	pte_k = pte_offset_kernel(pmd_k, address);
-	if (!pte_present(*pte_k))
-		return -1;
-
-	return 0;
-}
-
-static int fault_in_kernel_space(unsigned long address)
-{
-	return address >= TASK_SIZE;
-}
-
-/*
- * This routine handles page faults.  It determines the address,
- * and the problem, and then passes it off to one of the appropriate
- * routines.
- */
-asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
-					unsigned long writeaccess,
-					unsigned long address)
-{
-	unsigned long vec;
-	struct task_struct *tsk;
-	struct mm_struct *mm;
-	struct vm_area_struct * vma;
-	int si_code;
-	int fault;
-	siginfo_t info;
-
-	tsk = current;
-	mm = tsk->mm;
-	si_code = SEGV_MAPERR;
-	vec = lookup_exception_vector();
-
-	/*
-	 * We fault-in kernel-space virtual memory on-demand. The
-	 * 'reference' page table is init_mm.pgd.
-	 *
-	 * NOTE! We MUST NOT take any locks for this case. We may
-	 * be in an interrupt or a critical region, and should
-	 * only copy the information from the master page table,
-	 * nothing more.
-	 */
-	if (unlikely(fault_in_kernel_space(address))) {
-		if (vmalloc_fault(address) >= 0)
-			return;
-		if (notify_page_fault(regs, vec))
-			return;
-
-		goto bad_area_nosemaphore;
-	}
-
-	if (unlikely(notify_page_fault(regs, vec)))
-		return;
-
-	/* Only enable interrupts if they were on before the fault */
-	if ((regs->sr & SR_IMASK) != SR_IMASK)
-		local_irq_enable();
-
-	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
-
-	/*
-	 * If we're in an interrupt, have no user context or are running
-	 * in an atomic region then we must not take the fault:
-	 */
-	if (in_atomic() || !mm)
-		goto no_context;
-
-	down_read(&mm->mmap_sem);
-
-	vma = find_vma(mm, address);
-	if (!vma)
-		goto bad_area;
-	if (vma->vm_start <= address)
-		goto good_area;
-	if (!(vma->vm_flags & VM_GROWSDOWN))
-		goto bad_area;
-	if (expand_stack(vma, address))
-		goto bad_area;
-
-	/*
-	 * Ok, we have a good vm_area for this memory access, so
-	 * we can handle it..
-	 */
-good_area:
-	si_code = SEGV_ACCERR;
-	if (writeaccess) {
-		if (!(vma->vm_flags & VM_WRITE))
-			goto bad_area;
-	} else {
-		if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
-			goto bad_area;
-	}
-
-	/*
-	 * If for any reason at all we couldn't handle the fault,
-	 * make sure we exit gracefully rather than endlessly redo
-	 * the fault.
-	 */
-survive:
-	fault = handle_mm_fault(mm, vma, address, writeaccess ? FAULT_FLAG_WRITE : 0);
-	if (unlikely(fault & VM_FAULT_ERROR)) {
-		if (fault & VM_FAULT_OOM)
-			goto out_of_memory;
-		else if (fault & VM_FAULT_SIGBUS)
-			goto do_sigbus;
-		BUG();
-	}
-	if (fault & VM_FAULT_MAJOR) {
-		tsk->maj_flt++;
-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
-				     regs, address);
-	} else {
-		tsk->min_flt++;
-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
-				     regs, address);
-	}
-
-	up_read(&mm->mmap_sem);
-	return;
-
-	/*
-	 * Something tried to access memory that isn't in our memory map..
-	 * Fix it, but check if it's kernel or user first..
-	 */
-bad_area:
-	up_read(&mm->mmap_sem);
-
-bad_area_nosemaphore:
-	if (user_mode(regs)) {
-		info.si_signo = SIGSEGV;
-		info.si_errno = 0;
-		info.si_code = si_code;
-		info.si_addr = (void *) address;
-		force_sig_info(SIGSEGV, &info, tsk);
-		return;
-	}
-
-no_context:
-	/* Are we prepared to handle this kernel fault?  */
-	if (fixup_exception(regs))
-		return;
-
-	if (handle_trapped_io(regs, address))
-		return;
-/*
- * Oops. The kernel tried to access some bad page. We'll have to
- * terminate things with extreme prejudice.
- *
- */
-
-	bust_spinlocks(1);
-
-	if (oops_may_print()) {
-		unsigned long page;
-
-		if (address < PAGE_SIZE)
-			printk(KERN_ALERT "Unable to handle kernel NULL "
-					  "pointer dereference");
-		else
-			printk(KERN_ALERT "Unable to handle kernel paging "
-					  "request");
-		printk(" at virtual address %08lx\n", address);
-		printk(KERN_ALERT "pc = %08lx\n", regs->pc);
-		page = (unsigned long)get_TTB();
-		if (page) {
-			page = ((__typeof__(page) *)page)[address >> PGDIR_SHIFT];
-			printk(KERN_ALERT "*pde = %08lx\n", page);
-			if (page & _PAGE_PRESENT) {
-				page &= PAGE_MASK;
-				address &= 0x003ff000;
-				page = ((__typeof__(page) *)
-						__va(page))[address >>
-							    PAGE_SHIFT];
-				printk(KERN_ALERT "*pte = %08lx\n", page);
-			}
-		}
-	}
-
-	die("Oops", regs, writeaccess);
-	bust_spinlocks(0);
-	do_exit(SIGKILL);
-
-/*
- * We ran out of memory, or some other thing happened to us that made
- * us unable to handle the page fault gracefully.
- */
-out_of_memory:
-	up_read(&mm->mmap_sem);
-	if (is_global_init(current)) {
-		yield();
-		down_read(&mm->mmap_sem);
-		goto survive;
-	}
-	printk("VM: killing process %s\n", tsk->comm);
-	if (user_mode(regs))
-		do_group_exit(SIGKILL);
-	goto no_context;
-
-do_sigbus:
-	up_read(&mm->mmap_sem);
-
-	/*
-	 * Send a sigbus, regardless of whether we were in kernel
-	 * or user mode.
-	 */
-	info.si_signo = SIGBUS;
-	info.si_errno = 0;
-	info.si_code = BUS_ADRERR;
-	info.si_addr = (void *)address;
-	force_sig_info(SIGBUS, &info, tsk);
-
-	/* Kernel mode? Handle exceptions or die */
-	if (!user_mode(regs))
-		goto no_context;
-}
-
-/*
- * Called with interrupts disabled.
- */
-asmlinkage int __kprobes
-handle_tlbmiss(struct pt_regs *regs, unsigned long writeaccess,
-	       unsigned long address)
-{
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *pte;
-	pte_t entry;
-
-	/*
-	 * We don't take page faults for P1, P2, and parts of P4, these
-	 * are always mapped, whether it be due to legacy behaviour in
-	 * 29-bit mode, or due to PMB configuration in 32-bit mode.
-	 */
-	if (address >= P3SEG && address < P3_ADDR_MAX) {
-		pgd = pgd_offset_k(address);
-	} else {
-		if (unlikely(address >= TASK_SIZE || !current->mm))
-			return 1;
-
-		pgd = pgd_offset(current->mm, address);
-	}
-
-	pud = pud_offset(pgd, address);
-	if (pud_none_or_clear_bad(pud))
-		return 1;
-	pmd = pmd_offset(pud, address);
-	if (pmd_none_or_clear_bad(pmd))
-		return 1;
-	pte = pte_offset_kernel(pmd, address);
-	entry = *pte;
-	if (unlikely(pte_none(entry) || pte_not_present(entry)))
-		return 1;
-	if (unlikely(writeaccess && !pte_write(entry)))
-		return 1;
-
-	if (writeaccess)
-		entry = pte_mkdirty(entry);
-	entry = pte_mkyoung(entry);
-
-	set_pte(pte, entry);
-
-#if defined(CONFIG_CPU_SH4) && !defined(CONFIG_SMP)
-	/*
-	 * SH-4 does not set MMUCR.RC to the corresponding TLB entry in
-	 * the case of an initial page write exception, so we need to
-	 * flush it in order to avoid potential TLB entry duplication.
-	 */
-	if (writeaccess == 2)
-		local_flush_tlb_one(get_asid(), address & PAGE_MASK);
-#endif
-
-	update_mmu_cache(NULL, address, pte);
-
-	return 0;
-}
diff --git a/arch/sh/mm/fault_64.c b/arch/sh/mm/fault_64.c
deleted file mode 100644
index 2b356cec248..00000000000
--- a/arch/sh/mm/fault_64.c
+++ /dev/null
@@ -1,266 +0,0 @@
-/*
- * The SH64 TLB miss.
- *
- * Original code from fault.c
- * Copyright (C) 2000, 2001  Paolo Alberelli
- *
- * Fast PTE->TLB refill path
- * Copyright (C) 2003 Richard.Curnow@superh.com
- *
- * IMPORTANT NOTES :
- * The do_fast_page_fault function is called from a context in entry.S
- * where very few registers have been saved.  In particular, the code in
- * this file must be compiled not to use ANY caller-save registers that
- * are not part of the restricted save set.  Also, it means that code in
- * this file must not make calls to functions elsewhere in the kernel, or
- * else the excepting context will see corruption in its caller-save
- * registers.  Plus, the entry.S save area is non-reentrant, so this code
- * has to run with SR.BL==1, i.e. no interrupts taken inside it and panic
- * on any exception.
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-#include <linux/signal.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/types.h>
-#include <linux/ptrace.h>
-#include <linux/mman.h>
-#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/interrupt.h>
-#include <asm/system.h>
-#include <asm/tlb.h>
-#include <asm/io.h>
-#include <asm/uaccess.h>
-#include <asm/pgalloc.h>
-#include <asm/mmu_context.h>
-#include <cpu/registers.h>
-
-/* Callable from fault.c, so not static */
-inline void __do_tlb_refill(unsigned long address,
-                            unsigned long long is_text_not_data, pte_t *pte)
-{
-	unsigned long long ptel;
-	unsigned long long pteh=0;
-	struct tlb_info *tlbp;
-	unsigned long long next;
-
-	/* Get PTEL first */
-	ptel = pte_val(*pte);
-
-	/*
-	 * Set PTEH register
-	 */
-	pteh = neff_sign_extend(address & MMU_VPN_MASK);
-
-	/* Set the ASID. */
-	pteh |= get_asid() << PTEH_ASID_SHIFT;
-	pteh |= PTEH_VALID;
-
-	/* Set PTEL register, set_pte has performed the sign extension */
-	ptel &= _PAGE_FLAGS_HARDWARE_MASK; /* drop software flags */
-
-	tlbp = is_text_not_data ? &(cpu_data->itlb) : &(cpu_data->dtlb);
-	next = tlbp->next;
-	__flush_tlb_slot(next);
-	asm volatile ("putcfg %0,1,%2\n\n\t"
-		      "putcfg %0,0,%1\n"
-		      :  : "r" (next), "r" (pteh), "r" (ptel) );
-
-	next += TLB_STEP;
-	if (next > tlbp->last) next = tlbp->first;
-	tlbp->next = next;
-
-}
-
-static int handle_vmalloc_fault(struct mm_struct *mm,
-				unsigned long protection_flags,
-                                unsigned long long textaccess,
-				unsigned long address)
-{
-	pgd_t *dir;
-	pud_t *pud;
-	pmd_t *pmd;
-	static pte_t *pte;
-	pte_t entry;
-
-	dir = pgd_offset_k(address);
-
-	pud = pud_offset(dir, address);
-	if (pud_none_or_clear_bad(pud))
-		return 0;
-
-	pmd = pmd_offset(pud, address);
-	if (pmd_none_or_clear_bad(pmd))
-		return 0;
-
-	pte = pte_offset_kernel(pmd, address);
-	entry = *pte;
-
-	if (pte_none(entry) || !pte_present(entry))
-		return 0;
-	if ((pte_val(entry) & protection_flags) != protection_flags)
-		return 0;
-
-        __do_tlb_refill(address, textaccess, pte);
-
-	return 1;
-}
-
-static int handle_tlbmiss(struct mm_struct *mm,
-			  unsigned long long protection_flags,
-			  unsigned long long textaccess,
-			  unsigned long address)
-{
-	pgd_t *dir;
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *pte;
-	pte_t entry;
-
-	/* NB. The PGD currently only contains a single entry - there is no
-	   page table tree stored for the top half of the address space since
-	   virtual pages in that region should never be mapped in user mode.
-	   (In kernel mode, the only things in that region are the 512Mb super
-	   page (locked in), and vmalloc (modules) +  I/O device pages (handled
-	   by handle_vmalloc_fault), so no PGD for the upper half is required
-	   by kernel mode either).
-
-	   See how mm->pgd is allocated and initialised in pgd_alloc to see why
-	   the next test is necessary.  - RPC */
-	if (address >= (unsigned long) TASK_SIZE)
-		/* upper half - never has page table entries. */
-		return 0;
-
-	dir = pgd_offset(mm, address);
-	if (pgd_none(*dir) || !pgd_present(*dir))
-		return 0;
-	if (!pgd_present(*dir))
-		return 0;
-
-	pud = pud_offset(dir, address);
-	if (pud_none(*pud) || !pud_present(*pud))
-		return 0;
-
-	pmd = pmd_offset(pud, address);
-	if (pmd_none(*pmd) || !pmd_present(*pmd))
-		return 0;
-
-	pte = pte_offset_kernel(pmd, address);
-	entry = *pte;
-
-	if (pte_none(entry) || !pte_present(entry))
-		return 0;
-
-	/*
-	 * If the page doesn't have sufficient protection bits set to
-	 * service the kind of fault being handled, there's not much
-	 * point doing the TLB refill.  Punt the fault to the general
-	 * handler.
-	 */
-	if ((pte_val(entry) & protection_flags) != protection_flags)
-		return 0;
-
-        __do_tlb_refill(address, textaccess, pte);
-
-	return 1;
-}
-
-/*
- * Put all this information into one structure so that everything is just
- * arithmetic relative to a single base address.  This reduces the number
- * of movi/shori pairs needed just to load addresses of static data.
- */
-struct expevt_lookup {
-	unsigned short protection_flags[8];
-	unsigned char  is_text_access[8];
-	unsigned char  is_write_access[8];
-};
-
-#define PRU (1<<9)
-#define PRW (1<<8)
-#define PRX (1<<7)
-#define PRR (1<<6)
-
-#define DIRTY (_PAGE_DIRTY | _PAGE_ACCESSED)
-#define YOUNG (_PAGE_ACCESSED)
-
-/* Sized as 8 rather than 4 to allow checking the PTE's PRU bit against whether
-   the fault happened in user mode or privileged mode. */
-static struct expevt_lookup expevt_lookup_table = {
-	.protection_flags = {PRX, PRX, 0, 0, PRR, PRR, PRW, PRW},
-	.is_text_access   = {1,   1,   0, 0, 0,   0,   0,   0}
-};
-
-/*
-   This routine handles page faults that can be serviced just by refilling a
-   TLB entry from an existing page table entry.  (This case represents a very
-   large majority of page faults.) Return 1 if the fault was successfully
-   handled.  Return 0 if the fault could not be handled.  (This leads into the
-   general fault handling in fault.c which deals with mapping file-backed
-   pages, stack growth, segmentation faults, swapping etc etc)
- */
-asmlinkage int do_fast_page_fault(unsigned long long ssr_md,
-				  unsigned long long expevt,
-			          unsigned long address)
-{
-	struct task_struct *tsk;
-	struct mm_struct *mm;
-	unsigned long long textaccess;
-	unsigned long long protection_flags;
-	unsigned long long index;
-	unsigned long long expevt4;
-
-	/* The next few lines implement a way of hashing EXPEVT into a
-	 * small array index which can be used to lookup parameters
-	 * specific to the type of TLBMISS being handled.
-	 *
-	 * Note:
-	 *	ITLBMISS has EXPEVT==0xa40
-	 *	RTLBMISS has EXPEVT==0x040
-	 *	WTLBMISS has EXPEVT==0x060
-	 */
-	expevt4 = (expevt >> 4);
-	/* TODO : xor ssr_md into this expression too. Then we can check
-	 * that PRU is set when it needs to be. */
-	index = expevt4 ^ (expevt4 >> 5);
-	index &= 7;
-	protection_flags = expevt_lookup_table.protection_flags[index];
-	textaccess       = expevt_lookup_table.is_text_access[index];
-
-	/* SIM
-	 * Note this is now called with interrupts still disabled
-	 * This is to cope with being called for a missing IO port
-	 * address with interrupts disabled. This should be fixed as
-	 * soon as we have a better 'fast path' miss handler.
-	 *
-	 * Plus take care how you try and debug this stuff.
-	 * For example, writing debug data to a port which you
-	 * have just faulted on is not going to work.
-	 */
-
-	tsk = current;
-	mm = tsk->mm;
-
-	if ((address >= VMALLOC_START && address < VMALLOC_END) ||
-	    (address >= IOBASE_VADDR  && address < IOBASE_END)) {
-		if (ssr_md)
-			/*
-			 * Process-contexts can never have this address
-			 * range mapped
-			 */
-			if (handle_vmalloc_fault(mm, protection_flags,
-						 textaccess, address))
-				return 1;
-	} else if (!in_interrupt() && mm) {
-		if (handle_tlbmiss(mm, protection_flags, textaccess, address))
-			return 1;
-	}
-
-	return 0;
-}
diff --git a/arch/sh/mm/flush-sh4.c b/arch/sh/mm/flush-sh4.c
index cef402678f4..0b85dd9dd3a 100644
--- a/arch/sh/mm/flush-sh4.c
+++ b/arch/sh/mm/flush-sh4.c
@@ -1,6 +1,8 @@
 #include <linux/mm.h>
 #include <asm/mmu_context.h>
+#include <asm/cache_insns.h>
 #include <asm/cacheflush.h>
+#include <asm/traps.h>
 
 /*
  * Write back the dirty D-caches, but not invalidate them.
diff --git a/arch/sh/mm/gup.c b/arch/sh/mm/gup.c
new file mode 100644
index 00000000000..bf8daf9d9c9
--- /dev/null
+++ b/arch/sh/mm/gup.c
@@ -0,0 +1,273 @@
+/*
+ * Lockless get_user_pages_fast for SuperH
+ *
+ * Copyright (C) 2009 - 2010  Paul Mundt
+ *
+ * Cloned from the x86 and PowerPC versions, by:
+ *
+ *	Copyright (C) 2008 Nick Piggin
+ *	Copyright (C) 2008 Novell Inc.
+ */
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/vmstat.h>
+#include <linux/highmem.h>
+#include <asm/pgtable.h>
+
+static inline pte_t gup_get_pte(pte_t *ptep)
+{
+#ifndef CONFIG_X2TLB
+	return ACCESS_ONCE(*ptep);
+#else
+	/*
+	 * With get_user_pages_fast, we walk down the pagetables without
+	 * taking any locks.  For this we would like to load the pointers
+	 * atomically, but that is not possible with 64-bit PTEs.  What
+	 * we do have is the guarantee that a pte will only either go
+	 * from not present to present, or present to not present or both
+	 * -- it will not switch to a completely different present page
+	 * without a TLB flush in between; something that we are blocking
+	 * by holding interrupts off.
+	 *
+	 * Setting ptes from not present to present goes:
+	 * ptep->pte_high = h;
+	 * smp_wmb();
+	 * ptep->pte_low = l;
+	 *
+	 * And present to not present goes:
+	 * ptep->pte_low = 0;
+	 * smp_wmb();
+	 * ptep->pte_high = 0;
+	 *
+	 * We must ensure here that the load of pte_low sees l iff pte_high
+	 * sees h. We load pte_high *after* loading pte_low, which ensures we
+	 * don't see an older value of pte_high.  *Then* we recheck pte_low,
+	 * which ensures that we haven't picked up a changed pte high. We might
+	 * have got rubbish values from pte_low and pte_high, but we are
+	 * guaranteed that pte_low will not have the present bit set *unless*
+	 * it is 'l'. And get_user_pages_fast only operates on present ptes, so
+	 * we're safe.
+	 *
+	 * gup_get_pte should not be used or copied outside gup.c without being
+	 * very careful -- it does not atomically load the pte or anything that
+	 * is likely to be useful for you.
+	 */
+	pte_t pte;
+
+retry:
+	pte.pte_low = ptep->pte_low;
+	smp_rmb();
+	pte.pte_high = ptep->pte_high;
+	smp_rmb();
+	if (unlikely(pte.pte_low != ptep->pte_low))
+		goto retry;
+
+	return pte;
+#endif
+}
+
+/*
+ * The performance critical leaf functions are made noinline otherwise gcc
+ * inlines everything into a single function which results in too much
+ * register pressure.
+ */
+static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
+		unsigned long end, int write, struct page **pages, int *nr)
+{
+	u64 mask, result;
+	pte_t *ptep;
+
+#ifdef CONFIG_X2TLB
+	result = _PAGE_PRESENT | _PAGE_EXT(_PAGE_EXT_KERN_READ | _PAGE_EXT_USER_READ);
+	if (write)
+		result |= _PAGE_EXT(_PAGE_EXT_KERN_WRITE | _PAGE_EXT_USER_WRITE);
+#elif defined(CONFIG_SUPERH64)
+	result = _PAGE_PRESENT | _PAGE_USER | _PAGE_READ;
+	if (write)
+		result |= _PAGE_WRITE;
+#else
+	result = _PAGE_PRESENT | _PAGE_USER;
+	if (write)
+		result |= _PAGE_RW;
+#endif
+
+	mask = result | _PAGE_SPECIAL;
+
+	ptep = pte_offset_map(&pmd, addr);
+	do {
+		pte_t pte = gup_get_pte(ptep);
+		struct page *page;
+
+		if ((pte_val(pte) & mask) != result) {
+			pte_unmap(ptep);
+			return 0;
+		}
+		VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
+		page = pte_page(pte);
+		get_page(page);
+		pages[*nr] = page;
+		(*nr)++;
+
+	} while (ptep++, addr += PAGE_SIZE, addr != end);
+	pte_unmap(ptep - 1);
+
+	return 1;
+}
+
+static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
+		int write, struct page **pages, int *nr)
+{
+	unsigned long next;
+	pmd_t *pmdp;
+
+	pmdp = pmd_offset(&pud, addr);
+	do {
+		pmd_t pmd = *pmdp;
+
+		next = pmd_addr_end(addr, end);
+		if (pmd_none(pmd))
+			return 0;
+		if (!gup_pte_range(pmd, addr, next, write, pages, nr))
+			return 0;
+	} while (pmdp++, addr = next, addr != end);
+
+	return 1;
+}
+
+static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
+			int write, struct page **pages, int *nr)
+{
+	unsigned long next;
+	pud_t *pudp;
+
+	pudp = pud_offset(&pgd, addr);
+	do {
+		pud_t pud = *pudp;
+
+		next = pud_addr_end(addr, end);
+		if (pud_none(pud))
+			return 0;
+		if (!gup_pmd_range(pud, addr, next, write, pages, nr))
+			return 0;
+	} while (pudp++, addr = next, addr != end);
+
+	return 1;
+}
+
+/*
+ * Like get_user_pages_fast() except its IRQ-safe in that it won't fall
+ * back to the regular GUP.
+ */
+int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
+			  struct page **pages)
+{
+	struct mm_struct *mm = current->mm;
+	unsigned long addr, len, end;
+	unsigned long next;
+	unsigned long flags;
+	pgd_t *pgdp;
+	int nr = 0;
+
+	start &= PAGE_MASK;
+	addr = start;
+	len = (unsigned long) nr_pages << PAGE_SHIFT;
+	end = start + len;
+	if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
+					(void __user *)start, len)))
+		return 0;
+
+	/*
+	 * This doesn't prevent pagetable teardown, but does prevent
+	 * the pagetables and pages from being freed.
+	 */
+	local_irq_save(flags);
+	pgdp = pgd_offset(mm, addr);
+	do {
+		pgd_t pgd = *pgdp;
+
+		next = pgd_addr_end(addr, end);
+		if (pgd_none(pgd))
+			break;
+		if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
+			break;
+	} while (pgdp++, addr = next, addr != end);
+	local_irq_restore(flags);
+
+	return nr;
+}
+
+/**
+ * get_user_pages_fast() - pin user pages in memory
+ * @start:	starting user address
+ * @nr_pages:	number of pages from start to pin
+ * @write:	whether pages will be written to
+ * @pages:	array that receives pointers to the pages pinned.
+ *		Should be at least nr_pages long.
+ *
+ * Attempt to pin user pages in memory without taking mm->mmap_sem.
+ * If not successful, it will fall back to taking the lock and
+ * calling get_user_pages().
+ *
+ * Returns number of pages pinned. This may be fewer than the number
+ * requested. If nr_pages is 0 or negative, returns 0. If no pages
+ * were pinned, returns -errno.
+ */
+int get_user_pages_fast(unsigned long start, int nr_pages, int write,
+			struct page **pages)
+{
+	struct mm_struct *mm = current->mm;
+	unsigned long addr, len, end;
+	unsigned long next;
+	pgd_t *pgdp;
+	int nr = 0;
+
+	start &= PAGE_MASK;
+	addr = start;
+	len = (unsigned long) nr_pages << PAGE_SHIFT;
+
+	end = start + len;
+	if (end < start)
+		goto slow_irqon;
+
+	local_irq_disable();
+	pgdp = pgd_offset(mm, addr);
+	do {
+		pgd_t pgd = *pgdp;
+
+		next = pgd_addr_end(addr, end);
+		if (pgd_none(pgd))
+			goto slow;
+		if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
+			goto slow;
+	} while (pgdp++, addr = next, addr != end);
+	local_irq_enable();
+
+	VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT);
+	return nr;
+
+	{
+		int ret;
+
+slow:
+		local_irq_enable();
+slow_irqon:
+		/* Try to get the remaining pages with get_user_pages */
+		start += nr << PAGE_SHIFT;
+		pages += nr;
+
+		down_read(&mm->mmap_sem);
+		ret = get_user_pages(current, mm, start,
+			(end - start) >> PAGE_SHIFT, write, 0, pages, NULL);
+		up_read(&mm->mmap_sem);
+
+		/* Have to be a bit careful with return values */
+		if (nr > 0) {
+			if (ret < 0)
+				ret = nr;
+			else
+				ret += nr;
+		}
+
+		return ret;
+	}
+}
diff --git a/arch/sh/mm/hugetlbpage.c b/arch/sh/mm/hugetlbpage.c
index 9304117039c..d7762349ea4 100644
--- a/arch/sh/mm/hugetlbpage.c
+++ b/arch/sh/mm/hugetlbpage.c
@@ -13,7 +13,6 @@
 #include <linux/mm.h>
 #include <linux/hugetlb.h>
 #include <linux/pagemap.h>
-#include <linux/slab.h>
 #include <linux/sysctl.h>
 
 #include <asm/mman.h>
@@ -36,7 +35,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
 		if (pud) {
 			pmd = pmd_alloc(mm, pud, addr);
 			if (pmd)
-				pte = pte_alloc_map(mm, pmd, addr);
+				pte = pte_alloc_map(mm, NULL, pmd, addr);
 		}
 	}
 
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index 68028e8f26c..2d089fe2cba 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -2,7 +2,7 @@
  * linux/arch/sh/mm/init.c
  *
  *  Copyright (C) 1999  Niibe Yutaka
- *  Copyright (C) 2002 - 2007  Paul Mundt
+ *  Copyright (C) 2002 - 2011  Paul Mundt
  *
  *  Based on linux/arch/i386/mm/init.c:
  *   Copyright (C) 1995  Linus Torvalds
@@ -10,29 +10,43 @@
 #include <linux/mm.h>
 #include <linux/swap.h>
 #include <linux/init.h>
+#include <linux/gfp.h>
 #include <linux/bootmem.h>
 #include <linux/proc_fs.h>
 #include <linux/pagemap.h>
 #include <linux/percpu.h>
 #include <linux/io.h>
+#include <linux/memblock.h>
 #include <linux/dma-mapping.h>
+#include <linux/export.h>
 #include <asm/mmu_context.h>
+#include <asm/mmzone.h>
+#include <asm/kexec.h>
 #include <asm/tlb.h>
 #include <asm/cacheflush.h>
 #include <asm/sections.h>
+#include <asm/setup.h>
 #include <asm/cache.h>
 #include <asm/sizes.h>
 
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
 pgd_t swapper_pg_dir[PTRS_PER_PGD];
 
+void __init generic_mem_init(void)
+{
+	memblock_add(__MEMORY_START, __MEMORY_SIZE);
+}
+
+void __init __weak plat_mem_setup(void)
+{
+	/* Nothing to see here, move along. */
+}
+
 #ifdef CONFIG_MMU
 static pte_t *__get_pte_phys(unsigned long addr)
 {
 	pgd_t *pgd;
 	pud_t *pud;
 	pmd_t *pmd;
-	pte_t *pte;
 
 	pgd = pgd_offset_k(addr);
 	if (pgd_none(*pgd)) {
@@ -52,8 +66,7 @@ static pte_t *__get_pte_phys(unsigned long addr)
 		return NULL;
 	}
 
-	pte = pte_offset_kernel(pmd, addr);
-	return pte;
+	return pte_offset_kernel(pmd, addr);
 }
 
 static void set_pte_phys(unsigned long addr, unsigned long phys, pgprot_t prot)
@@ -110,13 +123,45 @@ void __clear_fixmap(enum fixed_addresses idx, pgprot_t prot)
 	clear_pte_phys(address, prot);
 }
 
+static pmd_t * __init one_md_table_init(pud_t *pud)
+{
+	if (pud_none(*pud)) {
+		pmd_t *pmd;
+
+		pmd = alloc_bootmem_pages(PAGE_SIZE);
+		pud_populate(&init_mm, pud, pmd);
+		BUG_ON(pmd != pmd_offset(pud, 0));
+	}
+
+	return pmd_offset(pud, 0);
+}
+
+static pte_t * __init one_page_table_init(pmd_t *pmd)
+{
+	if (pmd_none(*pmd)) {
+		pte_t *pte;
+
+		pte = alloc_bootmem_pages(PAGE_SIZE);
+		pmd_populate_kernel(&init_mm, pmd, pte);
+		BUG_ON(pte != pte_offset_kernel(pmd, 0));
+	}
+
+	return pte_offset_kernel(pmd, 0);
+}
+
+static pte_t * __init page_table_kmap_check(pte_t *pte, pmd_t *pmd,
+					    unsigned long vaddr, pte_t *lastpte)
+{
+	return pte;
+}
+
 void __init page_table_range_init(unsigned long start, unsigned long end,
 					 pgd_t *pgd_base)
 {
 	pgd_t *pgd;
 	pud_t *pud;
 	pmd_t *pmd;
-	pte_t *pte;
+	pte_t *pte = NULL;
 	int i, j, k;
 	unsigned long vaddr;
 
@@ -129,19 +174,13 @@ void __init page_table_range_init(unsigned long start, unsigned long end,
 	for ( ; (i < PTRS_PER_PGD) && (vaddr != end); pgd++, i++) {
 		pud = (pud_t *)pgd;
 		for ( ; (j < PTRS_PER_PUD) && (vaddr != end); pud++, j++) {
-#ifdef __PAGETABLE_PMD_FOLDED
-			pmd = (pmd_t *)pud;
-#else
-			pmd = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE);
-			pud_populate(&init_mm, pud, pmd);
+			pmd = one_md_table_init(pud);
+#ifndef __PAGETABLE_PMD_FOLDED
 			pmd += k;
 #endif
 			for (; (k < PTRS_PER_PMD) && (vaddr != end); pmd++, k++) {
-				if (pmd_none(*pmd)) {
-					pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
-					pmd_populate_kernel(&init_mm, pmd, pte);
-					BUG_ON(pte != pte_offset_kernel(pmd, 0));
-				}
+				pte = page_table_kmap_check(one_page_table_init(pmd),
+							    pmd, vaddr, pte);
 				vaddr += PMD_SIZE;
 			}
 			k = 0;
@@ -151,15 +190,172 @@ void __init page_table_range_init(unsigned long start, unsigned long end,
 }
 #endif	/* CONFIG_MMU */
 
-/*
- * paging_init() sets up the page tables
- */
+void __init allocate_pgdat(unsigned int nid)
+{
+	unsigned long start_pfn, end_pfn;
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+	unsigned long phys;
+#endif
+
+	get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
+
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+	phys = __memblock_alloc_base(sizeof(struct pglist_data),
+				SMP_CACHE_BYTES, end_pfn << PAGE_SHIFT);
+	/* Retry with all of system memory */
+	if (!phys)
+		phys = __memblock_alloc_base(sizeof(struct pglist_data),
+					SMP_CACHE_BYTES, memblock_end_of_DRAM());
+	if (!phys)
+		panic("Can't allocate pgdat for node %d\n", nid);
+
+	NODE_DATA(nid) = __va(phys);
+	memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
+
+	NODE_DATA(nid)->bdata = &bootmem_node_data[nid];
+#endif
+
+	NODE_DATA(nid)->node_start_pfn = start_pfn;
+	NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn;
+}
+
+static void __init bootmem_init_one_node(unsigned int nid)
+{
+	unsigned long total_pages, paddr;
+	unsigned long end_pfn;
+	struct pglist_data *p;
+
+	p = NODE_DATA(nid);
+
+	/* Nothing to do.. */
+	if (!p->node_spanned_pages)
+		return;
+
+	end_pfn = pgdat_end_pfn(p);
+
+	total_pages = bootmem_bootmap_pages(p->node_spanned_pages);
+
+	paddr = memblock_alloc(total_pages << PAGE_SHIFT, PAGE_SIZE);
+	if (!paddr)
+		panic("Can't allocate bootmap for nid[%d]\n", nid);
+
+	init_bootmem_node(p, paddr >> PAGE_SHIFT, p->node_start_pfn, end_pfn);
+
+	free_bootmem_with_active_regions(nid, end_pfn);
+
+	/*
+	 * XXX Handle initial reservations for the system memory node
+	 * only for the moment, we'll refactor this later for handling
+	 * reservations in other nodes.
+	 */
+	if (nid == 0) {
+		struct memblock_region *reg;
+
+		/* Reserve the sections we're already using. */
+		for_each_memblock(reserved, reg) {
+			reserve_bootmem(reg->base, reg->size, BOOTMEM_DEFAULT);
+		}
+	}
+
+	sparse_memory_present_with_active_regions(nid);
+}
+
+static void __init do_init_bootmem(void)
+{
+	struct memblock_region *reg;
+	int i;
+
+	/* Add active regions with valid PFNs. */
+	for_each_memblock(memory, reg) {
+		unsigned long start_pfn, end_pfn;
+		start_pfn = memblock_region_memory_base_pfn(reg);
+		end_pfn = memblock_region_memory_end_pfn(reg);
+		__add_active_range(0, start_pfn, end_pfn);
+	}
+
+	/* All of system RAM sits in node 0 for the non-NUMA case */
+	allocate_pgdat(0);
+	node_set_online(0);
+
+	plat_mem_setup();
+
+	for_each_online_node(i)
+		bootmem_init_one_node(i);
+
+	sparse_init();
+}
+
+static void __init early_reserve_mem(void)
+{
+	unsigned long start_pfn;
+	u32 zero_base = (u32)__MEMORY_START + (u32)PHYSICAL_OFFSET;
+	u32 start = zero_base + (u32)CONFIG_ZERO_PAGE_OFFSET;
+
+	/*
+	 * Partially used pages are not usable - thus
+	 * we are rounding upwards:
+	 */
+	start_pfn = PFN_UP(__pa(_end));
+
+	/*
+	 * Reserve the kernel text and Reserve the bootmem bitmap. We do
+	 * this in two steps (first step was init_bootmem()), because
+	 * this catches the (definitely buggy) case of us accidentally
+	 * initializing the bootmem allocator with an invalid RAM area.
+	 */
+	memblock_reserve(start, (PFN_PHYS(start_pfn) + PAGE_SIZE - 1) - start);
+
+	/*
+	 * Reserve physical pages below CONFIG_ZERO_PAGE_OFFSET.
+	 */
+	if (CONFIG_ZERO_PAGE_OFFSET != 0)
+		memblock_reserve(zero_base, CONFIG_ZERO_PAGE_OFFSET);
+
+	/*
+	 * Handle additional early reservations
+	 */
+	check_for_initrd();
+	reserve_crashkernel();
+}
+
 void __init paging_init(void)
 {
 	unsigned long max_zone_pfns[MAX_NR_ZONES];
 	unsigned long vaddr, end;
 	int nid;
 
+	sh_mv.mv_mem_init();
+
+	early_reserve_mem();
+
+	/*
+	 * Once the early reservations are out of the way, give the
+	 * platforms a chance to kick out some memory.
+	 */
+	if (sh_mv.mv_mem_reserve)
+		sh_mv.mv_mem_reserve();
+
+	memblock_enforce_memory_limit(memory_limit);
+	memblock_allow_resize();
+
+	memblock_dump_all();
+
+	/*
+	 * Determine low and high memory ranges:
+	 */
+	max_low_pfn = max_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
+	min_low_pfn = __MEMORY_START >> PAGE_SHIFT;
+
+	nodes_clear(node_online_map);
+
+	memory_start = (unsigned long)__va(__MEMORY_START);
+	memory_end = memory_start + (memory_limit ?: memblock_phys_mem_size());
+
+	uncached_init();
+	pmb_init();
+	do_init_bootmem();
+	ioremap_fixed_init();
+
 	/* We don't need to map the kernel through the TLB, as
 	 * it is permanatly mapped using P1. So clear the
 	 * entire pgd. */
@@ -211,32 +407,16 @@ unsigned int mem_init_done = 0;
 
 void __init mem_init(void)
 {
-	int codesize, datasize, initsize;
-	int nid;
+	pg_data_t *pgdat;
 
 	iommu_init();
 
-	num_physpages = 0;
 	high_memory = NULL;
+	for_each_online_pgdat(pgdat)
+		high_memory = max_t(void *, high_memory,
+				    __va(pgdat_end_pfn(pgdat) << PAGE_SHIFT));
 
-	for_each_online_node(nid) {
-		pg_data_t *pgdat = NODE_DATA(nid);
-		unsigned long node_pages = 0;
-		void *node_high_memory;
-
-		num_physpages += pgdat->node_present_pages;
-
-		if (pgdat->node_spanned_pages)
-			node_pages = free_all_bootmem_node(pgdat);
-
-		totalram_pages += node_pages;
-
-		node_high_memory = (void *)__va((pgdat->node_start_pfn +
-						 pgdat->node_spanned_pages) <<
-						 PAGE_SHIFT);
-		if (node_high_memory > high_memory)
-			high_memory = node_high_memory;
-	}
+	free_all_bootmem();
 
 	/* Set this up early, so we can take care of the zero page */
 	cpu_cache_init();
@@ -247,19 +427,8 @@ void __init mem_init(void)
 
 	vsyscall_init();
 
-	codesize =  (unsigned long) &_etext - (unsigned long) &_text;
-	datasize =  (unsigned long) &_edata - (unsigned long) &_etext;
-	initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;
-
-	printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, "
-	       "%dk data, %dk init)\n",
-		nr_free_pages() << (PAGE_SHIFT-10),
-		num_physpages << (PAGE_SHIFT-10),
-		codesize >> 10,
-		datasize >> 10,
-		initsize >> 10);
-
-	printk(KERN_INFO "virtual kernel memory layout:\n"
+	mem_init_print_info(NULL);
+	pr_info("virtual kernel memory layout:\n"
 		"    fixmap  : 0x%08lx - 0x%08lx   (%4ld kB)\n"
 #ifdef CONFIG_HIGHMEM
 		"    pkmap   : 0x%08lx - 0x%08lx   (%4ld kB)\n"
@@ -305,31 +474,13 @@ void __init mem_init(void)
 
 void free_initmem(void)
 {
-	unsigned long addr;
-
-	addr = (unsigned long)(&__init_begin);
-	for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
-		ClearPageReserved(virt_to_page(addr));
-		init_page_count(virt_to_page(addr));
-		free_page(addr);
-		totalram_pages++;
-	}
-	printk("Freeing unused kernel memory: %ldk freed\n",
-	       ((unsigned long)&__init_end -
-	        (unsigned long)&__init_begin) >> 10);
+	free_initmem_default(-1);
 }
 
 #ifdef CONFIG_BLK_DEV_INITRD
 void free_initrd_mem(unsigned long start, unsigned long end)
 {
-	unsigned long p;
-	for (p = start; p < end; p += PAGE_SIZE) {
-		ClearPageReserved(virt_to_page(p));
-		init_page_count(virt_to_page(p));
-		free_page(p);
-		totalram_pages++;
-	}
-	printk("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
+	free_reserved_area((void *)start, (void *)end, -1, "initrd");
 }
 #endif
 
@@ -362,4 +513,21 @@ int memory_add_physaddr_to_nid(u64 addr)
 EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
 #endif
 
+#ifdef CONFIG_MEMORY_HOTREMOVE
+int arch_remove_memory(u64 start, u64 size)
+{
+	unsigned long start_pfn = start >> PAGE_SHIFT;
+	unsigned long nr_pages = size >> PAGE_SHIFT;
+	struct zone *zone;
+	int ret;
+
+	zone = page_zone(pfn_to_page(start_pfn));
+	ret = __remove_pages(zone, start_pfn, nr_pages);
+	if (unlikely(ret))
+		pr_warn("%s: Failed, __remove_pages() == %d\n", __func__,
+			ret);
+
+	return ret;
+}
+#endif
 #endif /* CONFIG_MEMORY_HOTPLUG */
diff --git a/arch/sh/mm/ioremap.c b/arch/sh/mm/ioremap.c
index 1ab2385ecef..0c99ec2e7ed 100644
--- a/arch/sh/mm/ioremap.c
+++ b/arch/sh/mm/ioremap.c
@@ -14,6 +14,7 @@
  */
 #include <linux/vmalloc.h>
 #include <linux/module.h>
+#include <linux/slab.h>
 #include <linux/mm.h>
 #include <linux/pci.h>
 #include <linux/io.h>
diff --git a/arch/sh/mm/ioremap_fixed.c b/arch/sh/mm/ioremap_fixed.c
index 7f682e5dafc..efbe84af998 100644
--- a/arch/sh/mm/ioremap_fixed.c
+++ b/arch/sh/mm/ioremap_fixed.c
@@ -15,7 +15,6 @@
 #include <linux/io.h>
 #include <linux/bootmem.h>
 #include <linux/proc_fs.h>
-#include <linux/slab.h>
 #include <asm/fixmap.h>
 #include <asm/page.h>
 #include <asm/pgalloc.h>
diff --git a/arch/sh/mm/kmap.c b/arch/sh/mm/kmap.c
index 15d74ea4209..ec29e14ec5a 100644
--- a/arch/sh/mm/kmap.c
+++ b/arch/sh/mm/kmap.c
@@ -34,7 +34,7 @@ void *kmap_coherent(struct page *page, unsigned long addr)
 	enum fixed_addresses idx;
 	unsigned long vaddr;
 
-	BUG_ON(test_bit(PG_dcache_dirty, &page->flags));
+	BUG_ON(!test_bit(PG_dcache_clean, &page->flags));
 
 	pagefault_disable();
 
diff --git a/arch/sh/mm/mmap.c b/arch/sh/mm/mmap.c
index afeb710ec5c..6777177807c 100644
--- a/arch/sh/mm/mmap.c
+++ b/arch/sh/mm/mmap.c
@@ -30,25 +30,13 @@ static inline unsigned long COLOUR_ALIGN(unsigned long addr,
 	return base + off;
 }
 
-static inline unsigned long COLOUR_ALIGN_DOWN(unsigned long addr,
-					      unsigned long pgoff)
-{
-	unsigned long base = addr & ~shm_align_mask;
-	unsigned long off = (pgoff << PAGE_SHIFT) & shm_align_mask;
-
-	if (base + off <= addr)
-		return base + off;
-
-	return base - off;
-}
-
 unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
 	unsigned long len, unsigned long pgoff, unsigned long flags)
 {
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma;
-	unsigned long start_addr;
 	int do_colour_align;
+	struct vm_unmapped_area_info info;
 
 	if (flags & MAP_FIXED) {
 		/* We do not accept a shared mapping if it would violate
@@ -79,47 +67,13 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
 			return addr;
 	}
 
-	if (len > mm->cached_hole_size) {
-		start_addr = addr = mm->free_area_cache;
-	} else {
-	        mm->cached_hole_size = 0;
-		start_addr = addr = TASK_UNMAPPED_BASE;
-	}
-
-full_search:
-	if (do_colour_align)
-		addr = COLOUR_ALIGN(addr, pgoff);
-	else
-		addr = PAGE_ALIGN(mm->free_area_cache);
-
-	for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
-		/* At this point:  (!vma || addr < vma->vm_end). */
-		if (unlikely(TASK_SIZE - len < addr)) {
-			/*
-			 * Start a new search - just in case we missed
-			 * some holes.
-			 */
-			if (start_addr != TASK_UNMAPPED_BASE) {
-				start_addr = addr = TASK_UNMAPPED_BASE;
-				mm->cached_hole_size = 0;
-				goto full_search;
-			}
-			return -ENOMEM;
-		}
-		if (likely(!vma || addr + len <= vma->vm_start)) {
-			/*
-			 * Remember the place where we stopped the search:
-			 */
-			mm->free_area_cache = addr + len;
-			return addr;
-		}
-		if (addr + mm->cached_hole_size < vma->vm_start)
-		        mm->cached_hole_size = vma->vm_start - addr;
-
-		addr = vma->vm_end;
-		if (do_colour_align)
-			addr = COLOUR_ALIGN(addr, pgoff);
-	}
+	info.flags = 0;
+	info.length = len;
+	info.low_limit = TASK_UNMAPPED_BASE;
+	info.high_limit = TASK_SIZE;
+	info.align_mask = do_colour_align ? (PAGE_MASK & shm_align_mask) : 0;
+	info.align_offset = pgoff << PAGE_SHIFT;
+	return vm_unmapped_area(&info);
 }
 
 unsigned long
@@ -131,6 +85,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 	struct mm_struct *mm = current->mm;
 	unsigned long addr = addr0;
 	int do_colour_align;
+	struct vm_unmapped_area_info info;
 
 	if (flags & MAP_FIXED) {
 		/* We do not accept a shared mapping if it would violate
@@ -162,73 +117,27 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 			return addr;
 	}
 
-	/* check if free_area_cache is useful for us */
-	if (len <= mm->cached_hole_size) {
-	        mm->cached_hole_size = 0;
-		mm->free_area_cache = mm->mmap_base;
-	}
-
-	/* either no address requested or can't fit in requested address hole */
-	addr = mm->free_area_cache;
-	if (do_colour_align) {
-		unsigned long base = COLOUR_ALIGN_DOWN(addr-len, pgoff);
+	info.flags = VM_UNMAPPED_AREA_TOPDOWN;
+	info.length = len;
+	info.low_limit = PAGE_SIZE;
+	info.high_limit = mm->mmap_base;
+	info.align_mask = do_colour_align ? (PAGE_MASK & shm_align_mask) : 0;
+	info.align_offset = pgoff << PAGE_SHIFT;
+	addr = vm_unmapped_area(&info);
 
-		addr = base + len;
-	}
-
-	/* make sure it can fit in the remaining address space */
-	if (likely(addr > len)) {
-		vma = find_vma(mm, addr-len);
-		if (!vma || addr <= vma->vm_start) {
-			/* remember the address as a hint for next time */
-			return (mm->free_area_cache = addr-len);
-		}
-	}
-
-	if (unlikely(mm->mmap_base < len))
-		goto bottomup;
-
-	addr = mm->mmap_base-len;
-	if (do_colour_align)
-		addr = COLOUR_ALIGN_DOWN(addr, pgoff);
-
-	do {
-		/*
-		 * Lookup failure means no vma is above this address,
-		 * else if new region fits below vma->vm_start,
-		 * return with success:
-		 */
-		vma = find_vma(mm, addr);
-		if (likely(!vma || addr+len <= vma->vm_start)) {
-			/* remember the address as a hint for next time */
-			return (mm->free_area_cache = addr);
-		}
-
-		/* remember the largest hole we saw so far */
-		if (addr + mm->cached_hole_size < vma->vm_start)
-		        mm->cached_hole_size = vma->vm_start - addr;
-
-		/* try just below the current vma->vm_start */
-		addr = vma->vm_start-len;
-		if (do_colour_align)
-			addr = COLOUR_ALIGN_DOWN(addr, pgoff);
-	} while (likely(len < vma->vm_start));
-
-bottomup:
 	/*
 	 * A failed mmap() very likely causes application failure,
 	 * so fall back to the bottom-up function here. This scenario
 	 * can happen with large stack limits and large mmap()
 	 * allocations.
 	 */
-	mm->cached_hole_size = ~0UL;
-	mm->free_area_cache = TASK_UNMAPPED_BASE;
-	addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
-	/*
-	 * Restore the topdown base:
-	 */
-	mm->free_area_cache = mm->mmap_base;
-	mm->cached_hole_size = ~0UL;
+	if (addr & ~PAGE_MASK) {
+		VM_BUG_ON(addr != -ENOMEM);
+		info.flags = 0;
+		info.low_limit = TASK_UNMAPPED_BASE;
+		info.high_limit = TASK_SIZE;
+		addr = vm_unmapped_area(&info);
+	}
 
 	return addr;
 }
@@ -238,7 +147,7 @@ bottomup:
  * You really shouldn't be using read() or write() on /dev/mem.  This
  * might go away in the future.
  */
-int valid_phys_addr_range(unsigned long addr, size_t count)
+int valid_phys_addr_range(phys_addr_t addr, size_t count)
 {
 	if (addr < __MEMORY_START)
 		return 0;
diff --git a/arch/sh/mm/nommu.c b/arch/sh/mm/nommu.c
index 7694f50c903..36312d254fa 100644
--- a/arch/sh/mm/nommu.c
+++ b/arch/sh/mm/nommu.c
@@ -67,6 +67,10 @@ void local_flush_tlb_kernel_range(unsigned long start, unsigned long end)
 	BUG();
 }
 
+void __flush_tlb_global(void)
+{
+}
+
 void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t pte)
 {
 }
diff --git a/arch/sh/mm/numa.c b/arch/sh/mm/numa.c
index 961b34085e3..3d85225b9e9 100644
--- a/arch/sh/mm/numa.c
+++ b/arch/sh/mm/numa.c
@@ -9,7 +9,7 @@
  */
 #include <linux/module.h>
 #include <linux/bootmem.h>
-#include <linux/lmb.h>
+#include <linux/memblock.h>
 #include <linux/mm.h>
 #include <linux/numa.h>
 #include <linux/pfn.h>
@@ -24,44 +24,6 @@ EXPORT_SYMBOL_GPL(node_data);
  * latency. Each node's pgdat is node-local at the beginning of the node,
  * immediately followed by the node mem map.
  */
-void __init setup_memory(void)
-{
-	unsigned long free_pfn = PFN_UP(__pa(_end));
-	u64 base = min_low_pfn << PAGE_SHIFT;
-	u64 size = (max_low_pfn << PAGE_SHIFT) - base;
-
-	lmb_add(base, size);
-
-	/* Reserve the LMB regions used by the kernel, initrd, etc.. */
-	lmb_reserve(__MEMORY_START + CONFIG_ZERO_PAGE_OFFSET,
-		    (PFN_PHYS(free_pfn) + PAGE_SIZE - 1) -
-		    (__MEMORY_START + CONFIG_ZERO_PAGE_OFFSET));
-
-	/*
-	 * Reserve physical pages below CONFIG_ZERO_PAGE_OFFSET.
-	 */
-	if (CONFIG_ZERO_PAGE_OFFSET != 0)
-		lmb_reserve(__MEMORY_START, CONFIG_ZERO_PAGE_OFFSET);
-
-	lmb_analyze();
-	lmb_dump_all();
-
-	/*
-	 * Node 0 sets up its pgdat at the first available pfn,
-	 * and bumps it up before setting up the bootmem allocator.
-	 */
-	NODE_DATA(0) = pfn_to_kaddr(free_pfn);
-	memset(NODE_DATA(0), 0, sizeof(struct pglist_data));
-	free_pfn += PFN_UP(sizeof(struct pglist_data));
-	NODE_DATA(0)->bdata = &bootmem_node_data[0];
-
-	/* Set up node 0 */
-	setup_bootmem_allocator(free_pfn);
-
-	/* Give the platforms a chance to hook up their nodes */
-	plat_mem_setup();
-}
-
 void __init setup_bootmem_node(int nid, unsigned long start, unsigned long end)
 {
 	unsigned long bootmap_pages;
@@ -77,12 +39,12 @@ void __init setup_bootmem_node(int nid, unsigned long start, unsigned long end)
 	pmb_bolt_mapping((unsigned long)__va(start), start, end - start,
 			 PAGE_KERNEL);
 
-	lmb_add(start, end - start);
+	memblock_add(start, end - start);
 
 	__add_active_range(nid, start_pfn, end_pfn);
 
 	/* Node-local pgdat */
-	NODE_DATA(nid) = __va(lmb_alloc_base(sizeof(struct pglist_data),
+	NODE_DATA(nid) = __va(memblock_alloc_base(sizeof(struct pglist_data),
 					     SMP_CACHE_BYTES, end));
 	memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
 
@@ -92,7 +54,7 @@ void __init setup_bootmem_node(int nid, unsigned long start, unsigned long end)
 
 	/* Node-local bootmap */
 	bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
-	bootmem_paddr = lmb_alloc_base(bootmap_pages << PAGE_SHIFT,
+	bootmem_paddr = memblock_alloc_base(bootmap_pages << PAGE_SHIFT,
 				       PAGE_SIZE, end);
 	init_bootmem_node(NODE_DATA(nid), bootmem_paddr >> PAGE_SHIFT,
 			  start_pfn, end_pfn);
diff --git a/arch/sh/mm/pgtable.c b/arch/sh/mm/pgtable.c
index 6f21fb1d872..26e03a1f7ca 100644
--- a/arch/sh/mm/pgtable.c
+++ b/arch/sh/mm/pgtable.c
@@ -1,4 +1,5 @@
 #include <linux/mm.h>
+#include <linux/slab.h>
 
 #define PGALLOC_GFP GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO
 
diff --git a/arch/sh/mm/pmb.c b/arch/sh/mm/pmb.c
index a4662e2782c..7160c9fd6fe 100644
--- a/arch/sh/mm/pmb.c
+++ b/arch/sh/mm/pmb.c
@@ -3,7 +3,7 @@
  *
  * Privileged Space Mapping Buffer (PMB) Support.
  *
- * Copyright (C) 2005 - 2010  Paul Mundt
+ * Copyright (C) 2005 - 2011  Paul Mundt
  * Copyright (C) 2010  Matt Fleming
  *
  * This file is subject to the terms and conditions of the GNU General Public
@@ -12,10 +12,9 @@
  */
 #include <linux/init.h>
 #include <linux/kernel.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/cpu.h>
 #include <linux/module.h>
-#include <linux/slab.h>
 #include <linux/bitops.h>
 #include <linux/debugfs.h>
 #include <linux/fs.h>
@@ -26,7 +25,6 @@
 #include <linux/vmalloc.h>
 #include <asm/cacheflush.h>
 #include <asm/sizes.h>
-#include <asm/system.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/page.h>
@@ -41,7 +39,7 @@ struct pmb_entry {
 	unsigned long flags;
 	unsigned long size;
 
-	spinlock_t lock;
+	raw_spinlock_t lock;
 
 	/*
 	 * 0 .. NR_PMB_ENTRIES for specific entry selection, or
@@ -266,7 +264,7 @@ static struct pmb_entry *pmb_alloc(unsigned long vpn, unsigned long ppn,
 
 	memset(pmbe, 0, sizeof(struct pmb_entry));
 
-	spin_lock_init(&pmbe->lock);
+	raw_spin_lock_init(&pmbe->lock);
 
 	pmbe->vpn	= vpn;
 	pmbe->ppn	= ppn;
@@ -323,14 +321,16 @@ static void __clear_pmb_entry(struct pmb_entry *pmbe)
 	writel_uncached(data_val & ~PMB_V, data);
 }
 
+#ifdef CONFIG_PM
 static void set_pmb_entry(struct pmb_entry *pmbe)
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(&pmbe->lock, flags);
+	raw_spin_lock_irqsave(&pmbe->lock, flags);
 	__set_pmb_entry(pmbe);
-	spin_unlock_irqrestore(&pmbe->lock, flags);
+	raw_spin_unlock_irqrestore(&pmbe->lock, flags);
 }
+#endif /* CONFIG_PM */
 
 int pmb_bolt_mapping(unsigned long vaddr, phys_addr_t phys,
 		     unsigned long size, pgprot_t prot)
@@ -340,6 +340,8 @@ int pmb_bolt_mapping(unsigned long vaddr, phys_addr_t phys,
 	unsigned long flags, pmb_flags;
 	int i, mapped;
 
+	if (size < SZ_16M)
+		return -EINVAL;
 	if (!pmb_addr_valid(vaddr, size))
 		return -EFAULT;
 	if (pmb_mapping_exists(vaddr, phys, size))
@@ -365,7 +367,7 @@ int pmb_bolt_mapping(unsigned long vaddr, phys_addr_t phys,
 				return PTR_ERR(pmbe);
 			}
 
-			spin_lock_irqsave(&pmbe->lock, flags);
+			raw_spin_lock_irqsave(&pmbe->lock, flags);
 
 			pmbe->size = pmb_sizes[i].size;
 
@@ -380,9 +382,10 @@ int pmb_bolt_mapping(unsigned long vaddr, phys_addr_t phys,
 			 * entries for easier tear-down.
 			 */
 			if (likely(pmbp)) {
-				spin_lock(&pmbp->lock);
+				raw_spin_lock_nested(&pmbp->lock,
+						     SINGLE_DEPTH_NESTING);
 				pmbp->link = pmbe;
-				spin_unlock(&pmbp->lock);
+				raw_spin_unlock(&pmbp->lock);
 			}
 
 			pmbp = pmbe;
@@ -395,7 +398,7 @@ int pmb_bolt_mapping(unsigned long vaddr, phys_addr_t phys,
 			i--;
 			mapped++;
 
-			spin_unlock_irqrestore(&pmbe->lock, flags);
+			raw_spin_unlock_irqrestore(&pmbe->lock, flags);
 		}
 	} while (size >= SZ_16M);
 
@@ -624,15 +627,14 @@ static void __init pmb_synchronize(void)
 			continue;
 		}
 
-		spin_lock_irqsave(&pmbe->lock, irqflags);
+		raw_spin_lock_irqsave(&pmbe->lock, irqflags);
 
 		for (j = 0; j < ARRAY_SIZE(pmb_sizes); j++)
 			if (pmb_sizes[j].flag == size)
 				pmbe->size = pmb_sizes[j].size;
 
 		if (pmbp) {
-			spin_lock(&pmbp->lock);
-
+			raw_spin_lock_nested(&pmbp->lock, SINGLE_DEPTH_NESTING);
 			/*
 			 * Compare the previous entry against the current one to
 			 * see if the entries span a contiguous mapping. If so,
@@ -641,13 +643,12 @@ static void __init pmb_synchronize(void)
 			 */
 			if (pmb_can_merge(pmbp, pmbe))
 				pmbp->link = pmbe;
-
-			spin_unlock(&pmbp->lock);
+			raw_spin_unlock(&pmbp->lock);
 		}
 
 		pmbp = pmbe;
 
-		spin_unlock_irqrestore(&pmbe->lock, irqflags);
+		raw_spin_unlock_irqrestore(&pmbe->lock, irqflags);
 	}
 }
 
@@ -679,7 +680,7 @@ static void __init pmb_merge(struct pmb_entry *head)
 	/*
 	 * The merged page size must be valid.
 	 */
-	if (!pmb_size_valid(newsize))
+	if (!depth || !pmb_size_valid(newsize))
 		return;
 
 	head->flags &= ~PMB_SZ_MASK;
@@ -754,7 +755,7 @@ static void __init pmb_resize(void)
 		/*
 		 * Found it, now resize it.
 		 */
-		spin_lock_irqsave(&pmbe->lock, flags);
+		raw_spin_lock_irqsave(&pmbe->lock, flags);
 
 		pmbe->size = SZ_16M;
 		pmbe->flags &= ~PMB_SZ_MASK;
@@ -764,10 +765,10 @@ static void __init pmb_resize(void)
 
 		__set_pmb_entry(pmbe);
 
-		spin_unlock_irqrestore(&pmbe->lock, flags);
+		raw_spin_unlock_irqrestore(&pmbe->lock, flags);
 	}
 
-	read_lock(&pmb_rwlock);
+	read_unlock(&pmb_rwlock);
 }
 #endif
 
@@ -802,7 +803,7 @@ void __init pmb_init(void)
 	writel_uncached(0, PMB_IRMCR);
 
 	/* Flush out the TLB */
-	__raw_writel(__raw_readl(MMUCR) | MMUCR_TI, MMUCR);
+	local_flush_tlb_all();
 	ctrl_barrier();
 }
 
@@ -863,57 +864,40 @@ static int __init pmb_debugfs_init(void)
 	struct dentry *dentry;
 
 	dentry = debugfs_create_file("pmb", S_IFREG | S_IRUGO,
-				     sh_debugfs_root, NULL, &pmb_debugfs_fops);
+				     arch_debugfs_dir, NULL, &pmb_debugfs_fops);
 	if (!dentry)
 		return -ENOMEM;
-	if (IS_ERR(dentry))
-		return PTR_ERR(dentry);
 
 	return 0;
 }
 subsys_initcall(pmb_debugfs_init);
 
 #ifdef CONFIG_PM
-static int pmb_sysdev_suspend(struct sys_device *dev, pm_message_t state)
+static void pmb_syscore_resume(void)
 {
-	static pm_message_t prev_state;
+	struct pmb_entry *pmbe;
 	int i;
 
-	/* Restore the PMB after a resume from hibernation */
-	if (state.event == PM_EVENT_ON &&
-	    prev_state.event == PM_EVENT_FREEZE) {
-		struct pmb_entry *pmbe;
-
-		read_lock(&pmb_rwlock);
+	read_lock(&pmb_rwlock);
 
-		for (i = 0; i < ARRAY_SIZE(pmb_entry_list); i++) {
-			if (test_bit(i, pmb_map)) {
-				pmbe = &pmb_entry_list[i];
-				set_pmb_entry(pmbe);
-			}
+	for (i = 0; i < ARRAY_SIZE(pmb_entry_list); i++) {
+		if (test_bit(i, pmb_map)) {
+			pmbe = &pmb_entry_list[i];
+			set_pmb_entry(pmbe);
 		}
-
-		read_unlock(&pmb_rwlock);
 	}
 
-	prev_state = state;
-
-	return 0;
-}
-
-static int pmb_sysdev_resume(struct sys_device *dev)
-{
-	return pmb_sysdev_suspend(dev, PMSG_ON);
+	read_unlock(&pmb_rwlock);
 }
 
-static struct sysdev_driver pmb_sysdev_driver = {
-	.suspend = pmb_sysdev_suspend,
-	.resume = pmb_sysdev_resume,
+static struct syscore_ops pmb_syscore_ops = {
+	.resume = pmb_syscore_resume,
 };
 
 static int __init pmb_sysdev_init(void)
 {
-	return sysdev_driver_register(&cpu_sysdev_class, &pmb_sysdev_driver);
+	register_syscore_ops(&pmb_syscore_ops);
+	return 0;
 }
 subsys_initcall(pmb_sysdev_init);
 #endif
diff --git a/arch/sh/mm/sram.c b/arch/sh/mm/sram.c
new file mode 100644
index 00000000000..2d8fa718d55
--- /dev/null
+++ b/arch/sh/mm/sram.c
@@ -0,0 +1,35 @@
+/*
+ * SRAM pool for tiny memories not otherwise managed.
+ *
+ * Copyright (C) 2010  Paul Mundt
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <asm/sram.h>
+
+/*
+ * This provides a standard SRAM pool for tiny memories that can be
+ * added either by the CPU or the platform code. Typical SRAM sizes
+ * to be inserted in to the pool will generally be less than the page
+ * size, with anything more reasonably sized handled as a NUMA memory
+ * node.
+ */
+struct gen_pool *sram_pool;
+
+static int __init sram_pool_init(void)
+{
+	/*
+	 * This is a global pool, we don't care about node locality.
+	 */
+	sram_pool = gen_pool_create(1, -1);
+	if (unlikely(!sram_pool))
+		return -ENOMEM;
+
+	return 0;
+}
+core_initcall(sram_pool_init);
diff --git a/arch/sh/mm/tlb-debugfs.c b/arch/sh/mm/tlb-debugfs.c
new file mode 100644
index 00000000000..dea637a0924
--- /dev/null
+++ b/arch/sh/mm/tlb-debugfs.c
@@ -0,0 +1,172 @@
+/*
+ * arch/sh/mm/tlb-debugfs.c
+ *
+ * debugfs ops for SH-4 ITLB/UTLBs.
+ *
+ * Copyright (C) 2010  Matt Fleming
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <asm/processor.h>
+#include <asm/mmu_context.h>
+#include <asm/tlbflush.h>
+
+enum tlb_type {
+	TLB_TYPE_ITLB,
+	TLB_TYPE_UTLB,
+};
+
+static struct {
+	int bits;
+	const char *size;
+} tlb_sizes[] = {
+	{ 0x0, "  1KB" },
+	{ 0x1, "  4KB" },
+	{ 0x2, "  8KB" },
+	{ 0x4, " 64KB" },
+	{ 0x5, "256KB" },
+	{ 0x7, "  1MB" },
+	{ 0x8, "  4MB" },
+	{ 0xc, " 64MB" },
+};
+
+static int tlb_seq_show(struct seq_file *file, void *iter)
+{
+	unsigned int tlb_type = (unsigned int)file->private;
+	unsigned long addr1, addr2, data1, data2;
+	unsigned long flags;
+	unsigned long mmucr;
+	unsigned int nentries, entry;
+	unsigned int urb;
+
+	mmucr = __raw_readl(MMUCR);
+	if ((mmucr & 0x1) == 0) {
+		seq_printf(file, "address translation disabled\n");
+		return 0;
+	}
+
+	if (tlb_type == TLB_TYPE_ITLB) {
+		addr1 = MMU_ITLB_ADDRESS_ARRAY;
+		addr2 = MMU_ITLB_ADDRESS_ARRAY2;
+		data1 = MMU_ITLB_DATA_ARRAY;
+		data2 = MMU_ITLB_DATA_ARRAY2;
+		nentries = 4;
+	} else {
+		addr1 = MMU_UTLB_ADDRESS_ARRAY;
+		addr2 = MMU_UTLB_ADDRESS_ARRAY2;
+		data1 = MMU_UTLB_DATA_ARRAY;
+		data2 = MMU_UTLB_DATA_ARRAY2;
+		nentries = 64;
+	}
+
+	local_irq_save(flags);
+	jump_to_uncached();
+
+	urb = (mmucr & MMUCR_URB) >> MMUCR_URB_SHIFT;
+
+	/* Make the "entry >= urb" test fail. */
+	if (urb == 0)
+		urb = MMUCR_URB_NENTRIES + 1;
+
+	if (tlb_type == TLB_TYPE_ITLB) {
+		addr1 = MMU_ITLB_ADDRESS_ARRAY;
+		addr2 = MMU_ITLB_ADDRESS_ARRAY2;
+		data1 = MMU_ITLB_DATA_ARRAY;
+		data2 = MMU_ITLB_DATA_ARRAY2;
+		nentries = 4;
+	} else {
+		addr1 = MMU_UTLB_ADDRESS_ARRAY;
+		addr2 = MMU_UTLB_ADDRESS_ARRAY2;
+		data1 = MMU_UTLB_DATA_ARRAY;
+		data2 = MMU_UTLB_DATA_ARRAY2;
+		nentries = 64;
+	}
+
+	seq_printf(file, "entry:     vpn        ppn     asid  size valid wired\n");
+
+	for (entry = 0; entry < nentries; entry++) {
+		unsigned long vpn, ppn, asid, size;
+		unsigned long valid;
+		unsigned long val;
+		const char *sz = "    ?";
+		int i;
+
+		val = __raw_readl(addr1 | (entry << MMU_TLB_ENTRY_SHIFT));
+		ctrl_barrier();
+		vpn = val & 0xfffffc00;
+		valid = val & 0x100;
+
+		val = __raw_readl(addr2 | (entry << MMU_TLB_ENTRY_SHIFT));
+		ctrl_barrier();
+		asid = val & MMU_CONTEXT_ASID_MASK;
+
+		val = __raw_readl(data1 | (entry << MMU_TLB_ENTRY_SHIFT));
+		ctrl_barrier();
+		ppn = (val & 0x0ffffc00) << 4;
+
+		val = __raw_readl(data2 | (entry << MMU_TLB_ENTRY_SHIFT));
+		ctrl_barrier();
+		size = (val & 0xf0) >> 4;
+
+		for (i = 0; i < ARRAY_SIZE(tlb_sizes); i++) {
+			if (tlb_sizes[i].bits == size)
+				break;
+		}
+
+		if (i != ARRAY_SIZE(tlb_sizes))
+			sz = tlb_sizes[i].size;
+
+		seq_printf(file, "%2d:    0x%08lx 0x%08lx %5lu %s   %s     %s\n",
+			   entry, vpn, ppn, asid,
+			   sz, valid ? "V" : "-",
+			   (urb <= entry) ? "W" : "-");
+	}
+
+	back_to_cached();
+	local_irq_restore(flags);
+
+	return 0;
+}
+
+static int tlb_debugfs_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, tlb_seq_show, inode->i_private);
+}
+
+static const struct file_operations tlb_debugfs_fops = {
+	.owner		= THIS_MODULE,
+	.open		= tlb_debugfs_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static int __init tlb_debugfs_init(void)
+{
+	struct dentry *itlb, *utlb;
+
+	itlb = debugfs_create_file("itlb", S_IRUSR, arch_debugfs_dir,
+				   (unsigned int *)TLB_TYPE_ITLB,
+				   &tlb_debugfs_fops);
+	if (unlikely(!itlb))
+		return -ENOMEM;
+
+	utlb = debugfs_create_file("utlb", S_IRUSR, arch_debugfs_dir,
+				   (unsigned int *)TLB_TYPE_UTLB,
+				   &tlb_debugfs_fops);
+	if (unlikely(!utlb)) {
+		debugfs_remove(itlb);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+module_init(tlb_debugfs_init);
+
+MODULE_LICENSE("GPL v2");
diff --git a/arch/sh/mm/tlb-pteaex.c b/arch/sh/mm/tlb-pteaex.c
index 32dc674c550..4db21adfe5d 100644
--- a/arch/sh/mm/tlb-pteaex.c
+++ b/arch/sh/mm/tlb-pteaex.c
@@ -12,7 +12,6 @@
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/io.h>
-#include <asm/system.h>
 #include <asm/mmu_context.h>
 #include <asm/cacheflush.h>
 
@@ -73,5 +72,35 @@ void local_flush_tlb_one(unsigned long asid, unsigned long page)
 	jump_to_uncached();
 	__raw_writel(page, MMU_UTLB_ADDRESS_ARRAY | MMU_PAGE_ASSOC_BIT);
 	__raw_writel(asid, MMU_UTLB_ADDRESS_ARRAY2 | MMU_PAGE_ASSOC_BIT);
+	__raw_writel(page, MMU_ITLB_ADDRESS_ARRAY | MMU_PAGE_ASSOC_BIT);
+	__raw_writel(asid, MMU_ITLB_ADDRESS_ARRAY2 | MMU_PAGE_ASSOC_BIT);
 	back_to_cached();
 }
+
+void local_flush_tlb_all(void)
+{
+	unsigned long flags, status;
+	int i;
+
+	/*
+	 * Flush all the TLB.
+	 */
+	local_irq_save(flags);
+	jump_to_uncached();
+
+	status = __raw_readl(MMUCR);
+	status = ((status & MMUCR_URB) >> MMUCR_URB_SHIFT);
+
+	if (status == 0)
+		status = MMUCR_URB_NENTRIES;
+
+	for (i = 0; i < status; i++)
+		__raw_writel(0x0, MMU_UTLB_ADDRESS_ARRAY | (i << 8));
+
+	for (i = 0; i < 4; i++)
+		__raw_writel(0x0, MMU_ITLB_ADDRESS_ARRAY | (i << 8));
+
+	back_to_cached();
+	ctrl_barrier();
+	local_irq_restore(flags);
+}
diff --git a/arch/sh/mm/tlb-sh3.c b/arch/sh/mm/tlb-sh3.c
index 4f5f7cbdd50..6554fb439f0 100644
--- a/arch/sh/mm/tlb-sh3.c
+++ b/arch/sh/mm/tlb-sh3.c
@@ -20,7 +20,6 @@
 #include <linux/smp.h>
 #include <linux/interrupt.h>
 
-#include <asm/system.h>
 #include <asm/io.h>
 #include <asm/uaccess.h>
 #include <asm/pgalloc.h>
@@ -77,3 +76,22 @@ void local_flush_tlb_one(unsigned long asid, unsigned long page)
 	for (i = 0; i < ways; i++)
 		__raw_writel(data, addr + (i << 8));
 }
+
+void local_flush_tlb_all(void)
+{
+	unsigned long flags, status;
+
+	/*
+	 * Flush all the TLB.
+	 *
+	 * Write to the MMU control register's bit:
+	 *	TF-bit for SH-3, TI-bit for SH-4.
+	 *      It's same position, bit #2.
+	 */
+	local_irq_save(flags);
+	status = __raw_readl(MMUCR);
+	status |= 0x04;
+	__raw_writel(status, MMUCR);
+	ctrl_barrier();
+	local_irq_restore(flags);
+}
diff --git a/arch/sh/mm/tlb-sh4.c b/arch/sh/mm/tlb-sh4.c
index ccac77f504a..d42dd7e443d 100644
--- a/arch/sh/mm/tlb-sh4.c
+++ b/arch/sh/mm/tlb-sh4.c
@@ -11,7 +11,6 @@
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/io.h>
-#include <asm/system.h>
 #include <asm/mmu_context.h>
 #include <asm/cacheflush.h>
 
@@ -80,3 +79,31 @@ void local_flush_tlb_one(unsigned long asid, unsigned long page)
 	__raw_writel(data, addr);
 	back_to_cached();
 }
+
+void local_flush_tlb_all(void)
+{
+	unsigned long flags, status;
+	int i;
+
+	/*
+	 * Flush all the TLB.
+	 */
+	local_irq_save(flags);
+	jump_to_uncached();
+
+	status = __raw_readl(MMUCR);
+	status = ((status & MMUCR_URB) >> MMUCR_URB_SHIFT);
+
+	if (status == 0)
+		status = MMUCR_URB_NENTRIES;
+
+	for (i = 0; i < status; i++)
+		__raw_writel(0x0, MMU_UTLB_ADDRESS_ARRAY | (i << 8));
+
+	for (i = 0; i < 4; i++)
+		__raw_writel(0x0, MMU_ITLB_ADDRESS_ARRAY | (i << 8));
+
+	back_to_cached();
+	ctrl_barrier();
+	local_irq_restore(flags);
+}
diff --git a/arch/sh/mm/tlb-sh5.c b/arch/sh/mm/tlb-sh5.c
index f27dbe1c159..e4bb2a8e0a6 100644
--- a/arch/sh/mm/tlb-sh5.c
+++ b/arch/sh/mm/tlb-sh5.c
@@ -17,7 +17,7 @@
 /**
  * sh64_tlb_init - Perform initial setup for the DTLB and ITLB.
  */
-int __init sh64_tlb_init(void)
+int sh64_tlb_init(void)
 {
 	/* Assign some sane DTLB defaults */
 	cpu_data->dtlb.entries	= 64;
@@ -182,3 +182,43 @@ void tlb_unwire_entry(void)
 
 	local_irq_restore(flags);
 }
+
+void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t pte)
+{
+	unsigned long long ptel;
+	unsigned long long pteh=0;
+	struct tlb_info *tlbp;
+	unsigned long long next;
+	unsigned int fault_code = get_thread_fault_code();
+
+	/* Get PTEL first */
+	ptel = pte.pte_low;
+
+	/*
+	 * Set PTEH register
+	 */
+	pteh = neff_sign_extend(address & MMU_VPN_MASK);
+
+	/* Set the ASID. */
+	pteh |= get_asid() << PTEH_ASID_SHIFT;
+	pteh |= PTEH_VALID;
+
+	/* Set PTEL register, set_pte has performed the sign extension */
+	ptel &= _PAGE_FLAGS_HARDWARE_MASK; /* drop software flags */
+
+	if (fault_code & FAULT_CODE_ITLB)
+		tlbp = &cpu_data->itlb;
+	else
+		tlbp = &cpu_data->dtlb;
+
+	next = tlbp->next;
+	__flush_tlb_slot(next);
+	asm volatile ("putcfg %0,1,%2\n\n\t"
+		      "putcfg %0,0,%1\n"
+		      :  : "r" (next), "r" (pteh), "r" (ptel) );
+
+	next += TLB_STEP;
+	if (next > tlbp->last)
+		next = tlbp->first;
+	tlbp->next = next;
+}
diff --git a/arch/sh/mm/tlb-urb.c b/arch/sh/mm/tlb-urb.c
index bb5b9098956..c92ce20db39 100644
--- a/arch/sh/mm/tlb-urb.c
+++ b/arch/sh/mm/tlb-urb.c
@@ -24,13 +24,9 @@ void tlb_wire_entry(struct vm_area_struct *vma, unsigned long addr, pte_t pte)
 
 	local_irq_save(flags);
 
-	/* Load the entry into the TLB */
-	__update_tlb(vma, addr, pte);
-
-	/* ... and wire it up. */
 	status = __raw_readl(MMUCR);
 	urb = (status & MMUCR_URB) >> MMUCR_URB_SHIFT;
-	status &= ~MMUCR_URB;
+	status &= ~MMUCR_URC;
 
 	/*
 	 * Make sure we're not trying to wire the last TLB entry slot.
@@ -39,7 +35,23 @@ void tlb_wire_entry(struct vm_area_struct *vma, unsigned long addr, pte_t pte)
 
 	urb = urb % MMUCR_URB_NENTRIES;
 
+	/*
+	 * Insert this entry into the highest non-wired TLB slot (via
+	 * the URC field).
+	 */
+	status |= (urb << MMUCR_URC_SHIFT);
+	__raw_writel(status, MMUCR);
+	ctrl_barrier();
+
+	/* Load the entry into the TLB */
+	__update_tlb(vma, addr, pte);
+
+	/* ... and wire it up. */
+	status = __raw_readl(MMUCR);
+
+	status &= ~MMUCR_URB;
 	status |= (urb << MMUCR_URB_SHIFT);
+
 	__raw_writel(status, MMUCR);
 	ctrl_barrier();
 
diff --git a/arch/sh/mm/tlbex_32.c b/arch/sh/mm/tlbex_32.c
new file mode 100644
index 00000000000..382262dc0c4
--- /dev/null
+++ b/arch/sh/mm/tlbex_32.c
@@ -0,0 +1,78 @@
+/*
+ * TLB miss handler for SH with an MMU.
+ *
+ *  Copyright (C) 1999  Niibe Yutaka
+ *  Copyright (C) 2003 - 2012  Paul Mundt
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/kprobes.h>
+#include <linux/kdebug.h>
+#include <asm/mmu_context.h>
+#include <asm/thread_info.h>
+
+/*
+ * Called with interrupts disabled.
+ */
+asmlinkage int __kprobes
+handle_tlbmiss(struct pt_regs *regs, unsigned long error_code,
+	       unsigned long address)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+	pte_t entry;
+
+	/*
+	 * We don't take page faults for P1, P2, and parts of P4, these
+	 * are always mapped, whether it be due to legacy behaviour in
+	 * 29-bit mode, or due to PMB configuration in 32-bit mode.
+	 */
+	if (address >= P3SEG && address < P3_ADDR_MAX) {
+		pgd = pgd_offset_k(address);
+	} else {
+		if (unlikely(address >= TASK_SIZE || !current->mm))
+			return 1;
+
+		pgd = pgd_offset(current->mm, address);
+	}
+
+	pud = pud_offset(pgd, address);
+	if (pud_none_or_clear_bad(pud))
+		return 1;
+	pmd = pmd_offset(pud, address);
+	if (pmd_none_or_clear_bad(pmd))
+		return 1;
+	pte = pte_offset_kernel(pmd, address);
+	entry = *pte;
+	if (unlikely(pte_none(entry) || pte_not_present(entry)))
+		return 1;
+	if (unlikely(error_code && !pte_write(entry)))
+		return 1;
+
+	if (error_code)
+		entry = pte_mkdirty(entry);
+	entry = pte_mkyoung(entry);
+
+	set_pte(pte, entry);
+
+#if defined(CONFIG_CPU_SH4) && !defined(CONFIG_SMP)
+	/*
+	 * SH-4 does not set MMUCR.RC to the corresponding TLB entry in
+	 * the case of an initial page write exception, so we need to
+	 * flush it in order to avoid potential TLB entry duplication.
+	 */
+	if (error_code == FAULT_CODE_INITIAL)
+		local_flush_tlb_one(get_asid(), address & PAGE_MASK);
+#endif
+
+	set_thread_fault_code(error_code);
+	update_mmu_cache(NULL, address, pte);
+
+	return 0;
+}
diff --git a/arch/sh/mm/tlbex_64.c b/arch/sh/mm/tlbex_64.c
new file mode 100644
index 00000000000..8557548fc53
--- /dev/null
+++ b/arch/sh/mm/tlbex_64.c
@@ -0,0 +1,166 @@
+/*
+ * The SH64 TLB miss.
+ *
+ * Original code from fault.c
+ * Copyright (C) 2000, 2001  Paolo Alberelli
+ *
+ * Fast PTE->TLB refill path
+ * Copyright (C) 2003 Richard.Curnow@superh.com
+ *
+ * IMPORTANT NOTES :
+ * The do_fast_page_fault function is called from a context in entry.S
+ * where very few registers have been saved.  In particular, the code in
+ * this file must be compiled not to use ANY caller-save registers that
+ * are not part of the restricted save set.  Also, it means that code in
+ * this file must not make calls to functions elsewhere in the kernel, or
+ * else the excepting context will see corruption in its caller-save
+ * registers.  Plus, the entry.S save area is non-reentrant, so this code
+ * has to run with SR.BL==1, i.e. no interrupts taken inside it and panic
+ * on any exception.
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/kprobes.h>
+#include <asm/tlb.h>
+#include <asm/io.h>
+#include <asm/uaccess.h>
+#include <asm/pgalloc.h>
+#include <asm/mmu_context.h>
+
+static int handle_tlbmiss(unsigned long long protection_flags,
+			  unsigned long address)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+	pte_t entry;
+
+	if (is_vmalloc_addr((void *)address)) {
+		pgd = pgd_offset_k(address);
+	} else {
+		if (unlikely(address >= TASK_SIZE || !current->mm))
+			return 1;
+
+		pgd = pgd_offset(current->mm, address);
+	}
+
+	pud = pud_offset(pgd, address);
+	if (pud_none(*pud) || !pud_present(*pud))
+		return 1;
+
+	pmd = pmd_offset(pud, address);
+	if (pmd_none(*pmd) || !pmd_present(*pmd))
+		return 1;
+
+	pte = pte_offset_kernel(pmd, address);
+	entry = *pte;
+	if (pte_none(entry) || !pte_present(entry))
+		return 1;
+
+	/*
+	 * If the page doesn't have sufficient protection bits set to
+	 * service the kind of fault being handled, there's not much
+	 * point doing the TLB refill.  Punt the fault to the general
+	 * handler.
+	 */
+	if ((pte_val(entry) & protection_flags) != protection_flags)
+		return 1;
+
+	update_mmu_cache(NULL, address, pte);
+
+	return 0;
+}
+
+/*
+ * Put all this information into one structure so that everything is just
+ * arithmetic relative to a single base address.  This reduces the number
+ * of movi/shori pairs needed just to load addresses of static data.
+ */
+struct expevt_lookup {
+	unsigned short protection_flags[8];
+	unsigned char  is_text_access[8];
+	unsigned char  is_write_access[8];
+};
+
+#define PRU (1<<9)
+#define PRW (1<<8)
+#define PRX (1<<7)
+#define PRR (1<<6)
+
+/* Sized as 8 rather than 4 to allow checking the PTE's PRU bit against whether
+   the fault happened in user mode or privileged mode. */
+static struct expevt_lookup expevt_lookup_table = {
+	.protection_flags = {PRX, PRX, 0, 0, PRR, PRR, PRW, PRW},
+	.is_text_access   = {1,   1,   0, 0, 0,   0,   0,   0}
+};
+
+static inline unsigned int
+expevt_to_fault_code(unsigned long expevt)
+{
+	if (expevt == 0xa40)
+		return FAULT_CODE_ITLB;
+	else if (expevt == 0x060)
+		return FAULT_CODE_WRITE;
+
+	return 0;
+}
+
+/*
+   This routine handles page faults that can be serviced just by refilling a
+   TLB entry from an existing page table entry.  (This case represents a very
+   large majority of page faults.) Return 1 if the fault was successfully
+   handled.  Return 0 if the fault could not be handled.  (This leads into the
+   general fault handling in fault.c which deals with mapping file-backed
+   pages, stack growth, segmentation faults, swapping etc etc)
+ */
+asmlinkage int __kprobes
+do_fast_page_fault(unsigned long long ssr_md, unsigned long long expevt,
+		   unsigned long address)
+{
+	unsigned long long protection_flags;
+	unsigned long long index;
+	unsigned long long expevt4;
+	unsigned int fault_code;
+
+	/* The next few lines implement a way of hashing EXPEVT into a
+	 * small array index which can be used to lookup parameters
+	 * specific to the type of TLBMISS being handled.
+	 *
+	 * Note:
+	 *	ITLBMISS has EXPEVT==0xa40
+	 *	RTLBMISS has EXPEVT==0x040
+	 *	WTLBMISS has EXPEVT==0x060
+	 */
+	expevt4 = (expevt >> 4);
+	/* TODO : xor ssr_md into this expression too. Then we can check
+	 * that PRU is set when it needs to be. */
+	index = expevt4 ^ (expevt4 >> 5);
+	index &= 7;
+
+	fault_code = expevt_to_fault_code(expevt);
+
+	protection_flags = expevt_lookup_table.protection_flags[index];
+
+	if (expevt_lookup_table.is_text_access[index])
+		fault_code |= FAULT_CODE_ITLB;
+	if (!ssr_md)
+		fault_code |= FAULT_CODE_USER;
+
+	set_thread_fault_code(fault_code);
+
+	return handle_tlbmiss(protection_flags, address);
+}
diff --git a/arch/sh/mm/tlbflush_32.c b/arch/sh/mm/tlbflush_32.c
index 004bb3f25b5..a6a20d6de4c 100644
--- a/arch/sh/mm/tlbflush_32.c
+++ b/arch/sh/mm/tlbflush_32.c
@@ -120,21 +120,18 @@ void local_flush_tlb_mm(struct mm_struct *mm)
 	}
 }
 
-void local_flush_tlb_all(void)
+void __flush_tlb_global(void)
 {
-	unsigned long flags, status;
+	unsigned long flags;
+
+	local_irq_save(flags);
 
 	/*
-	 * Flush all the TLB.
-	 *
-	 * Write to the MMU control register's bit:
-	 *	TF-bit for SH-3, TI-bit for SH-4.
-	 *      It's same position, bit #2.
+	 * This is the most destructive of the TLB flushing options,
+	 * and will tear down all of the UTLB/ITLB mappings, including
+	 * wired entries.
 	 */
-	local_irq_save(flags);
-	status = __raw_readl(MMUCR);
-	status |= 0x04;
-	__raw_writel(status, MMUCR);
-	ctrl_barrier();
+	__raw_writel(__raw_readl(MMUCR) | MMUCR_TI, MMUCR);
+
 	local_irq_restore(flags);
 }
diff --git a/arch/sh/mm/tlbflush_64.c b/arch/sh/mm/tlbflush_64.c
index 706da1d3a67..f33fdd2558e 100644
--- a/arch/sh/mm/tlbflush_64.c
+++ b/arch/sh/mm/tlbflush_64.c
@@ -3,7 +3,7 @@
  *
  * Copyright (C) 2000, 2001  Paolo Alberelli
  * Copyright (C) 2003  Richard Curnow (/proc/tlb, bug fixes)
- * Copyright (C) 2003 - 2009 Paul Mundt
+ * Copyright (C) 2003 - 2012 Paul Mundt
  *
  * This file is subject to the terms and conditions of the GNU General Public
  * License.  See the file "COPYING" in the main directory of this archive
@@ -22,313 +22,12 @@
 #include <linux/smp.h>
 #include <linux/perf_event.h>
 #include <linux/interrupt.h>
-#include <asm/system.h>
 #include <asm/io.h>
 #include <asm/tlb.h>
 #include <asm/uaccess.h>
 #include <asm/pgalloc.h>
 #include <asm/mmu_context.h>
 
-extern void die(const char *,struct pt_regs *,long);
-
-#define PFLAG(val,flag)   (( (val) & (flag) ) ? #flag : "" )
-#define PPROT(flag) PFLAG(pgprot_val(prot),flag)
-
-static inline void print_prots(pgprot_t prot)
-{
-	printk("prot is 0x%016llx\n",pgprot_val(prot));
-
-	printk("%s %s %s %s %s\n",PPROT(_PAGE_SHARED),PPROT(_PAGE_READ),
-	       PPROT(_PAGE_EXECUTE),PPROT(_PAGE_WRITE),PPROT(_PAGE_USER));
-}
-
-static inline void print_vma(struct vm_area_struct *vma)
-{
-	printk("vma start 0x%08lx\n", vma->vm_start);
-	printk("vma end   0x%08lx\n", vma->vm_end);
-
-	print_prots(vma->vm_page_prot);
-	printk("vm_flags 0x%08lx\n", vma->vm_flags);
-}
-
-static inline void print_task(struct task_struct *tsk)
-{
-	printk("Task pid %d\n", task_pid_nr(tsk));
-}
-
-static pte_t *lookup_pte(struct mm_struct *mm, unsigned long address)
-{
-	pgd_t *dir;
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *pte;
-	pte_t entry;
-
-	dir = pgd_offset(mm, address);
-	if (pgd_none(*dir))
-		return NULL;
-
-	pud = pud_offset(dir, address);
-	if (pud_none(*pud))
-		return NULL;
-
-	pmd = pmd_offset(pud, address);
-	if (pmd_none(*pmd))
-		return NULL;
-
-	pte = pte_offset_kernel(pmd, address);
-	entry = *pte;
-	if (pte_none(entry) || !pte_present(entry))
-		return NULL;
-
-	return pte;
-}
-
-/*
- * This routine handles page faults.  It determines the address,
- * and the problem, and then passes it off to one of the appropriate
- * routines.
- */
-asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long writeaccess,
-			      unsigned long textaccess, unsigned long address)
-{
-	struct task_struct *tsk;
-	struct mm_struct *mm;
-	struct vm_area_struct * vma;
-	const struct exception_table_entry *fixup;
-	pte_t *pte;
-	int fault;
-
-	/* SIM
-	 * Note this is now called with interrupts still disabled
-	 * This is to cope with being called for a missing IO port
-	 * address with interrupts disabled. This should be fixed as
-	 * soon as we have a better 'fast path' miss handler.
-	 *
-	 * Plus take care how you try and debug this stuff.
-	 * For example, writing debug data to a port which you
-	 * have just faulted on is not going to work.
-	 */
-
-	tsk = current;
-	mm = tsk->mm;
-
-	/* Not an IO address, so reenable interrupts */
-	local_irq_enable();
-
-	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
-
-	/*
-	 * If we're in an interrupt or have no user
-	 * context, we must not take the fault..
-	 */
-	if (in_atomic() || !mm)
-		goto no_context;
-
-	/* TLB misses upon some cache flushes get done under cli() */
-	down_read(&mm->mmap_sem);
-
-	vma = find_vma(mm, address);
-
-	if (!vma) {
-#ifdef DEBUG_FAULT
-		print_task(tsk);
-		printk("%s:%d fault, address is 0x%08x PC %016Lx textaccess %d writeaccess %d\n",
-		       __func__, __LINE__,
-		       address,regs->pc,textaccess,writeaccess);
-		show_regs(regs);
-#endif
-		goto bad_area;
-	}
-	if (vma->vm_start <= address) {
-		goto good_area;
-	}
-
-	if (!(vma->vm_flags & VM_GROWSDOWN)) {
-#ifdef DEBUG_FAULT
-		print_task(tsk);
-		printk("%s:%d fault, address is 0x%08x PC %016Lx textaccess %d writeaccess %d\n",
-		       __func__, __LINE__,
-		       address,regs->pc,textaccess,writeaccess);
-		show_regs(regs);
-
-		print_vma(vma);
-#endif
-		goto bad_area;
-	}
-	if (expand_stack(vma, address)) {
-#ifdef DEBUG_FAULT
-		print_task(tsk);
-		printk("%s:%d fault, address is 0x%08x PC %016Lx textaccess %d writeaccess %d\n",
-		       __func__, __LINE__,
-		       address,regs->pc,textaccess,writeaccess);
-		show_regs(regs);
-#endif
-		goto bad_area;
-	}
-/*
- * Ok, we have a good vm_area for this memory access, so
- * we can handle it..
- */
-good_area:
-	if (textaccess) {
-		if (!(vma->vm_flags & VM_EXEC))
-			goto bad_area;
-	} else {
-		if (writeaccess) {
-			if (!(vma->vm_flags & VM_WRITE))
-				goto bad_area;
-		} else {
-			if (!(vma->vm_flags & VM_READ))
-				goto bad_area;
-		}
-	}
-
-	/*
-	 * If for any reason at all we couldn't handle the fault,
-	 * make sure we exit gracefully rather than endlessly redo
-	 * the fault.
-	 */
-survive:
-	fault = handle_mm_fault(mm, vma, address, writeaccess ? FAULT_FLAG_WRITE : 0);
-	if (unlikely(fault & VM_FAULT_ERROR)) {
-		if (fault & VM_FAULT_OOM)
-			goto out_of_memory;
-		else if (fault & VM_FAULT_SIGBUS)
-			goto do_sigbus;
-		BUG();
-	}
-
-	if (fault & VM_FAULT_MAJOR) {
-		tsk->maj_flt++;
-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
-				     regs, address);
-	} else {
-		tsk->min_flt++;
-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
-				     regs, address);
-	}
-
-	/* If we get here, the page fault has been handled.  Do the TLB refill
-	   now from the newly-setup PTE, to avoid having to fault again right
-	   away on the same instruction. */
-	pte = lookup_pte (mm, address);
-	if (!pte) {
-		/* From empirical evidence, we can get here, due to
-		   !pte_present(pte).  (e.g. if a swap-in occurs, and the page
-		   is swapped back out again before the process that wanted it
-		   gets rescheduled?) */
-		goto no_pte;
-	}
-
-	__do_tlb_refill(address, textaccess, pte);
-
-no_pte:
-
-	up_read(&mm->mmap_sem);
-	return;
-
-/*
- * Something tried to access memory that isn't in our memory map..
- * Fix it, but check if it's kernel or user first..
- */
-bad_area:
-#ifdef DEBUG_FAULT
-	printk("fault:bad area\n");
-#endif
-	up_read(&mm->mmap_sem);
-
-	if (user_mode(regs)) {
-		static int count=0;
-		siginfo_t info;
-		if (count < 4) {
-			/* This is really to help debug faults when starting
-			 * usermode, so only need a few */
-			count++;
-			printk("user mode bad_area address=%08lx pid=%d (%s) pc=%08lx\n",
-				address, task_pid_nr(current), current->comm,
-				(unsigned long) regs->pc);
-#if 0
-			show_regs(regs);
-#endif
-		}
-		if (is_global_init(tsk)) {
-			panic("INIT had user mode bad_area\n");
-		}
-		tsk->thread.address = address;
-		tsk->thread.error_code = writeaccess;
-		info.si_signo = SIGSEGV;
-		info.si_errno = 0;
-		info.si_addr = (void *) address;
-		force_sig_info(SIGSEGV, &info, tsk);
-		return;
-	}
-
-no_context:
-#ifdef DEBUG_FAULT
-	printk("fault:No context\n");
-#endif
-	/* Are we prepared to handle this kernel fault?  */
-	fixup = search_exception_tables(regs->pc);
-	if (fixup) {
-		regs->pc = fixup->fixup;
-		return;
-	}
-
-/*
- * Oops. The kernel tried to access some bad page. We'll have to
- * terminate things with extreme prejudice.
- *
- */
-	if (address < PAGE_SIZE)
-		printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");
-	else
-		printk(KERN_ALERT "Unable to handle kernel paging request");
-	printk(" at virtual address %08lx\n", address);
-	printk(KERN_ALERT "pc = %08Lx%08Lx\n", regs->pc >> 32, regs->pc & 0xffffffff);
-	die("Oops", regs, writeaccess);
-	do_exit(SIGKILL);
-
-/*
- * We ran out of memory, or some other thing happened to us that made
- * us unable to handle the page fault gracefully.
- */
-out_of_memory:
-	if (is_global_init(current)) {
-		panic("INIT out of memory\n");
-		yield();
-		goto survive;
-	}
-	printk("fault:Out of memory\n");
-	up_read(&mm->mmap_sem);
-	if (is_global_init(current)) {
-		yield();
-		down_read(&mm->mmap_sem);
-		goto survive;
-	}
-	printk("VM: killing process %s\n", tsk->comm);
-	if (user_mode(regs))
-		do_group_exit(SIGKILL);
-	goto no_context;
-
-do_sigbus:
-	printk("fault:Do sigbus\n");
-	up_read(&mm->mmap_sem);
-
-	/*
-	 * Send a sigbus, regardless of whether we were in kernel
-	 * or user mode.
-	 */
-	tsk->thread.address = address;
-	tsk->thread.error_code = writeaccess;
-	tsk->thread.trap_no = 14;
-	force_sig(SIGBUS, tsk);
-
-	/* Kernel mode? Handle exceptions or die */
-	if (!user_mode(regs))
-		goto no_context;
-}
-
 void local_flush_tlb_one(unsigned long asid, unsigned long page)
 {
 	unsigned long long match, pteh=0, lpage;
@@ -467,6 +166,7 @@ void local_flush_tlb_kernel_range(unsigned long start, unsigned long end)
         flush_tlb_all();
 }
 
-void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t pte)
+void __flush_tlb_global(void)
 {
+	flush_tlb_all();
 }
diff --git a/arch/sh/mm/uncached.c b/arch/sh/mm/uncached.c
index 8a4eca551fc..a7767da815e 100644
--- a/arch/sh/mm/uncached.c
+++ b/arch/sh/mm/uncached.c
@@ -28,7 +28,7 @@ EXPORT_SYMBOL(virt_addr_uncached);
 
 void __init uncached_init(void)
 {
-#ifdef CONFIG_29BIT
+#if defined(CONFIG_29BIT) || !defined(CONFIG_MMU)
 	uncached_start = P2SEG;
 #else
 	uncached_start = memory_end;