Diffstat (limited to 'arch/sh/mm')
-rw-r--r--  arch/sh/mm/Kconfig | 107
-rw-r--r--  arch/sh/mm/Makefile | 75
-rw-r--r--  arch/sh/mm/Makefile_32 | 36
-rw-r--r--  arch/sh/mm/Makefile_64 | 44
-rw-r--r--  arch/sh/mm/alignment.c | 190
-rw-r--r--  arch/sh/mm/asids-debugfs.c | 77
-rw-r--r--  arch/sh/mm/cache-debugfs.c | 46
-rw-r--r--  arch/sh/mm/cache-sh2.c | 56
-rw-r--r--  arch/sh/mm/cache-sh2a.c | 189
-rw-r--r--  arch/sh/mm/cache-sh3.c | 31
-rw-r--r--  arch/sh/mm/cache-sh4.c | 740
-rw-r--r--  arch/sh/mm/cache-sh5.c | 315
-rw-r--r--  arch/sh/mm/cache-sh7705.c | 82
-rw-r--r--  arch/sh/mm/cache-shx3.c | 44
-rw-r--r--  arch/sh/mm/cache.c | 355
-rw-r--r--  arch/sh/mm/consistent.c | 224
-rw-r--r--  arch/sh/mm/fault.c | 517
-rw-r--r--  arch/sh/mm/fault_32.c | 314
-rw-r--r--  arch/sh/mm/fault_64.c | 275
-rw-r--r--  arch/sh/mm/flush-sh4.c | 110
-rw-r--r--  arch/sh/mm/gup.c | 273
-rw-r--r--  arch/sh/mm/hugetlbpage.c | 11
-rw-r--r--  arch/sh/mm/init.c | 530
-rw-r--r--  arch/sh/mm/ioremap.c | 137
-rw-r--r--  arch/sh/mm/ioremap_32.c | 150
-rw-r--r--  arch/sh/mm/ioremap_64.c | 404
-rw-r--r--  arch/sh/mm/ioremap_fixed.c | 134
-rw-r--r--  arch/sh/mm/kmap.c | 67
-rw-r--r--  arch/sh/mm/mmap.c | 163
-rw-r--r--  arch/sh/mm/nommu.c (renamed from arch/sh/mm/tlb-nommu.c) | 53
-rw-r--r--  arch/sh/mm/numa.c | 57
-rw-r--r--  arch/sh/mm/pg-nommu.c | 37
-rw-r--r--  arch/sh/mm/pg-sh4.c | 129
-rw-r--r--  arch/sh/mm/pg-sh7705.c | 138
-rw-r--r--  arch/sh/mm/pgtable.c | 57
-rw-r--r--  arch/sh/mm/pmb.c | 919
-rw-r--r--  arch/sh/mm/sram.c | 35
-rw-r--r--  arch/sh/mm/tlb-debugfs.c | 172
-rw-r--r--  arch/sh/mm/tlb-pteaex.c | 106
-rw-r--r--  arch/sh/mm/tlb-sh3.c | 52
-rw-r--r--  arch/sh/mm/tlb-sh4.c | 77
-rw-r--r--  arch/sh/mm/tlb-sh5.c | 122
-rw-r--r--  arch/sh/mm/tlb-urb.c | 93
-rw-r--r--  arch/sh/mm/tlbex_32.c | 78
-rw-r--r--  arch/sh/mm/tlbex_64.c | 166
-rw-r--r--  arch/sh/mm/tlbflush_32.c | 21
-rw-r--r--  arch/sh/mm/tlbflush_64.c | 319
-rw-r--r--  arch/sh/mm/uncached.c | 43
48 files changed, 4916 insertions, 3454 deletions
diff --git a/arch/sh/mm/Kconfig b/arch/sh/mm/Kconfig
index 5fd218430b1..dba285e8680 100644
--- a/arch/sh/mm/Kconfig
+++ b/arch/sh/mm/Kconfig
@@ -21,6 +21,29 @@ config PAGE_OFFSET
default "0x20000000" if MMU && SUPERH64
default "0x00000000"
+config FORCE_MAX_ZONEORDER
+ int "Maximum zone order"
+ range 9 64 if PAGE_SIZE_16KB
+ default "9" if PAGE_SIZE_16KB
+ range 7 64 if PAGE_SIZE_64KB
+ default "7" if PAGE_SIZE_64KB
+ range 11 64
+ default "14" if !MMU
+ default "11"
+ help
+ The kernel memory allocator divides physically contiguous memory
+ blocks into "zones", where each zone is a power of two number of
+ pages. This option selects the largest power of two that the kernel
+ keeps in the memory allocator. If you need to allocate very large
+ blocks of physically contiguous memory, then you may need to
+ increase this value.
+
+ This config option is actually maximum order plus one. For example,
+ a value of 11 means that the largest free memory block is 2^10 pages.
+
+ The page size is not necessarily 4KB. Keep this in mind when
+ choosing a value for this option.
+
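To make the order-plus-one convention above concrete, here is a small standalone sketch (illustrative only; the constants mirror the defaults chosen by this entry, and nothing here is part of the patch):

/*
 * Largest physically contiguous block the buddy allocator can hand out:
 * 2^(MAX_ORDER - 1) pages of PAGE_SIZE bytes each.
 */
#include <stdio.h>

int main(void)
{
	unsigned long page_size = 4096;	/* PAGE_SIZE_4KB */
	unsigned int max_order = 11;	/* default with an MMU */
	unsigned long max_bytes = (1UL << (max_order - 1)) * page_size;

	/*
	 * 2^10 pages * 4 kB = 4 MiB.  With 64 kB pages and the order-7
	 * default it is 2^6 * 64 kB = 4 MiB as well, which is why the
	 * default order shrinks as the page size grows.
	 */
	printf("largest contiguous block: %lu MiB\n", max_bytes >> 20);
	return 0;
}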
config MEMORY_START
hex "Physical memory start address"
default "0x08000000"
@@ -52,29 +75,25 @@ config MEMORY_SIZE
config 29BIT
def_bool !32BIT
depends on SUPERH32
+ select UNCACHED_MAPPING
config 32BIT
bool
- default y if CPU_SH5
+ default y if CPU_SH5 || !MMU
config PMB
bool "Support 32-bit physical addressing through PMB"
- depends on MMU && EXPERIMENTAL && (CPU_SUBTYPE_SH7780 || CPU_SUBTYPE_SH7785)
+ depends on MMU && CPU_SH4A && !CPU_SH4AL_DSP
select 32BIT
- default y
+ select UNCACHED_MAPPING
help
If you say Y here, physical addressing will be extended to
32-bits through the SH-4A PMB. If this is not set, legacy
29-bit physical addressing will be used.
config X2TLB
- bool "Enable extended TLB mode"
- depends on (CPU_SHX2 || CPU_SHX3) && MMU && EXPERIMENTAL
- help
- Selecting this option will enable the extended mode of the SH-X2
- TLB. For legacy SH-X behaviour and interoperability, say N. For
- all of the fun new features and a willingless to submit bug reports,
- say Y.
+ def_bool y
+ depends on (CPU_SHX2 || CPU_SHX3) && MMU
config VSYSCALL
bool "Support vsyscall page"
@@ -91,7 +110,8 @@ config VSYSCALL
config NUMA
bool "Non Uniform Memory Access (NUMA) Support"
- depends on MMU && SYS_SUPPORTS_NUMA && EXPERIMENTAL
+ depends on MMU && SYS_SUPPORTS_NUMA
+ select ARCH_WANT_NUMA_VARIABLE_LOCALITY
default n
help
Some SH systems have many various memories scattered around
@@ -117,47 +137,56 @@ config ARCH_SPARSEMEM_ENABLE
config ARCH_SPARSEMEM_DEFAULT
def_bool y
-config MAX_ACTIVE_REGIONS
- int
- default "6" if (CPU_SUBTYPE_SHX3 && SPARSEMEM)
- default "2" if SPARSEMEM && (CPU_SUBTYPE_SH7722 || \
- CPU_SUBTYPE_SH7785)
- default "1"
-
-config ARCH_POPULATES_NODE_MAP
- def_bool y
-
config ARCH_SELECT_MEMORY_MODEL
def_bool y
config ARCH_ENABLE_MEMORY_HOTPLUG
def_bool y
- depends on SPARSEMEM
+ depends on SPARSEMEM && MMU
+
+config ARCH_ENABLE_MEMORY_HOTREMOVE
+ def_bool y
+ depends on SPARSEMEM && MMU
config ARCH_MEMORY_PROBE
def_bool y
depends on MEMORY_HOTPLUG
+config IOREMAP_FIXED
+ def_bool y
+ depends on X2TLB || SUPERH64
+
+config UNCACHED_MAPPING
+ bool
+
+config HAVE_SRAM_POOL
+ bool
+ select GENERIC_ALLOCATOR
+
choice
prompt "Kernel page size"
- default PAGE_SIZE_8KB if X2TLB
default PAGE_SIZE_4KB
config PAGE_SIZE_4KB
bool "4kB"
- depends on !X2TLB
help
This is the default page size used by all SuperH CPUs.
config PAGE_SIZE_8KB
bool "8kB"
- depends on X2TLB
+ depends on !MMU || X2TLB
help
This enables 8kB pages as supported by SH-X2 and later MMUs.
+config PAGE_SIZE_16KB
+ bool "16kB"
+ depends on !MMU
+ help
+ This enables 16kB pages on MMU-less SH systems.
+
config PAGE_SIZE_64KB
bool "64kB"
- depends on CPU_SH4 || CPU_SH5
+ depends on !MMU || CPU_SH4 || CPU_SH5
help
This enables support for 64kB pages, possible on all SH-4
CPUs and later.
@@ -166,11 +195,13 @@ endchoice
choice
prompt "HugeTLB page size"
- depends on HUGETLB_PAGE && (CPU_SH4 || CPU_SH5) && MMU
+ depends on HUGETLB_PAGE
+ default HUGETLB_PAGE_SIZE_1MB if PAGE_SIZE_64KB
default HUGETLB_PAGE_SIZE_64K
config HUGETLB_PAGE_SIZE_64K
bool "64kB"
+ depends on !PAGE_SIZE_64KB
config HUGETLB_PAGE_SIZE_256K
bool "256kB"
@@ -195,6 +226,15 @@ endchoice
source "mm/Kconfig"
+config SCHED_MC
+ bool "Multi-core scheduler support"
+ depends on SMP
+ default y
+ help
+ Multi-core scheduler support improves the CPU scheduler's decision
+ making when dealing with multi-core CPU chips at a cost of slightly
+ increased overhead in some places. If unsure say N here.
+
endmenu
menu "Cache configuration"
@@ -204,18 +244,6 @@ config SH7705_CACHE_32KB
depends on CPU_SUBTYPE_SH7705
default y
-config SH_DIRECT_MAPPED
- bool "Use direct-mapped caching"
- default n
- help
- Selecting this option will configure the caches to be direct-mapped,
- even if the cache supports a 2 or 4-way mode. This is useful primarily
- for debugging on platforms with 2 and 4-way caches (SH7750R/SH7751R,
- SH4-202, SH4-501, etc.)
-
- Turn this option off for platforms that do not have a direct-mapped
- cache, and you have no need to run the caches in such a configuration.
-
choice
prompt "Cache mode"
default CACHE_WRITEBACK if CPU_SH2A || CPU_SH3 || CPU_SH4 || CPU_SH5
@@ -223,7 +251,6 @@ choice
config CACHE_WRITEBACK
bool "Write-back"
- depends on CPU_SH2A || CPU_SH3 || CPU_SH4 || CPU_SH5
config CACHE_WRITETHROUGH
bool "Write-through"
diff --git a/arch/sh/mm/Makefile b/arch/sh/mm/Makefile
index 9f4bc3d90b1..cee6b9999d8 100644
--- a/arch/sh/mm/Makefile
+++ b/arch/sh/mm/Makefile
@@ -1,5 +1,72 @@
-ifeq ($(CONFIG_SUPERH32),y)
-include ${srctree}/arch/sh/mm/Makefile_32
-else
-include ${srctree}/arch/sh/mm/Makefile_64
+#
+# Makefile for the Linux SuperH-specific parts of the memory manager.
+#
+
+obj-y := alignment.o cache.o init.o consistent.o mmap.o
+
+cacheops-$(CONFIG_CPU_SH2) := cache-sh2.o
+cacheops-$(CONFIG_CPU_SH2A) := cache-sh2a.o
+cacheops-$(CONFIG_CPU_SH3) := cache-sh3.o
+cacheops-$(CONFIG_CPU_SH4) := cache-sh4.o flush-sh4.o
+cacheops-$(CONFIG_CPU_SH5) := cache-sh5.o flush-sh4.o
+cacheops-$(CONFIG_SH7705_CACHE_32KB) += cache-sh7705.o
+cacheops-$(CONFIG_CPU_SHX3) += cache-shx3.o
+
+obj-y += $(cacheops-y)
+
+mmu-y := nommu.o extable_32.o
+mmu-$(CONFIG_MMU) := extable_$(BITS).o fault.o gup.o ioremap.o kmap.o \
+ pgtable.o tlbex_$(BITS).o tlbflush_$(BITS).o
+
+obj-y += $(mmu-y)
+
+debugfs-y := asids-debugfs.o
+ifndef CONFIG_CACHE_OFF
+debugfs-$(CONFIG_CPU_SH4) += cache-debugfs.o
endif
+
+ifdef CONFIG_MMU
+debugfs-$(CONFIG_CPU_SH4) += tlb-debugfs.o
+tlb-$(CONFIG_CPU_SH3) := tlb-sh3.o
+tlb-$(CONFIG_CPU_SH4) := tlb-sh4.o tlb-urb.o
+tlb-$(CONFIG_CPU_SH5) := tlb-sh5.o
+tlb-$(CONFIG_CPU_HAS_PTEAEX) := tlb-pteaex.o tlb-urb.o
+obj-y += $(tlb-y)
+endif
+
+obj-$(CONFIG_DEBUG_FS) += $(debugfs-y)
+obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
+obj-$(CONFIG_PMB) += pmb.o
+obj-$(CONFIG_NUMA) += numa.o
+obj-$(CONFIG_IOREMAP_FIXED) += ioremap_fixed.o
+obj-$(CONFIG_UNCACHED_MAPPING) += uncached.o
+obj-$(CONFIG_HAVE_SRAM_POOL) += sram.o
+
+GCOV_PROFILE_pmb.o := n
+
+# Special flags for tlbex_64.o. This puts restrictions on the number of
+# caller-save registers that the compiler can target when building this file.
+# This is required because the code is called from a context in entry.S where
+# very few registers have been saved in the exception handler (for speed
+# reasons).
+# The caller save registers that have been saved and which can be used are
+# r2,r3,r4,r5 : argument passing
+# r15, r18 : SP and LINK
+# tr0-4 : allow all caller-save TR's. The compiler seems to be able to make
+# use of them, so it's probably beneficial to performance to save them
+# and have them available for it.
+#
+# The resources not listed below are callee save, i.e. the compiler is free to
+# use any of them and will spill them to the stack itself.
+
+CFLAGS_tlbex_64.o += -ffixed-r7 \
+ -ffixed-r8 -ffixed-r9 -ffixed-r10 -ffixed-r11 -ffixed-r12 \
+ -ffixed-r13 -ffixed-r14 -ffixed-r16 -ffixed-r17 -ffixed-r19 \
+ -ffixed-r20 -ffixed-r21 -ffixed-r22 -ffixed-r23 \
+ -ffixed-r24 -ffixed-r25 -ffixed-r26 -ffixed-r27 \
+ -ffixed-r36 -ffixed-r37 -ffixed-r38 -ffixed-r39 -ffixed-r40 \
+ -ffixed-r41 -ffixed-r42 -ffixed-r43 \
+ -ffixed-r60 -ffixed-r61 -ffixed-r62 \
+ -fomit-frame-pointer
+
+ccflags-y := -Werror
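As a rough illustration of what these -ffixed-<reg> restrictions buy (the helper name and cross-toolchain prefix below are assumptions, not taken from the tree): any C routine built with them is confined to the few registers the SH-5 exception entry path actually preserves.

/*
 * Hypothetical helper, built with something along the lines of:
 *   sh64-linux-gcc -O2 -ffixed-r8 -ffixed-r9 ... -c tlb_helper.c
 * GCC never allocates a register named in -ffixed-<reg>, so whatever the
 * exception entry code left in those registers survives the call.
 */
unsigned long tlb_helper(unsigned long addr, unsigned long asid)
{
	return (addr & ~0xfffUL) | (asid & 0xffUL);
}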
diff --git a/arch/sh/mm/Makefile_32 b/arch/sh/mm/Makefile_32
deleted file mode 100644
index e295db60b91..00000000000
--- a/arch/sh/mm/Makefile_32
+++ /dev/null
@@ -1,36 +0,0 @@
-#
-# Makefile for the Linux SuperH-specific parts of the memory manager.
-#
-
-obj-y := init.o extable_32.o consistent.o
-
-ifndef CONFIG_CACHE_OFF
-obj-$(CONFIG_CPU_SH2) += cache-sh2.o
-obj-$(CONFIG_CPU_SH3) += cache-sh3.o
-obj-$(CONFIG_CPU_SH4) += cache-sh4.o
-obj-$(CONFIG_SH7705_CACHE_32KB) += cache-sh7705.o
-endif
-
-mmu-y := tlb-nommu.o pg-nommu.o
-mmu-$(CONFIG_MMU) := fault_32.o tlbflush_32.o ioremap_32.o
-
-obj-y += $(mmu-y)
-
-ifdef CONFIG_DEBUG_FS
-obj-$(CONFIG_CPU_SH4) += cache-debugfs.o
-endif
-
-ifdef CONFIG_MMU
-obj-$(CONFIG_CPU_SH3) += tlb-sh3.o
-obj-$(CONFIG_CPU_SH4) += tlb-sh4.o
-ifndef CONFIG_CACHE_OFF
-obj-$(CONFIG_CPU_SH4) += pg-sh4.o
-obj-$(CONFIG_SH7705_CACHE_32KB) += pg-sh7705.o
-endif
-endif
-
-obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
-obj-$(CONFIG_PMB) += pmb.o
-obj-$(CONFIG_NUMA) += numa.o
-
-EXTRA_CFLAGS += -Werror
diff --git a/arch/sh/mm/Makefile_64 b/arch/sh/mm/Makefile_64
deleted file mode 100644
index cbd6aa33c5a..00000000000
--- a/arch/sh/mm/Makefile_64
+++ /dev/null
@@ -1,44 +0,0 @@
-#
-# Makefile for the Linux SuperH-specific parts of the memory manager.
-#
-
-obj-y := init.o extable_64.o consistent.o
-
-mmu-y := tlb-nommu.o pg-nommu.o
-mmu-$(CONFIG_MMU) := fault_64.o ioremap_64.o tlbflush_64.o tlb-sh5.o
-
-ifndef CONFIG_CACHE_OFF
-obj-y += cache-sh5.o
-endif
-
-obj-y += $(mmu-y)
-
-obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
-obj-$(CONFIG_NUMA) += numa.o
-
-EXTRA_CFLAGS += -Werror
-
-# Special flags for fault_64.o. This puts restrictions on the number of
-# caller-save registers that the compiler can target when building this file.
-# This is required because the code is called from a context in entry.S where
-# very few registers have been saved in the exception handler (for speed
-# reasons).
-# The caller save registers that have been saved and which can be used are
-# r2,r3,r4,r5 : argument passing
-# r15, r18 : SP and LINK
-# tr0-4 : allow all caller-save TR's. The compiler seems to be able to make
-# use of them, so it's probably beneficial to performance to save them
-# and have them available for it.
-#
-# The resources not listed below are callee save, i.e. the compiler is free to
-# use any of them and will spill them to the stack itself.
-
-CFLAGS_fault_64.o += -ffixed-r7 \
- -ffixed-r8 -ffixed-r9 -ffixed-r10 -ffixed-r11 -ffixed-r12 \
- -ffixed-r13 -ffixed-r14 -ffixed-r16 -ffixed-r17 -ffixed-r19 \
- -ffixed-r20 -ffixed-r21 -ffixed-r22 -ffixed-r23 \
- -ffixed-r24 -ffixed-r25 -ffixed-r26 -ffixed-r27 \
- -ffixed-r36 -ffixed-r37 -ffixed-r38 -ffixed-r39 -ffixed-r40 \
- -ffixed-r41 -ffixed-r42 -ffixed-r43 \
- -ffixed-r60 -ffixed-r61 -ffixed-r62 \
- -fomit-frame-pointer
diff --git a/arch/sh/mm/alignment.c b/arch/sh/mm/alignment.c
new file mode 100644
index 00000000000..ec2b2530242
--- /dev/null
+++ b/arch/sh/mm/alignment.c
@@ -0,0 +1,190 @@
+/*
+ * Alignment access counters and corresponding user-space interfaces.
+ *
+ * Copyright (C) 2009 ST Microelectronics
+ * Copyright (C) 2009 - 2010 Paul Mundt
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/seq_file.h>
+#include <linux/proc_fs.h>
+#include <linux/uaccess.h>
+#include <linux/ratelimit.h>
+#include <asm/alignment.h>
+#include <asm/processor.h>
+
+static unsigned long se_user;
+static unsigned long se_sys;
+static unsigned long se_half;
+static unsigned long se_word;
+static unsigned long se_dword;
+static unsigned long se_multi;
+/* bitfield: 1: warn 2: fixup 4: signal -> combinations 2|4 && 1|2|4 are not
+ valid! */
+static int se_usermode = UM_WARN | UM_FIXUP;
+/* 0: no warning 1: print a warning message, disabled by default */
+static int se_kernmode_warn;
+
+core_param(alignment, se_usermode, int, 0600);
+
+void inc_unaligned_byte_access(void)
+{
+ se_half++;
+}
+
+void inc_unaligned_word_access(void)
+{
+ se_word++;
+}
+
+void inc_unaligned_dword_access(void)
+{
+ se_dword++;
+}
+
+void inc_unaligned_multi_access(void)
+{
+ se_multi++;
+}
+
+void inc_unaligned_user_access(void)
+{
+ se_user++;
+}
+
+void inc_unaligned_kernel_access(void)
+{
+ se_sys++;
+}
+
+/*
+ * This defaults to the global policy which can be set from the command
+ * line, while processes can overload their preferences via prctl().
+ */
+unsigned int unaligned_user_action(void)
+{
+ unsigned int action = se_usermode;
+
+ if (current->thread.flags & SH_THREAD_UAC_SIGBUS) {
+ action &= ~UM_FIXUP;
+ action |= UM_SIGNAL;
+ }
+
+ if (current->thread.flags & SH_THREAD_UAC_NOPRINT)
+ action &= ~UM_WARN;
+
+ return action;
+}
+
+int get_unalign_ctl(struct task_struct *tsk, unsigned long addr)
+{
+ return put_user(tsk->thread.flags & SH_THREAD_UAC_MASK,
+ (unsigned int __user *)addr);
+}
+
+int set_unalign_ctl(struct task_struct *tsk, unsigned int val)
+{
+ tsk->thread.flags = (tsk->thread.flags & ~SH_THREAD_UAC_MASK) |
+ (val & SH_THREAD_UAC_MASK);
+ return 0;
+}
+
+void unaligned_fixups_notify(struct task_struct *tsk, insn_size_t insn,
+ struct pt_regs *regs)
+{
+ if (user_mode(regs) && (se_usermode & UM_WARN))
+ pr_notice_ratelimited("Fixing up unaligned userspace access "
+ "in \"%s\" pid=%d pc=0x%p ins=0x%04hx\n",
+ tsk->comm, task_pid_nr(tsk),
+ (void *)instruction_pointer(regs), insn);
+ else if (se_kernmode_warn)
+ pr_notice_ratelimited("Fixing up unaligned kernel access "
+ "in \"%s\" pid=%d pc=0x%p ins=0x%04hx\n",
+ tsk->comm, task_pid_nr(tsk),
+ (void *)instruction_pointer(regs), insn);
+}
+
+static const char *se_usermode_action[] = {
+ "ignored",
+ "warn",
+ "fixup",
+ "fixup+warn",
+ "signal",
+ "signal+warn"
+};
+
+static int alignment_proc_show(struct seq_file *m, void *v)
+{
+ seq_printf(m, "User:\t\t%lu\n", se_user);
+ seq_printf(m, "System:\t\t%lu\n", se_sys);
+ seq_printf(m, "Half:\t\t%lu\n", se_half);
+ seq_printf(m, "Word:\t\t%lu\n", se_word);
+ seq_printf(m, "DWord:\t\t%lu\n", se_dword);
+ seq_printf(m, "Multi:\t\t%lu\n", se_multi);
+ seq_printf(m, "User faults:\t%i (%s)\n", se_usermode,
+ se_usermode_action[se_usermode]);
+ seq_printf(m, "Kernel faults:\t%i (fixup%s)\n", se_kernmode_warn,
+ se_kernmode_warn ? "+warn" : "");
+ return 0;
+}
+
+static int alignment_proc_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, alignment_proc_show, NULL);
+}
+
+static ssize_t alignment_proc_write(struct file *file,
+ const char __user *buffer, size_t count, loff_t *pos)
+{
+ int *data = PDE_DATA(file_inode(file));
+ char mode;
+
+ if (count > 0) {
+ if (get_user(mode, buffer))
+ return -EFAULT;
+ if (mode >= '0' && mode <= '5')
+ *data = mode - '0';
+ }
+ return count;
+}
+
+static const struct file_operations alignment_proc_fops = {
+ .owner = THIS_MODULE,
+ .open = alignment_proc_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+ .write = alignment_proc_write,
+};
+
+/*
+ * This needs to be done after sysctl_init, otherwise sys/ will be
+ * overwritten. Actually, this shouldn't be in sys/ at all since
+ * it isn't a sysctl, and it doesn't contain sysctl information.
+ * We now locate it in /proc/cpu/alignment instead.
+ */
+static int __init alignment_init(void)
+{
+ struct proc_dir_entry *dir, *res;
+
+ dir = proc_mkdir("cpu", NULL);
+ if (!dir)
+ return -ENOMEM;
+
+ res = proc_create_data("alignment", S_IWUSR | S_IRUGO, dir,
+ &alignment_proc_fops, &se_usermode);
+ if (!res)
+ return -ENOMEM;
+
+ res = proc_create_data("kernel_alignment", S_IWUSR | S_IRUGO, dir,
+ &alignment_proc_fops, &se_kernmode_warn);
+ if (!res)
+ return -ENOMEM;
+
+ return 0;
+}
+fs_initcall(alignment_init);
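The get_unalign_ctl()/set_unalign_ctl() hooks above back the generic PR_GET_UNALIGN/PR_SET_UNALIGN prctl(2) interface, and the proc handler accepts a single digit between 0 and 5 indexing se_usermode_action. A minimal userspace sketch of the per-process side (illustrative, not part of the patch):

#include <stdio.h>
#include <sys/prctl.h>

int main(void)
{
	unsigned int cur = 0;

	/* Ask for SIGBUS instead of in-kernel fixup in this process. */
	if (prctl(PR_SET_UNALIGN, PR_UNALIGN_SIGBUS) != 0)
		perror("PR_SET_UNALIGN");

	/* Read the current per-process flags back. */
	if (prctl(PR_GET_UNALIGN, &cur) == 0)
		printf("unalign flags: %#x\n", cur);

	return 0;
}

The global policy can likewise be changed at run time by writing a digit to /proc/cpu/alignment, or at boot through the alignment= parameter registered with core_param() above.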
diff --git a/arch/sh/mm/asids-debugfs.c b/arch/sh/mm/asids-debugfs.c
new file mode 100644
index 00000000000..74c03ecc487
--- /dev/null
+++ b/arch/sh/mm/asids-debugfs.c
@@ -0,0 +1,77 @@
+/*
+ * debugfs ops for process ASIDs
+ *
+ * Copyright (C) 2000, 2001 Paolo Alberelli
+ * Copyright (C) 2003 - 2008 Paul Mundt
+ * Copyright (C) 2003, 2004 Richard Curnow
+ *
+ * Provides a debugfs file that lists out the ASIDs currently associated
+ * with the processes.
+ *
+ * In the SH-5 case, if the DM.PC register is examined through the debug
+ * link, this shows ASID + PC. To make use of this, the PID->ASID
+ * relationship needs to be known. This is primarily for debugging.
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/spinlock.h>
+#include <asm/processor.h>
+#include <asm/mmu_context.h>
+
+static int asids_seq_show(struct seq_file *file, void *iter)
+{
+ struct task_struct *p;
+
+ read_lock(&tasklist_lock);
+
+ for_each_process(p) {
+ int pid = p->pid;
+
+ if (unlikely(!pid))
+ continue;
+
+ if (p->mm)
+ seq_printf(file, "%5d : %04lx\n", pid,
+ cpu_asid(smp_processor_id(), p->mm));
+ }
+
+ read_unlock(&tasklist_lock);
+
+ return 0;
+}
+
+static int asids_debugfs_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, asids_seq_show, inode->i_private);
+}
+
+static const struct file_operations asids_debugfs_fops = {
+ .owner = THIS_MODULE,
+ .open = asids_debugfs_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static int __init asids_debugfs_init(void)
+{
+ struct dentry *asids_dentry;
+
+ asids_dentry = debugfs_create_file("asids", S_IRUSR, arch_debugfs_dir,
+ NULL, &asids_debugfs_fops);
+ if (!asids_dentry)
+ return -ENOMEM;
+ if (IS_ERR(asids_dentry))
+ return PTR_ERR(asids_dentry);
+
+ return 0;
+}
+module_init(asids_debugfs_init);
+
+MODULE_LICENSE("GPL v2");
diff --git a/arch/sh/mm/cache-debugfs.c b/arch/sh/mm/cache-debugfs.c
index db6d950b6f5..777e50f33c0 100644
--- a/arch/sh/mm/cache-debugfs.c
+++ b/arch/sh/mm/cache-debugfs.c
@@ -22,14 +22,13 @@ enum cache_type {
CACHE_TYPE_UNIFIED,
};
-static int __uses_jump_to_uncached cache_seq_show(struct seq_file *file,
- void *iter)
+static int cache_seq_show(struct seq_file *file, void *iter)
{
unsigned int cache_type = (unsigned int)file->private;
struct cache_info *cache;
- unsigned int waysize, way, cache_size;
- unsigned long ccr, base;
- static unsigned long addrstart = 0;
+ unsigned int waysize, way;
+ unsigned long ccr;
+ unsigned long addrstart = 0;
/*
* Go uncached immediately so we don't skew the results any
@@ -37,7 +36,7 @@ static int __uses_jump_to_uncached cache_seq_show(struct seq_file *file,
*/
jump_to_uncached();
- ccr = ctrl_inl(CCR);
+ ccr = __raw_readl(SH_CCR);
if ((ccr & CCR_CACHE_ENABLE) == 0) {
back_to_cached();
@@ -46,28 +45,13 @@ static int __uses_jump_to_uncached cache_seq_show(struct seq_file *file,
}
if (cache_type == CACHE_TYPE_DCACHE) {
- base = CACHE_OC_ADDRESS_ARRAY;
+ addrstart = CACHE_OC_ADDRESS_ARRAY;
cache = &current_cpu_data.dcache;
} else {
- base = CACHE_IC_ADDRESS_ARRAY;
+ addrstart = CACHE_IC_ADDRESS_ARRAY;
cache = &current_cpu_data.icache;
}
- /*
- * Due to the amount of data written out (depending on the cache size),
- * we may be iterated over multiple times. In this case, keep track of
- * the entry position in addrstart, and rewind it when we've hit the
- * end of the cache.
- *
- * Likewise, the same code is used for multiple caches, so care must
- * be taken for bouncing addrstart back and forth so the appropriate
- * cache is hit.
- */
- cache_size = cache->ways * cache->sets * cache->linesz;
- if (((addrstart & 0xff000000) != base) ||
- (addrstart & 0x00ffffff) > cache_size)
- addrstart = base;
-
waysize = cache->sets;
/*
@@ -90,7 +74,7 @@ static int __uses_jump_to_uncached cache_seq_show(struct seq_file *file,
for (addr = addrstart, line = 0;
addr < addrstart + waysize;
addr += cache->linesz, line++) {
- unsigned long data = ctrl_inl(addr);
+ unsigned long data = __raw_readl(addr);
/* Check the V bit, ignore invalid cachelines */
if ((data & 1) == 0)
@@ -120,25 +104,25 @@ static const struct file_operations cache_debugfs_fops = {
.open = cache_debugfs_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release,
+ .release = single_release,
};
static int __init cache_debugfs_init(void)
{
struct dentry *dcache_dentry, *icache_dentry;
- dcache_dentry = debugfs_create_file("dcache", S_IRUSR, NULL,
+ dcache_dentry = debugfs_create_file("dcache", S_IRUSR, arch_debugfs_dir,
(unsigned int *)CACHE_TYPE_DCACHE,
&cache_debugfs_fops);
- if (IS_ERR(dcache_dentry))
- return PTR_ERR(dcache_dentry);
+ if (!dcache_dentry)
+ return -ENOMEM;
- icache_dentry = debugfs_create_file("icache", S_IRUSR, NULL,
+ icache_dentry = debugfs_create_file("icache", S_IRUSR, arch_debugfs_dir,
(unsigned int *)CACHE_TYPE_ICACHE,
&cache_debugfs_fops);
- if (IS_ERR(icache_dentry)) {
+ if (!icache_dentry) {
debugfs_remove(dcache_dentry);
- return PTR_ERR(icache_dentry);
+ return -ENOMEM;
}
return 0;
diff --git a/arch/sh/mm/cache-sh2.c b/arch/sh/mm/cache-sh2.c
index 6614033f6be..a74259f2f98 100644
--- a/arch/sh/mm/cache-sh2.c
+++ b/arch/sh/mm/cache-sh2.c
@@ -2,6 +2,7 @@
* arch/sh/mm/cache-sh2.c
*
* Copyright (C) 2002 Paul Mundt
+ * Copyright (C) 2008 Yoshinori Sato
*
* Released under the terms of the GNU GPL v2.0.
*/
@@ -15,7 +16,7 @@
#include <asm/cacheflush.h>
#include <asm/io.h>
-void __flush_wback_region(void *start, int size)
+static void sh2__flush_wback_region(void *start, int size)
{
unsigned long v;
unsigned long begin, end;
@@ -24,12 +25,19 @@ void __flush_wback_region(void *start, int size)
end = ((unsigned long)start + size + L1_CACHE_BYTES-1)
& ~(L1_CACHE_BYTES-1);
for (v = begin; v < end; v+=L1_CACHE_BYTES) {
- /* FIXME cache purge */
- ctrl_outl((v & 0x1ffffc00), (v & 0x00000ff0) | 0x00000008);
+ unsigned long addr = CACHE_OC_ADDRESS_ARRAY | (v & 0x00000ff0);
+ int way;
+ for (way = 0; way < 4; way++) {
+ unsigned long data = __raw_readl(addr | (way << 12));
+ if ((data & CACHE_PHYSADDR_MASK) == (v & CACHE_PHYSADDR_MASK)) {
+ data &= ~SH_CACHE_UPDATED;
+ __raw_writel(data, addr | (way << 12));
+ }
+ }
}
}
-void __flush_purge_region(void *start, int size)
+static void sh2__flush_purge_region(void *start, int size)
{
unsigned long v;
unsigned long begin, end;
@@ -37,21 +45,47 @@ void __flush_purge_region(void *start, int size)
begin = (unsigned long)start & ~(L1_CACHE_BYTES-1);
end = ((unsigned long)start + size + L1_CACHE_BYTES-1)
& ~(L1_CACHE_BYTES-1);
- for (v = begin; v < end; v+=L1_CACHE_BYTES) {
- ctrl_outl((v & 0x1ffffc00), (v & 0x00000ff0) | 0x00000008);
- }
+
+ for (v = begin; v < end; v+=L1_CACHE_BYTES)
+ __raw_writel((v & CACHE_PHYSADDR_MASK),
+ CACHE_OC_ADDRESS_ARRAY | (v & 0x00000ff0) | 0x00000008);
}
-void __flush_invalidate_region(void *start, int size)
+static void sh2__flush_invalidate_region(void *start, int size)
{
+#ifdef CONFIG_CACHE_WRITEBACK
+ /*
+ * SH-2 does not support individual line invalidation, only a
+ * global invalidate.
+ */
+ unsigned long ccr;
+ unsigned long flags;
+ local_irq_save(flags);
+ jump_to_uncached();
+
+ ccr = __raw_readl(SH_CCR);
+ ccr |= CCR_CACHE_INVALIDATE;
+ __raw_writel(ccr, SH_CCR);
+
+ back_to_cached();
+ local_irq_restore(flags);
+#else
unsigned long v;
unsigned long begin, end;
begin = (unsigned long)start & ~(L1_CACHE_BYTES-1);
end = ((unsigned long)start + size + L1_CACHE_BYTES-1)
& ~(L1_CACHE_BYTES-1);
- for (v = begin; v < end; v+=L1_CACHE_BYTES) {
- ctrl_outl((v & 0x1ffffc00), (v & 0x00000ff0) | 0x00000008);
- }
+
+ for (v = begin; v < end; v+=L1_CACHE_BYTES)
+ __raw_writel((v & CACHE_PHYSADDR_MASK),
+ CACHE_OC_ADDRESS_ARRAY | (v & 0x00000ff0) | 0x00000008);
+#endif
}
+void __init sh2_cache_init(void)
+{
+ __flush_wback_region = sh2__flush_wback_region;
+ __flush_purge_region = sh2__flush_purge_region;
+ __flush_invalidate_region = sh2__flush_invalidate_region;
+}
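sh2_cache_init() filling in __flush_wback_region and friends is the pattern the rest of this series follows: the flush entry points become function pointers that the common code (cache.c, listed in the diffstat but not quoted here) is presumed to declare and bind once the CPU family has been probed. A self-contained toy version of that pattern, with hypothetical names:

#include <stdio.h>

/* Flush entry point as a pointer rather than a fixed symbol. */
static void (*flush_wback)(void *start, int size);

static void sh2_style_wback(void *start, int size)
{
	printf("sh2-style writeback of %d bytes\n", size);
}

static void example_cache_init(void)
{
	/* In the kernel the choice is driven by the probed CPU family. */
	flush_wback = sh2_style_wback;
}

int main(void)
{
	char buf[64];

	example_cache_init();
	flush_wback(buf, (int)sizeof(buf));
	return 0;
}

One kernel image can then carry several cache implementations and bind the correct one at boot rather than at build time.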
diff --git a/arch/sh/mm/cache-sh2a.c b/arch/sh/mm/cache-sh2a.c
new file mode 100644
index 00000000000..ee87d081259
--- /dev/null
+++ b/arch/sh/mm/cache-sh2a.c
@@ -0,0 +1,189 @@
+/*
+ * arch/sh/mm/cache-sh2a.c
+ *
+ * Copyright (C) 2008 Yoshinori Sato
+ *
+ * Released under the terms of the GNU GPL v2.0.
+ */
+
+#include <linux/init.h>
+#include <linux/mm.h>
+
+#include <asm/cache.h>
+#include <asm/addrspace.h>
+#include <asm/processor.h>
+#include <asm/cacheflush.h>
+#include <asm/io.h>
+
+/*
+ * The maximum number of pages we support up to when doing ranged dcache
+ * flushing. Anything exceeding this will simply flush the dcache in its
+ * entirety.
+ */
+#define MAX_OCACHE_PAGES 32
+#define MAX_ICACHE_PAGES 32
+
+#ifdef CONFIG_CACHE_WRITEBACK
+static void sh2a_flush_oc_line(unsigned long v, int way)
+{
+ unsigned long addr = (v & 0x000007f0) | (way << 11);
+ unsigned long data;
+
+ data = __raw_readl(CACHE_OC_ADDRESS_ARRAY | addr);
+ if ((data & CACHE_PHYSADDR_MASK) == (v & CACHE_PHYSADDR_MASK)) {
+ data &= ~SH_CACHE_UPDATED;
+ __raw_writel(data, CACHE_OC_ADDRESS_ARRAY | addr);
+ }
+}
+#endif
+
+static void sh2a_invalidate_line(unsigned long cache_addr, unsigned long v)
+{
+ /* Set associative bit to hit all ways */
+ unsigned long addr = (v & 0x000007f0) | SH_CACHE_ASSOC;
+ __raw_writel((addr & CACHE_PHYSADDR_MASK), cache_addr | addr);
+}
+
+/*
+ * Write back the dirty D-caches, but not invalidate them.
+ */
+static void sh2a__flush_wback_region(void *start, int size)
+{
+#ifdef CONFIG_CACHE_WRITEBACK
+ unsigned long v;
+ unsigned long begin, end;
+ unsigned long flags;
+ int nr_ways;
+
+ begin = (unsigned long)start & ~(L1_CACHE_BYTES-1);
+ end = ((unsigned long)start + size + L1_CACHE_BYTES-1)
+ & ~(L1_CACHE_BYTES-1);
+ nr_ways = current_cpu_data.dcache.ways;
+
+ local_irq_save(flags);
+ jump_to_uncached();
+
+ /* If there are too many pages then flush the entire cache */
+ if (((end - begin) >> PAGE_SHIFT) >= MAX_OCACHE_PAGES) {
+ begin = CACHE_OC_ADDRESS_ARRAY;
+ end = begin + (nr_ways * current_cpu_data.dcache.way_size);
+
+ for (v = begin; v < end; v += L1_CACHE_BYTES) {
+ unsigned long data = __raw_readl(v);
+ if (data & SH_CACHE_UPDATED)
+ __raw_writel(data & ~SH_CACHE_UPDATED, v);
+ }
+ } else {
+ int way;
+ for (way = 0; way < nr_ways; way++) {
+ for (v = begin; v < end; v += L1_CACHE_BYTES)
+ sh2a_flush_oc_line(v, way);
+ }
+ }
+
+ back_to_cached();
+ local_irq_restore(flags);
+#endif
+}
+
+/*
+ * Write back the dirty D-caches and invalidate them.
+ */
+static void sh2a__flush_purge_region(void *start, int size)
+{
+ unsigned long v;
+ unsigned long begin, end;
+ unsigned long flags;
+
+ begin = (unsigned long)start & ~(L1_CACHE_BYTES-1);
+ end = ((unsigned long)start + size + L1_CACHE_BYTES-1)
+ & ~(L1_CACHE_BYTES-1);
+
+ local_irq_save(flags);
+ jump_to_uncached();
+
+ for (v = begin; v < end; v+=L1_CACHE_BYTES) {
+#ifdef CONFIG_CACHE_WRITEBACK
+ int way;
+ int nr_ways = current_cpu_data.dcache.ways;
+ for (way = 0; way < nr_ways; way++)
+ sh2a_flush_oc_line(v, way);
+#endif
+ sh2a_invalidate_line(CACHE_OC_ADDRESS_ARRAY, v);
+ }
+
+ back_to_cached();
+ local_irq_restore(flags);
+}
+
+/*
+ * Invalidate the D-caches, but no write back please
+ */
+static void sh2a__flush_invalidate_region(void *start, int size)
+{
+ unsigned long v;
+ unsigned long begin, end;
+ unsigned long flags;
+
+ begin = (unsigned long)start & ~(L1_CACHE_BYTES-1);
+ end = ((unsigned long)start + size + L1_CACHE_BYTES-1)
+ & ~(L1_CACHE_BYTES-1);
+
+ local_irq_save(flags);
+ jump_to_uncached();
+
+ /* If there are too many pages then just blow the cache */
+ if (((end - begin) >> PAGE_SHIFT) >= MAX_OCACHE_PAGES) {
+ __raw_writel(__raw_readl(SH_CCR) | CCR_OCACHE_INVALIDATE,
+ SH_CCR);
+ } else {
+ for (v = begin; v < end; v += L1_CACHE_BYTES)
+ sh2a_invalidate_line(CACHE_OC_ADDRESS_ARRAY, v);
+ }
+
+ back_to_cached();
+ local_irq_restore(flags);
+}
+
+/*
+ * Write back the range of D-cache, and purge the I-cache.
+ */
+static void sh2a_flush_icache_range(void *args)
+{
+ struct flusher_data *data = args;
+ unsigned long start, end;
+ unsigned long v;
+ unsigned long flags;
+
+ start = data->addr1 & ~(L1_CACHE_BYTES-1);
+ end = (data->addr2 + L1_CACHE_BYTES-1) & ~(L1_CACHE_BYTES-1);
+
+#ifdef CONFIG_CACHE_WRITEBACK
+ sh2a__flush_wback_region((void *)start, end-start);
+#endif
+
+ local_irq_save(flags);
+ jump_to_uncached();
+
+ /* I-Cache invalidate */
+ /* If there are too many pages then just blow the cache */
+ if (((end - start) >> PAGE_SHIFT) >= MAX_ICACHE_PAGES) {
+ __raw_writel(__raw_readl(SH_CCR) | CCR_ICACHE_INVALIDATE,
+ SH_CCR);
+ } else {
+ for (v = start; v < end; v += L1_CACHE_BYTES)
+ sh2a_invalidate_line(CACHE_IC_ADDRESS_ARRAY, v);
+ }
+
+ back_to_cached();
+ local_irq_restore(flags);
+}
+
+void __init sh2a_cache_init(void)
+{
+ local_flush_icache_range = sh2a_flush_icache_range;
+
+ __flush_wback_region = sh2a__flush_wback_region;
+ __flush_purge_region = sh2a__flush_purge_region;
+ __flush_invalidate_region = sh2a__flush_invalidate_region;
+}
diff --git a/arch/sh/mm/cache-sh3.c b/arch/sh/mm/cache-sh3.c
index 6d1dbec08ad..e37523f6519 100644
--- a/arch/sh/mm/cache-sh3.c
+++ b/arch/sh/mm/cache-sh3.c
@@ -32,7 +32,7 @@
* SIZE: Size of the region.
*/
-void __flush_wback_region(void *start, int size)
+static void sh3__flush_wback_region(void *start, int size)
{
unsigned long v, j;
unsigned long begin, end;
@@ -50,12 +50,12 @@ void __flush_wback_region(void *start, int size)
p = __pa(v);
addr = addrstart | (v & current_cpu_data.dcache.entry_mask);
local_irq_save(flags);
- data = ctrl_inl(addr);
+ data = __raw_readl(addr);
if ((data & CACHE_PHYSADDR_MASK) ==
(p & CACHE_PHYSADDR_MASK)) {
data &= ~SH_CACHE_UPDATED;
- ctrl_outl(data, addr);
+ __raw_writel(data, addr);
local_irq_restore(flags);
break;
}
@@ -71,7 +71,7 @@ void __flush_wback_region(void *start, int size)
* START: Virtual Address (U0, P1, or P3)
* SIZE: Size of the region.
*/
-void __flush_purge_region(void *start, int size)
+static void sh3__flush_purge_region(void *start, int size)
{
unsigned long v;
unsigned long begin, end;
@@ -86,15 +86,20 @@ void __flush_purge_region(void *start, int size)
data = (v & 0xfffffc00); /* _Virtual_ address, ~U, ~V */
addr = CACHE_OC_ADDRESS_ARRAY |
(v & current_cpu_data.dcache.entry_mask) | SH_CACHE_ASSOC;
- ctrl_outl(data, addr);
+ __raw_writel(data, addr);
}
}
-/*
- * No write back please
- *
- * Except I don't think there's any way to avoid the writeback. So we
- * just alias it to __flush_purge_region(). dwmw2.
- */
-void __flush_invalidate_region(void *start, int size)
- __attribute__((alias("__flush_purge_region")));
+void __init sh3_cache_init(void)
+{
+ __flush_wback_region = sh3__flush_wback_region;
+ __flush_purge_region = sh3__flush_purge_region;
+
+ /*
+ * No write back please
+ *
+ * Except I don't think there's any way to avoid the writeback.
+ * So we just alias it to sh3__flush_purge_region(). dwmw2.
+ */
+ __flush_invalidate_region = sh3__flush_purge_region;
+}
diff --git a/arch/sh/mm/cache-sh4.c b/arch/sh/mm/cache-sh4.c
index 43d7ff6b6ec..51d8f7f31d1 100644
--- a/arch/sh/mm/cache-sh4.c
+++ b/arch/sh/mm/cache-sh4.c
@@ -2,8 +2,9 @@
* arch/sh/mm/cache-sh4.c
*
* Copyright (C) 1999, 2000, 2002 Niibe Yutaka
- * Copyright (C) 2001 - 2007 Paul Mundt
+ * Copyright (C) 2001 - 2009 Paul Mundt
* Copyright (C) 2003 Richard Curnow
+ * Copyright (c) 2007 STMicroelectronics (R&D) Ltd.
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
@@ -13,7 +14,11 @@
#include <linux/mm.h>
#include <linux/io.h>
#include <linux/mutex.h>
+#include <linux/fs.h>
+#include <linux/highmem.h>
+#include <asm/pgtable.h>
#include <asm/mmu_context.h>
+#include <asm/cache_insns.h>
#include <asm/cacheflush.h>
/*
@@ -21,217 +26,80 @@
* flushing. Anything exceeding this will simply flush the dcache in its
* entirety.
*/
-#define MAX_DCACHE_PAGES 64 /* XXX: Tune for ways */
+#define MAX_ICACHE_PAGES 32
-static void __flush_dcache_segment_1way(unsigned long start,
- unsigned long extent);
-static void __flush_dcache_segment_2way(unsigned long start,
- unsigned long extent);
-static void __flush_dcache_segment_4way(unsigned long start,
- unsigned long extent);
-
-static void __flush_cache_4096(unsigned long addr, unsigned long phys,
+static void __flush_cache_one(unsigned long addr, unsigned long phys,
unsigned long exec_offset);
/*
- * This is initialised here to ensure that it is not placed in the BSS. If
- * that were to happen, note that cache_init gets called before the BSS is
- * cleared, so this would get nulled out which would be hopeless.
- */
-static void (*__flush_dcache_segment_fn)(unsigned long, unsigned long) =
- (void (*)(unsigned long, unsigned long))0xdeadbeef;
-
-static void compute_alias(struct cache_info *c)
-{
- c->alias_mask = ((c->sets - 1) << c->entry_shift) & ~(PAGE_SIZE - 1);
- c->n_aliases = c->alias_mask ? (c->alias_mask >> PAGE_SHIFT) + 1 : 0;
-}
-
-static void __init emit_cache_params(void)
-{
- printk("PVR=%08x CVR=%08x PRR=%08x\n",
- ctrl_inl(CCN_PVR),
- ctrl_inl(CCN_CVR),
- ctrl_inl(CCN_PRR));
- printk("I-cache : n_ways=%d n_sets=%d way_incr=%d\n",
- boot_cpu_data.icache.ways,
- boot_cpu_data.icache.sets,
- boot_cpu_data.icache.way_incr);
- printk("I-cache : entry_mask=0x%08x alias_mask=0x%08x n_aliases=%d\n",
- boot_cpu_data.icache.entry_mask,
- boot_cpu_data.icache.alias_mask,
- boot_cpu_data.icache.n_aliases);
- printk("D-cache : n_ways=%d n_sets=%d way_incr=%d\n",
- boot_cpu_data.dcache.ways,
- boot_cpu_data.dcache.sets,
- boot_cpu_data.dcache.way_incr);
- printk("D-cache : entry_mask=0x%08x alias_mask=0x%08x n_aliases=%d\n",
- boot_cpu_data.dcache.entry_mask,
- boot_cpu_data.dcache.alias_mask,
- boot_cpu_data.dcache.n_aliases);
-
- /*
- * Emit Secondary Cache parameters if the CPU has a probed L2.
- */
- if (boot_cpu_data.flags & CPU_HAS_L2_CACHE) {
- printk("S-cache : n_ways=%d n_sets=%d way_incr=%d\n",
- boot_cpu_data.scache.ways,
- boot_cpu_data.scache.sets,
- boot_cpu_data.scache.way_incr);
- printk("S-cache : entry_mask=0x%08x alias_mask=0x%08x n_aliases=%d\n",
- boot_cpu_data.scache.entry_mask,
- boot_cpu_data.scache.alias_mask,
- boot_cpu_data.scache.n_aliases);
- }
-
- if (!__flush_dcache_segment_fn)
- panic("unknown number of cache ways\n");
-}
-
-/*
- * SH-4 has virtually indexed and physically tagged cache.
- */
-void __init p3_cache_init(void)
-{
- compute_alias(&boot_cpu_data.icache);
- compute_alias(&boot_cpu_data.dcache);
- compute_alias(&boot_cpu_data.scache);
-
- switch (boot_cpu_data.dcache.ways) {
- case 1:
- __flush_dcache_segment_fn = __flush_dcache_segment_1way;
- break;
- case 2:
- __flush_dcache_segment_fn = __flush_dcache_segment_2way;
- break;
- case 4:
- __flush_dcache_segment_fn = __flush_dcache_segment_4way;
- break;
- default:
- __flush_dcache_segment_fn = NULL;
- break;
- }
-
- emit_cache_params();
-}
-
-/*
- * Write back the dirty D-caches, but not invalidate them.
+ * Write back the range of D-cache, and purge the I-cache.
*
- * START: Virtual Address (U0, P1, or P3)
- * SIZE: Size of the region.
+ * Called from kernel/module.c:sys_init_module and routine for a.out format,
+ * signal handler code and kprobes code
*/
-void __flush_wback_region(void *start, int size)
+static void sh4_flush_icache_range(void *args)
{
- unsigned long v;
- unsigned long begin, end;
-
- begin = (unsigned long)start & ~(L1_CACHE_BYTES-1);
- end = ((unsigned long)start + size + L1_CACHE_BYTES-1)
- & ~(L1_CACHE_BYTES-1);
- for (v = begin; v < end; v+=L1_CACHE_BYTES) {
- asm volatile("ocbwb %0"
- : /* no output */
- : "m" (__m(v)));
- }
-}
+ struct flusher_data *data = args;
+ unsigned long start, end;
+ unsigned long flags, v;
+ int i;
-/*
- * Write back the dirty D-caches and invalidate them.
- *
- * START: Virtual Address (U0, P1, or P3)
- * SIZE: Size of the region.
- */
-void __flush_purge_region(void *start, int size)
-{
- unsigned long v;
- unsigned long begin, end;
-
- begin = (unsigned long)start & ~(L1_CACHE_BYTES-1);
- end = ((unsigned long)start + size + L1_CACHE_BYTES-1)
- & ~(L1_CACHE_BYTES-1);
- for (v = begin; v < end; v+=L1_CACHE_BYTES) {
- asm volatile("ocbp %0"
- : /* no output */
- : "m" (__m(v)));
- }
-}
+ start = data->addr1;
+ end = data->addr2;
-/*
- * No write back please
- */
-void __flush_invalidate_region(void *start, int size)
-{
- unsigned long v;
- unsigned long begin, end;
-
- begin = (unsigned long)start & ~(L1_CACHE_BYTES-1);
- end = ((unsigned long)start + size + L1_CACHE_BYTES-1)
- & ~(L1_CACHE_BYTES-1);
- for (v = begin; v < end; v+=L1_CACHE_BYTES) {
- asm volatile("ocbi %0"
- : /* no output */
- : "m" (__m(v)));
+ /* If there are too many pages then just blow away the caches */
+ if (((end - start) >> PAGE_SHIFT) >= MAX_ICACHE_PAGES) {
+ local_flush_cache_all(NULL);
+ return;
}
-}
-/*
- * Write back the range of D-cache, and purge the I-cache.
- *
- * Called from kernel/module.c:sys_init_module and routine for a.out format.
- */
-void flush_icache_range(unsigned long start, unsigned long end)
-{
- flush_cache_all();
-}
+ /*
+ * Selectively flush d-cache then invalidate the i-cache.
+ * This is inefficient, so only use this for small ranges.
+ */
+ start &= ~(L1_CACHE_BYTES-1);
+ end += L1_CACHE_BYTES-1;
+ end &= ~(L1_CACHE_BYTES-1);
-/*
- * Write back the D-cache and purge the I-cache for signal trampoline.
- * .. which happens to be the same behavior as flush_icache_range().
- * So, we simply flush out a line.
- */
-void __uses_jump_to_uncached flush_cache_sigtramp(unsigned long addr)
-{
- unsigned long v, index;
- unsigned long flags;
- int i;
+ local_irq_save(flags);
+ jump_to_uncached();
- v = addr & ~(L1_CACHE_BYTES-1);
- asm volatile("ocbwb %0"
- : /* no output */
- : "m" (__m(v)));
+ for (v = start; v < end; v += L1_CACHE_BYTES) {
+ unsigned long icacheaddr;
+ int j, n;
- index = CACHE_IC_ADDRESS_ARRAY |
- (v & boot_cpu_data.icache.entry_mask);
+ __ocbwb(v);
- local_irq_save(flags);
- jump_to_uncached();
+ icacheaddr = CACHE_IC_ADDRESS_ARRAY | (v &
+ cpu_data->icache.entry_mask);
- for (i = 0; i < boot_cpu_data.icache.ways;
- i++, index += boot_cpu_data.icache.way_incr)
- ctrl_outl(0, index); /* Clear out Valid-bit */
+ /* Clear i-cache line valid-bit */
+ n = boot_cpu_data.icache.n_aliases;
+ for (i = 0; i < cpu_data->icache.ways; i++) {
+ for (j = 0; j < n; j++)
+ __raw_writel(0, icacheaddr + (j * PAGE_SIZE));
+ icacheaddr += cpu_data->icache.way_incr;
+ }
+ }
back_to_cached();
- wmb();
local_irq_restore(flags);
}
-static inline void flush_cache_4096(unsigned long start,
- unsigned long phys)
+static inline void flush_cache_one(unsigned long start, unsigned long phys)
{
unsigned long flags, exec_offset = 0;
/*
- * All types of SH-4 require PC to be in P2 to operate on the I-cache.
- * Some types of SH-4 require PC to be in P2 to operate on the D-cache.
+ * All types of SH-4 require PC to be uncached to operate on the I-cache.
+ * Some types of SH-4 require PC to be uncached to operate on the D-cache.
*/
if ((boot_cpu_data.flags & CPU_HAS_P2_FLUSH_BUG) ||
(start < CACHE_OC_ADDRESS_ARRAY))
- exec_offset = 0x20000000;
+ exec_offset = cached_to_uncached;
local_irq_save(flags);
- __flush_cache_4096(start | SH_CACHE_ASSOC,
- P1SEGADDR(phys), exec_offset);
+ __flush_cache_one(start, phys, exec_offset);
local_irq_restore(flags);
}
@@ -239,24 +107,25 @@ static inline void flush_cache_4096(unsigned long start,
* Write back & invalidate the D-cache of the page.
* (To avoid "alias" issues)
*/
-void flush_dcache_page(struct page *page)
+static void sh4_flush_dcache_page(void *arg)
{
- if (test_bit(PG_mapped, &page->flags)) {
- unsigned long phys = PHYSADDR(page_address(page));
- unsigned long addr = CACHE_OC_ADDRESS_ARRAY;
- int i, n;
-
- /* Loop all the D-cache */
- n = boot_cpu_data.dcache.n_aliases;
- for (i = 0; i < n; i++, addr += 4096)
- flush_cache_4096(addr, phys);
- }
+ struct page *page = arg;
+ unsigned long addr = (unsigned long)page_address(page);
+#ifndef CONFIG_SMP
+ struct address_space *mapping = page_mapping(page);
+
+ if (mapping && !mapping_mapped(mapping))
+ clear_bit(PG_dcache_clean, &page->flags);
+ else
+#endif
+ flush_cache_one(CACHE_OC_ADDRESS_ARRAY |
+ (addr & shm_align_mask), page_to_phys(page));
wmb();
}
/* TODO: Selective icache invalidation through IC address array.. */
-static inline void __uses_jump_to_uncached flush_icache_all(void)
+static void flush_icache_all(void)
{
unsigned long flags, ccr;
@@ -264,9 +133,9 @@ static inline void __uses_jump_to_uncached flush_icache_all(void)
jump_to_uncached();
/* Flush I-cache */
- ccr = ctrl_inl(CCR);
+ ccr = __raw_readl(SH_CCR);
ccr |= CCR_CACHE_ICI;
- ctrl_outl(ccr, CCR);
+ __raw_writel(ccr, SH_CCR);
/*
* back_to_cached() will take care of the barrier for us, don't add
@@ -277,133 +146,53 @@ static inline void __uses_jump_to_uncached flush_icache_all(void)
local_irq_restore(flags);
}
-void flush_dcache_all(void)
+static void flush_dcache_all(void)
{
- (*__flush_dcache_segment_fn)(0UL, boot_cpu_data.dcache.way_size);
- wmb();
+ unsigned long addr, end_addr, entry_offset;
+
+ end_addr = CACHE_OC_ADDRESS_ARRAY +
+ (current_cpu_data.dcache.sets <<
+ current_cpu_data.dcache.entry_shift) *
+ current_cpu_data.dcache.ways;
+
+ entry_offset = 1 << current_cpu_data.dcache.entry_shift;
+
+ for (addr = CACHE_OC_ADDRESS_ARRAY; addr < end_addr; ) {
+ __raw_writel(0, addr); addr += entry_offset;
+ __raw_writel(0, addr); addr += entry_offset;
+ __raw_writel(0, addr); addr += entry_offset;
+ __raw_writel(0, addr); addr += entry_offset;
+ __raw_writel(0, addr); addr += entry_offset;
+ __raw_writel(0, addr); addr += entry_offset;
+ __raw_writel(0, addr); addr += entry_offset;
+ __raw_writel(0, addr); addr += entry_offset;
+ }
}
-void flush_cache_all(void)
+static void sh4_flush_cache_all(void *unused)
{
flush_dcache_all();
flush_icache_all();
}
-static void __flush_cache_mm(struct mm_struct *mm, unsigned long start,
- unsigned long end)
-{
- unsigned long d = 0, p = start & PAGE_MASK;
- unsigned long alias_mask = boot_cpu_data.dcache.alias_mask;
- unsigned long n_aliases = boot_cpu_data.dcache.n_aliases;
- unsigned long select_bit;
- unsigned long all_aliases_mask;
- unsigned long addr_offset;
- pgd_t *dir;
- pmd_t *pmd;
- pud_t *pud;
- pte_t *pte;
- int i;
-
- dir = pgd_offset(mm, p);
- pud = pud_offset(dir, p);
- pmd = pmd_offset(pud, p);
- end = PAGE_ALIGN(end);
-
- all_aliases_mask = (1 << n_aliases) - 1;
-
- do {
- if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) {
- p &= PMD_MASK;
- p += PMD_SIZE;
- pmd++;
-
- continue;
- }
-
- pte = pte_offset_kernel(pmd, p);
-
- do {
- unsigned long phys;
- pte_t entry = *pte;
-
- if (!(pte_val(entry) & _PAGE_PRESENT)) {
- pte++;
- p += PAGE_SIZE;
- continue;
- }
-
- phys = pte_val(entry) & PTE_PHYS_MASK;
-
- if ((p ^ phys) & alias_mask) {
- d |= 1 << ((p & alias_mask) >> PAGE_SHIFT);
- d |= 1 << ((phys & alias_mask) >> PAGE_SHIFT);
-
- if (d == all_aliases_mask)
- goto loop_exit;
- }
-
- pte++;
- p += PAGE_SIZE;
- } while (p < end && ((unsigned long)pte & ~PAGE_MASK));
- pmd++;
- } while (p < end);
-
-loop_exit:
- addr_offset = 0;
- select_bit = 1;
-
- for (i = 0; i < n_aliases; i++) {
- if (d & select_bit) {
- (*__flush_dcache_segment_fn)(addr_offset, PAGE_SIZE);
- wmb();
- }
-
- select_bit <<= 1;
- addr_offset += PAGE_SIZE;
- }
-}
-
/*
* Note : (RPC) since the caches are physically tagged, the only point
* of flush_cache_mm for SH-4 is to get rid of aliases from the
* D-cache. The assumption elsewhere, e.g. flush_cache_range, is that
* lines can stay resident so long as the virtual address they were
* accessed with (hence cache set) is in accord with the physical
- * address (i.e. tag). It's no different here. So I reckon we don't
- * need to flush the I-cache, since aliases don't matter for that. We
- * should try that.
+ * address (i.e. tag). It's no different here.
*
* Caller takes mm->mmap_sem.
*/
-void flush_cache_mm(struct mm_struct *mm)
+static void sh4_flush_cache_mm(void *arg)
{
- /*
- * If cache is only 4k-per-way, there are never any 'aliases'. Since
- * the cache is physically tagged, the data can just be left in there.
- */
- if (boot_cpu_data.dcache.n_aliases == 0)
- return;
+ struct mm_struct *mm = arg;
- /*
- * Don't bother groveling around the dcache for the VMA ranges
- * if there are too many PTEs to make it worthwhile.
- */
- if (mm->nr_ptes >= MAX_DCACHE_PAGES)
- flush_dcache_all();
- else {
- struct vm_area_struct *vma;
-
- /*
- * In this case there are reasonably sized ranges to flush,
- * iterate through the VMA list and take care of any aliases.
- */
- for (vma = mm->mmap; vma; vma = vma->vm_next)
- __flush_cache_mm(mm, vma->vm_start, vma->vm_end);
- }
+ if (cpu_context(smp_processor_id(), mm) == NO_CONTEXT)
+ return;
- /* Only touch the icache if one of the VMAs has VM_EXEC set. */
- if (mm->exec_vm)
- flush_icache_all();
+ flush_dcache_all();
}
/*
@@ -412,39 +201,66 @@ void flush_cache_mm(struct mm_struct *mm)
* ADDR: Virtual Address (U0 address)
* PFN: Physical page number
*/
-void flush_cache_page(struct vm_area_struct *vma, unsigned long address,
- unsigned long pfn)
+static void sh4_flush_cache_page(void *args)
{
- unsigned long phys = pfn << PAGE_SHIFT;
- unsigned int alias_mask;
-
- alias_mask = boot_cpu_data.dcache.alias_mask;
-
- /* We only need to flush D-cache when we have alias */
- if ((address^phys) & alias_mask) {
- /* Loop 4K of the D-cache */
- flush_cache_4096(
- CACHE_OC_ADDRESS_ARRAY | (address & alias_mask),
- phys);
- /* Loop another 4K of the D-cache */
- flush_cache_4096(
- CACHE_OC_ADDRESS_ARRAY | (phys & alias_mask),
- phys);
- }
+ struct flusher_data *data = args;
+ struct vm_area_struct *vma;
+ struct page *page;
+ unsigned long address, pfn, phys;
+ int map_coherent = 0;
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+ void *vaddr;
- alias_mask = boot_cpu_data.icache.alias_mask;
- if (vma->vm_flags & VM_EXEC) {
+ vma = data->vma;
+ address = data->addr1 & PAGE_MASK;
+ pfn = data->addr2;
+ phys = pfn << PAGE_SHIFT;
+ page = pfn_to_page(pfn);
+
+ if (cpu_context(smp_processor_id(), vma->vm_mm) == NO_CONTEXT)
+ return;
+
+ pgd = pgd_offset(vma->vm_mm, address);
+ pud = pud_offset(pgd, address);
+ pmd = pmd_offset(pud, address);
+ pte = pte_offset_kernel(pmd, address);
+
+ /* If the page isn't present, there is nothing to do here. */
+ if (!(pte_val(*pte) & _PAGE_PRESENT))
+ return;
+
+ if ((vma->vm_mm == current->active_mm))
+ vaddr = NULL;
+ else {
/*
- * Evict entries from the portion of the cache from which code
- * may have been executed at this address (virtual). There's
- * no need to evict from the portion corresponding to the
- * physical address as for the D-cache, because we know the
- * kernel has never executed the code through its identity
- * translation.
+ * Use kmap_coherent or kmap_atomic to do flushes for
+ * another ASID than the current one.
*/
- flush_cache_4096(
- CACHE_IC_ADDRESS_ARRAY | (address & alias_mask),
- phys);
+ map_coherent = (current_cpu_data.dcache.n_aliases &&
+ test_bit(PG_dcache_clean, &page->flags) &&
+ page_mapped(page));
+ if (map_coherent)
+ vaddr = kmap_coherent(page, address);
+ else
+ vaddr = kmap_atomic(page);
+
+ address = (unsigned long)vaddr;
+ }
+
+ flush_cache_one(CACHE_OC_ADDRESS_ARRAY |
+ (address & shm_align_mask), phys);
+
+ if (vma->vm_flags & VM_EXEC)
+ flush_icache_all();
+
+ if (vaddr) {
+ if (map_coherent)
+ kunmap_coherent(vaddr);
+ else
+ kunmap_atomic(vaddr);
}
}
@@ -457,9 +273,19 @@ void flush_cache_page(struct vm_area_struct *vma, unsigned long address,
* Flushing the cache lines for U0 only isn't enough.
* We need to flush for P1 too, which may contain aliases.
*/
-void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
- unsigned long end)
+static void sh4_flush_cache_range(void *args)
{
+ struct flusher_data *data = args;
+ struct vm_area_struct *vma;
+ unsigned long start, end;
+
+ vma = data->vma;
+ start = data->addr1;
+ end = data->addr2;
+
+ if (cpu_context(smp_processor_id(), vma->vm_mm) == NO_CONTEXT)
+ return;
+
/*
* If cache is only 4k-per-way, there are never any 'aliases'. Since
* the cache is physically tagged, the data can just be left in there.
@@ -467,42 +293,14 @@ void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
if (boot_cpu_data.dcache.n_aliases == 0)
return;
- /*
- * Don't bother with the lookup and alias check if we have a
- * wide range to cover, just blow away the dcache in its
- * entirety instead. -- PFM.
- */
- if (((end - start) >> PAGE_SHIFT) >= MAX_DCACHE_PAGES)
- flush_dcache_all();
- else
- __flush_cache_mm(vma->vm_mm, start, end);
+ flush_dcache_all();
- if (vma->vm_flags & VM_EXEC) {
- /*
- * TODO: Is this required??? Need to look at how I-cache
- * coherency is assured when new programs are loaded to see if
- * this matters.
- */
+ if (vma->vm_flags & VM_EXEC)
flush_icache_all();
- }
-}
-
-/*
- * flush_icache_user_range
- * @vma: VMA of the process
- * @page: page
- * @addr: U0 address
- * @len: length of the range (< page size)
- */
-void flush_icache_user_range(struct vm_area_struct *vma,
- struct page *page, unsigned long addr, int len)
-{
- flush_cache_page(vma, addr, page_to_pfn(page));
- mb();
}
/**
- * __flush_cache_4096
+ * __flush_cache_one
*
* @addr: address in memory mapped cache array
* @phys: P1 address to flush (has to match tags if addr has 'A' bit
@@ -515,7 +313,7 @@ void flush_icache_user_range(struct vm_area_struct *vma,
* operation (purge/write-back) is selected by the lower 2 bits of
* 'phys'.
*/
-static void __flush_cache_4096(unsigned long addr, unsigned long phys,
+static void __flush_cache_one(unsigned long addr, unsigned long phys,
unsigned long exec_offset)
{
int way_count;
@@ -572,199 +370,25 @@ static void __flush_cache_4096(unsigned long addr, unsigned long phys,
} while (--way_count != 0);
}
+extern void __weak sh4__flush_region_init(void);
+
/*
- * Break the 1, 2 and 4 way variants of this out into separate functions to
- * avoid nearly all the overhead of having the conditional stuff in the function
- * bodies (+ the 1 and 2 way cases avoid saving any registers too).
+ * SH-4 has virtually indexed and physically tagged cache.
*/
-static void __flush_dcache_segment_1way(unsigned long start,
- unsigned long extent_per_way)
-{
- unsigned long orig_sr, sr_with_bl;
- unsigned long base_addr;
- unsigned long way_incr, linesz, way_size;
- struct cache_info *dcache;
- register unsigned long a0, a0e;
-
- asm volatile("stc sr, %0" : "=r" (orig_sr));
- sr_with_bl = orig_sr | (1<<28);
- base_addr = ((unsigned long)&empty_zero_page[0]);
-
- /*
- * The previous code aligned base_addr to 16k, i.e. the way_size of all
- * existing SH-4 D-caches. Whilst I don't see a need to have this
- * aligned to any better than the cache line size (which it will be
- * anyway by construction), let's align it to at least the way_size of
- * any existing or conceivable SH-4 D-cache. -- RPC
- */
- base_addr = ((base_addr >> 16) << 16);
- base_addr |= start;
-
- dcache = &boot_cpu_data.dcache;
- linesz = dcache->linesz;
- way_incr = dcache->way_incr;
- way_size = dcache->way_size;
-
- a0 = base_addr;
- a0e = base_addr + extent_per_way;
- do {
- asm volatile("ldc %0, sr" : : "r" (sr_with_bl));
- asm volatile("movca.l r0, @%0\n\t"
- "ocbi @%0" : : "r" (a0));
- a0 += linesz;
- asm volatile("movca.l r0, @%0\n\t"
- "ocbi @%0" : : "r" (a0));
- a0 += linesz;
- asm volatile("movca.l r0, @%0\n\t"
- "ocbi @%0" : : "r" (a0));
- a0 += linesz;
- asm volatile("movca.l r0, @%0\n\t"
- "ocbi @%0" : : "r" (a0));
- asm volatile("ldc %0, sr" : : "r" (orig_sr));
- a0 += linesz;
- } while (a0 < a0e);
-}
-
-static void __flush_dcache_segment_2way(unsigned long start,
- unsigned long extent_per_way)
+void __init sh4_cache_init(void)
{
- unsigned long orig_sr, sr_with_bl;
- unsigned long base_addr;
- unsigned long way_incr, linesz, way_size;
- struct cache_info *dcache;
- register unsigned long a0, a1, a0e;
-
- asm volatile("stc sr, %0" : "=r" (orig_sr));
- sr_with_bl = orig_sr | (1<<28);
- base_addr = ((unsigned long)&empty_zero_page[0]);
-
- /* See comment under 1-way above */
- base_addr = ((base_addr >> 16) << 16);
- base_addr |= start;
-
- dcache = &boot_cpu_data.dcache;
- linesz = dcache->linesz;
- way_incr = dcache->way_incr;
- way_size = dcache->way_size;
-
- a0 = base_addr;
- a1 = a0 + way_incr;
- a0e = base_addr + extent_per_way;
- do {
- asm volatile("ldc %0, sr" : : "r" (sr_with_bl));
- asm volatile("movca.l r0, @%0\n\t"
- "movca.l r0, @%1\n\t"
- "ocbi @%0\n\t"
- "ocbi @%1" : :
- "r" (a0), "r" (a1));
- a0 += linesz;
- a1 += linesz;
- asm volatile("movca.l r0, @%0\n\t"
- "movca.l r0, @%1\n\t"
- "ocbi @%0\n\t"
- "ocbi @%1" : :
- "r" (a0), "r" (a1));
- a0 += linesz;
- a1 += linesz;
- asm volatile("movca.l r0, @%0\n\t"
- "movca.l r0, @%1\n\t"
- "ocbi @%0\n\t"
- "ocbi @%1" : :
- "r" (a0), "r" (a1));
- a0 += linesz;
- a1 += linesz;
- asm volatile("movca.l r0, @%0\n\t"
- "movca.l r0, @%1\n\t"
- "ocbi @%0\n\t"
- "ocbi @%1" : :
- "r" (a0), "r" (a1));
- asm volatile("ldc %0, sr" : : "r" (orig_sr));
- a0 += linesz;
- a1 += linesz;
- } while (a0 < a0e);
-}
-
-static void __flush_dcache_segment_4way(unsigned long start,
- unsigned long extent_per_way)
-{
- unsigned long orig_sr, sr_with_bl;
- unsigned long base_addr;
- unsigned long way_incr, linesz, way_size;
- struct cache_info *dcache;
- register unsigned long a0, a1, a2, a3, a0e;
-
- asm volatile("stc sr, %0" : "=r" (orig_sr));
- sr_with_bl = orig_sr | (1<<28);
- base_addr = ((unsigned long)&empty_zero_page[0]);
-
- /* See comment under 1-way above */
- base_addr = ((base_addr >> 16) << 16);
- base_addr |= start;
-
- dcache = &boot_cpu_data.dcache;
- linesz = dcache->linesz;
- way_incr = dcache->way_incr;
- way_size = dcache->way_size;
-
- a0 = base_addr;
- a1 = a0 + way_incr;
- a2 = a1 + way_incr;
- a3 = a2 + way_incr;
- a0e = base_addr + extent_per_way;
- do {
- asm volatile("ldc %0, sr" : : "r" (sr_with_bl));
- asm volatile("movca.l r0, @%0\n\t"
- "movca.l r0, @%1\n\t"
- "movca.l r0, @%2\n\t"
- "movca.l r0, @%3\n\t"
- "ocbi @%0\n\t"
- "ocbi @%1\n\t"
- "ocbi @%2\n\t"
- "ocbi @%3\n\t" : :
- "r" (a0), "r" (a1), "r" (a2), "r" (a3));
- a0 += linesz;
- a1 += linesz;
- a2 += linesz;
- a3 += linesz;
- asm volatile("movca.l r0, @%0\n\t"
- "movca.l r0, @%1\n\t"
- "movca.l r0, @%2\n\t"
- "movca.l r0, @%3\n\t"
- "ocbi @%0\n\t"
- "ocbi @%1\n\t"
- "ocbi @%2\n\t"
- "ocbi @%3\n\t" : :
- "r" (a0), "r" (a1), "r" (a2), "r" (a3));
- a0 += linesz;
- a1 += linesz;
- a2 += linesz;
- a3 += linesz;
- asm volatile("movca.l r0, @%0\n\t"
- "movca.l r0, @%1\n\t"
- "movca.l r0, @%2\n\t"
- "movca.l r0, @%3\n\t"
- "ocbi @%0\n\t"
- "ocbi @%1\n\t"
- "ocbi @%2\n\t"
- "ocbi @%3\n\t" : :
- "r" (a0), "r" (a1), "r" (a2), "r" (a3));
- a0 += linesz;
- a1 += linesz;
- a2 += linesz;
- a3 += linesz;
- asm volatile("movca.l r0, @%0\n\t"
- "movca.l r0, @%1\n\t"
- "movca.l r0, @%2\n\t"
- "movca.l r0, @%3\n\t"
- "ocbi @%0\n\t"
- "ocbi @%1\n\t"
- "ocbi @%2\n\t"
- "ocbi @%3\n\t" : :
- "r" (a0), "r" (a1), "r" (a2), "r" (a3));
- asm volatile("ldc %0, sr" : : "r" (orig_sr));
- a0 += linesz;
- a1 += linesz;
- a2 += linesz;
- a3 += linesz;
- } while (a0 < a0e);
+ printk("PVR=%08x CVR=%08x PRR=%08x\n",
+ __raw_readl(CCN_PVR),
+ __raw_readl(CCN_CVR),
+ __raw_readl(CCN_PRR));
+
+ local_flush_icache_range = sh4_flush_icache_range;
+ local_flush_dcache_page = sh4_flush_dcache_page;
+ local_flush_cache_all = sh4_flush_cache_all;
+ local_flush_cache_mm = sh4_flush_cache_mm;
+ local_flush_cache_dup_mm = sh4_flush_cache_mm;
+ local_flush_cache_page = sh4_flush_cache_page;
+ local_flush_cache_range = sh4_flush_cache_range;
+
+ sh4__flush_region_init();
}
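
The rewritten sh4_cache_init() above no longer exports flush entry points directly; it registers the SH-4 handlers with the local_flush_* pointers dispatched by the common arch/sh/mm/cache.c added later in this diff. A minimal sketch of that registration pattern, with stand-in bodies (only the function and pointer names are taken from the diff):

/* Sketch of the local_flush_* registration pattern; bodies are stubs. */
static void cache_noop(void *args) { (void)args; }

/* The common cache code calls through these; the defaults do nothing. */
static void (*local_flush_icache_range)(void *args) = cache_noop;
static void (*local_flush_dcache_page)(void *args) = cache_noop;

static void sh4_flush_icache_range(void *args) { (void)args; /* wback D, inv I */ }
static void sh4_flush_dcache_page(void *page)  { (void)page; /* purge aliases  */ }

void sh4_cache_init(void)
{
	local_flush_icache_range = sh4_flush_icache_range;
	local_flush_dcache_page  = sh4_flush_dcache_page;
}
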
diff --git a/arch/sh/mm/cache-sh5.c b/arch/sh/mm/cache-sh5.c
index 3877321fced..d1bffbcd9d5 100644
--- a/arch/sh/mm/cache-sh5.c
+++ b/arch/sh/mm/cache-sh5.c
@@ -20,23 +20,11 @@
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
+extern void __weak sh4__flush_region_init(void);
+
/* Wired TLB entry for the D-cache */
static unsigned long long dtlb_cache_slot;
-void __init p3_cache_init(void)
-{
- /* Reserve a slot for dcache colouring in the DTLB */
- dtlb_cache_slot = sh64_get_wired_dtlb_entry();
-}
-
-#ifdef CONFIG_DCACHE_DISABLED
-#define sh64_dcache_purge_all() do { } while (0)
-#define sh64_dcache_purge_coloured_phy_page(paddr, eaddr) do { } while (0)
-#define sh64_dcache_purge_user_range(mm, start, end) do { } while (0)
-#define sh64_dcache_purge_phy_page(paddr) do { } while (0)
-#define sh64_dcache_purge_virt_page(mm, eaddr) do { } while (0)
-#endif
-
/*
* The following group of functions deal with mapping and unmapping a
* temporary page into a DTLB slot that has been set aside for exclusive
@@ -56,11 +44,10 @@ static inline void sh64_teardown_dtlb_cache_slot(void)
local_irq_enable();
}
-#ifndef CONFIG_ICACHE_DISABLED
static inline void sh64_icache_inv_all(void)
{
unsigned long long addr, flag, data;
- unsigned int flags;
+ unsigned long flags;
addr = ICCR0;
flag = ICCR0_ICI;
@@ -172,7 +159,7 @@ static void sh64_icache_inv_user_page_range(struct mm_struct *mm,
unsigned long eaddr;
unsigned long after_last_page_start;
unsigned long mm_asid, current_asid;
- unsigned long long flags = 0ULL;
+ unsigned long flags = 0;
mm_asid = cpu_asid(smp_processor_id(), mm);
current_asid = get_asid();
@@ -214,52 +201,6 @@ static void sh64_icache_inv_user_page_range(struct mm_struct *mm,
}
}
-/*
- * Invalidate a small range of user context I-cache, not necessarily page
- * (or even cache-line) aligned.
- *
- * Since this is used inside ptrace, the ASID in the mm context typically
- * won't match current_asid. We'll have to switch ASID to do this. For
- * safety, and given that the range will be small, do all this under cli.
- *
- * Note, there is a hazard that the ASID in mm->context is no longer
- * actually associated with mm, i.e. if the mm->context has started a new
- * cycle since mm was last active. However, this is just a performance
- * issue: all that happens is that we invalidate lines belonging to
- * another mm, so the owning process has to refill them when that mm goes
- * live again. mm itself can't have any cache entries because there will
- * have been a flush_cache_all when the new mm->context cycle started.
- */
-static void sh64_icache_inv_user_small_range(struct mm_struct *mm,
- unsigned long start, int len)
-{
- unsigned long long eaddr = start;
- unsigned long long eaddr_end = start + len;
- unsigned long current_asid, mm_asid;
- unsigned long long flags;
- unsigned long long epage_start;
-
- /*
- * Align to start of cache line. Otherwise, suppose len==8 and
- * start was at 32N+28 : the last 4 bytes wouldn't get invalidated.
- */
- eaddr = L1_CACHE_ALIGN(start);
- eaddr_end = start + len;
-
- mm_asid = cpu_asid(smp_processor_id(), mm);
- local_irq_save(flags);
- current_asid = switch_and_save_asid(mm_asid);
-
- epage_start = eaddr & PAGE_MASK;
-
- while (eaddr < eaddr_end) {
- __asm__ __volatile__("icbi %0, 0" : : "r" (eaddr));
- eaddr += L1_CACHE_BYTES;
- }
- switch_and_save_asid(current_asid);
- local_irq_restore(flags);
-}
-
static void sh64_icache_inv_current_user_range(unsigned long start, unsigned long end)
{
/* The icbi instruction never raises ITLBMISS. i.e. if there's not a
@@ -287,9 +228,7 @@ static void sh64_icache_inv_current_user_range(unsigned long start, unsigned lon
addr += L1_CACHE_BYTES;
}
}
-#endif /* !CONFIG_ICACHE_DISABLED */
-#ifndef CONFIG_DCACHE_DISABLED
/* Buffer used as the target of alloco instructions to purge data from cache
sets by natural eviction. -- RPC */
#define DUMMY_ALLOCO_AREA_SIZE ((L1_CACHE_BYTES << 10) + (1024 * 4))
@@ -342,7 +281,7 @@ static void inline sh64_dcache_purge_sets(int sets_to_purge_base, int n_sets)
* alloco is a NOP if the cache is write-through.
*/
if (test_bit(SH_CACHE_MODE_WT, &(cpu_data->dcache.flags)))
- ctrl_inb(eaddr);
+ __raw_readb((unsigned long)eaddr);
}
}
@@ -541,59 +480,10 @@ static void sh64_dcache_purge_user_range(struct mm_struct *mm,
}
/*
- * Purge the range of addresses from the D-cache.
- *
- * The addresses lie in the superpage mapping. There's no harm if we
- * overpurge at either end - just a small performance loss.
- */
-void __flush_purge_region(void *start, int size)
-{
- unsigned long long ullend, addr, aligned_start;
-
- aligned_start = (unsigned long long)(signed long long)(signed long) start;
- addr = L1_CACHE_ALIGN(aligned_start);
- ullend = (unsigned long long) (signed long long) (signed long) start + size;
-
- while (addr <= ullend) {
- __asm__ __volatile__ ("ocbp %0, 0" : : "r" (addr));
- addr += L1_CACHE_BYTES;
- }
-}
-
-void __flush_wback_region(void *start, int size)
-{
- unsigned long long ullend, addr, aligned_start;
-
- aligned_start = (unsigned long long)(signed long long)(signed long) start;
- addr = L1_CACHE_ALIGN(aligned_start);
- ullend = (unsigned long long) (signed long long) (signed long) start + size;
-
- while (addr < ullend) {
- __asm__ __volatile__ ("ocbwb %0, 0" : : "r" (addr));
- addr += L1_CACHE_BYTES;
- }
-}
-
-void __flush_invalidate_region(void *start, int size)
-{
- unsigned long long ullend, addr, aligned_start;
-
- aligned_start = (unsigned long long)(signed long long)(signed long) start;
- addr = L1_CACHE_ALIGN(aligned_start);
- ullend = (unsigned long long) (signed long long) (signed long) start + size;
-
- while (addr < ullend) {
- __asm__ __volatile__ ("ocbi %0, 0" : : "r" (addr));
- addr += L1_CACHE_BYTES;
- }
-}
-#endif /* !CONFIG_DCACHE_DISABLED */
-
-/*
* Invalidate the entire contents of both caches, after writing back to
* memory any dirty data from the D-cache.
*/
-void flush_cache_all(void)
+static void sh5_flush_cache_all(void *unused)
{
sh64_dcache_purge_all();
sh64_icache_inv_all();
@@ -620,7 +510,7 @@ void flush_cache_all(void)
* I-cache. This is similar to the lack of action needed in
* flush_tlb_mm - see fault.c.
*/
-void flush_cache_mm(struct mm_struct *mm)
+static void sh5_flush_cache_mm(void *unused)
{
sh64_dcache_purge_all();
}
@@ -632,13 +522,18 @@ void flush_cache_mm(struct mm_struct *mm)
*
* Note, 'end' is 1 byte beyond the end of the range to flush.
*/
-void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
- unsigned long end)
+static void sh5_flush_cache_range(void *args)
{
- struct mm_struct *mm = vma->vm_mm;
+ struct flusher_data *data = args;
+ struct vm_area_struct *vma;
+ unsigned long start, end;
- sh64_dcache_purge_user_range(mm, start, end);
- sh64_icache_inv_user_page_range(mm, start, end);
+ vma = data->vma;
+ start = data->addr1;
+ end = data->addr2;
+
+ sh64_dcache_purge_user_range(vma->vm_mm, start, end);
+ sh64_icache_inv_user_page_range(vma->vm_mm, start, end);
}
/*
@@ -650,183 +545,77 @@ void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
*
* Note, this is called with pte lock held.
*/
-void flush_cache_page(struct vm_area_struct *vma, unsigned long eaddr,
- unsigned long pfn)
+static void sh5_flush_cache_page(void *args)
{
+ struct flusher_data *data = args;
+ struct vm_area_struct *vma;
+ unsigned long eaddr, pfn;
+
+ vma = data->vma;
+ eaddr = data->addr1;
+ pfn = data->addr2;
+
sh64_dcache_purge_phy_page(pfn << PAGE_SHIFT);
if (vma->vm_flags & VM_EXEC)
sh64_icache_inv_user_page(vma, eaddr);
}
-void flush_dcache_page(struct page *page)
+static void sh5_flush_dcache_page(void *page)
{
- sh64_dcache_purge_phy_page(page_to_phys(page));
+ sh64_dcache_purge_phy_page(page_to_phys((struct page *)page));
wmb();
}
/*
- * Flush the range [start,end] of kernel virtual adddress space from
+ * Flush the range [start,end] of kernel virtual address space from
* the I-cache. The corresponding range must be purged from the
* D-cache also because the SH-5 doesn't have cache snooping between
* the caches. The addresses will be visible through the superpage
mapping, therefore it's guaranteed that there are no cache entries for
* the range in cache sets of the wrong colour.
*/
-void flush_icache_range(unsigned long start, unsigned long end)
+static void sh5_flush_icache_range(void *args)
{
+ struct flusher_data *data = args;
+ unsigned long start, end;
+
+ start = data->addr1;
+ end = data->addr2;
+
__flush_purge_region((void *)start, end);
wmb();
sh64_icache_inv_kernel_range(start, end);
}
/*
- * Flush the range of user (defined by vma->vm_mm) address space starting
- * at 'addr' for 'len' bytes from the cache. The range does not straddle
- * a page boundary, the unique physical page containing the range is
- * 'page'. This seems to be used mainly for invalidating an address
- * range following a poke into the program text through the ptrace() call
- * from another process (e.g. for BRK instruction insertion).
- */
-void flush_icache_user_range(struct vm_area_struct *vma,
- struct page *page, unsigned long addr, int len)
-{
-
- sh64_dcache_purge_coloured_phy_page(page_to_phys(page), addr);
- mb();
-
- if (vma->vm_flags & VM_EXEC)
- sh64_icache_inv_user_small_range(vma->vm_mm, addr, len);
-}
-
-/*
* For the address range [start,end), write back the data from the
* D-cache and invalidate the corresponding region of the I-cache for the
* current process. Used to flush signal trampolines on the stack to
* make them executable.
*/
-void flush_cache_sigtramp(unsigned long vaddr)
+static void sh5_flush_cache_sigtramp(void *vaddr)
{
- unsigned long end = vaddr + L1_CACHE_BYTES;
+ unsigned long end = (unsigned long)vaddr + L1_CACHE_BYTES;
- __flush_wback_region((void *)vaddr, L1_CACHE_BYTES);
+ __flush_wback_region(vaddr, L1_CACHE_BYTES);
wmb();
- sh64_icache_inv_current_user_range(vaddr, end);
-}
-
-/*
- * These *MUST* lie in an area of virtual address space that's otherwise
- * unused.
- */
-#define UNIQUE_EADDR_START 0xe0000000UL
-#define UNIQUE_EADDR_END 0xe8000000UL
-
-/*
- * Given a physical address paddr, and a user virtual address user_eaddr
- * which will eventually be mapped to it, create a one-off kernel-private
- * eaddr mapped to the same paddr. This is used for creating special
- * destination pages for copy_user_page and clear_user_page.
- */
-static unsigned long sh64_make_unique_eaddr(unsigned long user_eaddr,
- unsigned long paddr)
-{
- static unsigned long current_pointer = UNIQUE_EADDR_START;
- unsigned long coloured_pointer;
-
- if (current_pointer == UNIQUE_EADDR_END) {
- sh64_dcache_purge_all();
- current_pointer = UNIQUE_EADDR_START;
- }
-
- coloured_pointer = (current_pointer & ~CACHE_OC_SYN_MASK) |
- (user_eaddr & CACHE_OC_SYN_MASK);
- sh64_setup_dtlb_cache_slot(coloured_pointer, get_asid(), paddr);
-
- current_pointer += (PAGE_SIZE << CACHE_OC_N_SYNBITS);
-
- return coloured_pointer;
-}
-
-static void sh64_copy_user_page_coloured(void *to, void *from,
- unsigned long address)
-{
- void *coloured_to;
-
- /*
- * Discard any existing cache entries of the wrong colour. These are
- * present quite often, if the kernel has recently used the page
- * internally, then given it up, then it's been allocated to the user.
- */
- sh64_dcache_purge_coloured_phy_page(__pa(to), (unsigned long)to);
-
- coloured_to = (void *)sh64_make_unique_eaddr(address, __pa(to));
- copy_page(from, coloured_to);
-
- sh64_teardown_dtlb_cache_slot();
-}
-
-static void sh64_clear_user_page_coloured(void *to, unsigned long address)
-{
- void *coloured_to;
-
- /*
- * Discard any existing kernel-originated lines of the wrong
- * colour (as above)
- */
- sh64_dcache_purge_coloured_phy_page(__pa(to), (unsigned long)to);
-
- coloured_to = (void *)sh64_make_unique_eaddr(address, __pa(to));
- clear_page(coloured_to);
-
- sh64_teardown_dtlb_cache_slot();
+ sh64_icache_inv_current_user_range((unsigned long)vaddr, end);
}
-/*
- * 'from' and 'to' are kernel virtual addresses (within the superpage
- * mapping of the physical RAM). 'address' is the user virtual address
- * where the copy 'to' will be mapped after. This allows a custom
- * mapping to be used to ensure that the new copy is placed in the
- * right cache sets for the user to see it without having to bounce it
- * out via memory. Note however : the call to flush_page_to_ram in
- * (generic)/mm/memory.c:(break_cow) undoes all this good work in that one
- * very important case!
- *
- * TBD : can we guarantee that on every call, any cache entries for
- * 'from' are in the same colour sets as 'address' also? i.e. is this
- * always used just to deal with COW? (I suspect not).
- *
- * There are two possibilities here for when the page 'from' was last accessed:
- * - by the kernel : this is OK, no purge required.
- * - by the/a user (e.g. for break_COW) : need to purge.
- *
- * If the potential user mapping at 'address' is the same colour as
- * 'from' there is no need to purge any cache lines from the 'from'
- * page mapped into cache sets of colour 'address'. (The copy will be
- * accessing the page through 'from').
- */
-void copy_user_page(void *to, void *from, unsigned long address,
- struct page *page)
+void __init sh5_cache_init(void)
{
- if (((address ^ (unsigned long) from) & CACHE_OC_SYN_MASK) != 0)
- sh64_dcache_purge_coloured_phy_page(__pa(from), address);
+ local_flush_cache_all = sh5_flush_cache_all;
+ local_flush_cache_mm = sh5_flush_cache_mm;
+ local_flush_cache_dup_mm = sh5_flush_cache_mm;
+ local_flush_cache_page = sh5_flush_cache_page;
+ local_flush_cache_range = sh5_flush_cache_range;
+ local_flush_dcache_page = sh5_flush_dcache_page;
+ local_flush_icache_range = sh5_flush_icache_range;
+ local_flush_cache_sigtramp = sh5_flush_cache_sigtramp;
- if (((address ^ (unsigned long) to) & CACHE_OC_SYN_MASK) == 0)
- copy_page(to, from);
- else
- sh64_copy_user_page_coloured(to, from, address);
-}
+ /* Reserve a slot for dcache colouring in the DTLB */
+ dtlb_cache_slot = sh64_get_wired_dtlb_entry();
-/*
- * 'to' is a kernel virtual address (within the superpage mapping of the
- * physical RAM). 'address' is the user virtual address where the 'to'
- * page will be mapped after. This allows a custom mapping to be used to
- * ensure that the new copy is placed in the right cache sets for the
- * user to see it without having to bounce it out via memory.
- */
-void clear_user_page(void *to, unsigned long address, struct page *page)
-{
- if (((address ^ (unsigned long) to) & CACHE_OC_SYN_MASK) == 0)
- clear_page(to);
- else
- sh64_clear_user_page_coloured(to, address);
+ sh4__flush_region_init();
}
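
With every hook reduced to a single void * argument (so it can be handed to smp_call_function()), multi-argument flushes travel through struct flusher_data: the generic wrapper packs vma/addr1/addr2 and the per-CPU handler unpacks them, as sh5_flush_cache_range() does above. A self-contained sketch of that calling convention, assuming only the field names shown in this diff:

struct vm_area_struct;			/* opaque here; real type in the kernel */

struct flusher_data {
	struct vm_area_struct *vma;
	unsigned long addr1, addr2;	/* start/end, or address/pfn */
};

/* Callee side: unpack, mirroring sh5_flush_cache_range(). */
static void sketch_flush_cache_range(void *args)
{
	struct flusher_data *data = args;
	unsigned long start = data->addr1, end = data->addr2;

	/* ... purge the D-cache and invalidate the I-cache for [start, end) ... */
	(void)start; (void)end;
}

/* Caller side: pack; the real wrapper then broadcasts via cacheop_on_each_cpu(). */
void sketch_flush_cache_range_wrapper(struct vm_area_struct *vma,
				      unsigned long start, unsigned long end)
{
	struct flusher_data data = { .vma = vma, .addr1 = start, .addr2 = end };

	sketch_flush_cache_range(&data);
}
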
diff --git a/arch/sh/mm/cache-sh7705.c b/arch/sh/mm/cache-sh7705.c
index 22dacc77882..7729cca727e 100644
--- a/arch/sh/mm/cache-sh7705.c
+++ b/arch/sh/mm/cache-sh7705.c
@@ -12,6 +12,7 @@
#include <linux/init.h>
#include <linux/mman.h>
#include <linux/mm.h>
+#include <linux/fs.h>
#include <linux/threads.h>
#include <asm/addrspace.h>
#include <asm/page.h>
@@ -47,10 +48,10 @@ static inline void cache_wback_all(void)
unsigned long data;
int v = SH_CACHE_UPDATED | SH_CACHE_VALID;
- data = ctrl_inl(addr);
+ data = __raw_readl(addr);
if ((data & v) == v)
- ctrl_outl(data & ~v, addr);
+ __raw_writel(data & ~v, addr);
}
@@ -63,15 +64,21 @@ static inline void cache_wback_all(void)
*
* Called from kernel/module.c:sys_init_module and routine for a.out format.
*/
-void flush_icache_range(unsigned long start, unsigned long end)
+static void sh7705_flush_icache_range(void *args)
{
+ struct flusher_data *data = args;
+ unsigned long start, end;
+
+ start = data->addr1;
+ end = data->addr2;
+
__flush_wback_region((void *)start, end - start);
}
/*
* Writeback&Invalidate the D-cache of the page
*/
-static void __uses_jump_to_uncached __flush_dcache_page(unsigned long phys)
+static void __flush_dcache_page(unsigned long phys)
{
unsigned long ways, waysize, addrstart;
unsigned long flags;
@@ -108,10 +115,10 @@ static void __uses_jump_to_uncached __flush_dcache_page(unsigned long phys)
addr += current_cpu_data.dcache.linesz) {
unsigned long data;
- data = ctrl_inl(addr) & (0x1ffffC00 | SH_CACHE_VALID);
+ data = __raw_readl(addr) & (0x1ffffC00 | SH_CACHE_VALID);
if (data == phys) {
data &= ~(SH_CACHE_VALID | SH_CACHE_UPDATED);
- ctrl_outl(data, addr);
+ __raw_writel(data, addr);
}
}
@@ -126,13 +133,18 @@ static void __uses_jump_to_uncached __flush_dcache_page(unsigned long phys)
* Write back & invalidate the D-cache of the page.
* (To avoid "alias" issues)
*/
-void flush_dcache_page(struct page *page)
+static void sh7705_flush_dcache_page(void *arg)
{
- if (test_bit(PG_mapped, &page->flags))
- __flush_dcache_page(PHYSADDR(page_address(page)));
+ struct page *page = arg;
+ struct address_space *mapping = page_mapping(page);
+
+ if (mapping && !mapping_mapped(mapping))
+ clear_bit(PG_dcache_clean, &page->flags);
+ else
+ __flush_dcache_page(__pa(page_address(page)));
}
-void __uses_jump_to_uncached flush_cache_all(void)
+static void sh7705_flush_cache_all(void *args)
{
unsigned long flags;
@@ -144,44 +156,16 @@ void __uses_jump_to_uncached flush_cache_all(void)
local_irq_restore(flags);
}
-void flush_cache_mm(struct mm_struct *mm)
-{
- /* Is there any good way? */
- /* XXX: possibly call flush_cache_range for each vm area */
- flush_cache_all();
-}
-
-/*
- * Write back and invalidate D-caches.
- *
- * START, END: Virtual Address (U0 address)
- *
- * NOTE: We need to flush the _physical_ page entry.
- * Flushing the cache lines for U0 only isn't enough.
- * We need to flush for P1 too, which may contain aliases.
- */
-void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
- unsigned long end)
-{
-
- /*
- * We could call flush_cache_page for the pages of these range,
- * but it's not efficient (scan the caches all the time...).
- *
- * We can't use A-bit magic, as there's the case we don't have
- * valid entry on TLB.
- */
- flush_cache_all();
-}
-
/*
* Write back and invalidate I/D-caches for the page.
*
* ADDRESS: Virtual Address (U0 address)
*/
-void flush_cache_page(struct vm_area_struct *vma, unsigned long address,
- unsigned long pfn)
+static void sh7705_flush_cache_page(void *args)
{
+ struct flusher_data *data = args;
+ unsigned long pfn = data->addr2;
+
__flush_dcache_page(pfn << PAGE_SHIFT);
}
@@ -193,7 +177,19 @@ void flush_cache_page(struct vm_area_struct *vma, unsigned long address,
* Not entirely sure why this is necessary on SH3 with 32K cache but
* without it we get occasional "Memory fault" when loading a program.
*/
-void flush_icache_page(struct vm_area_struct *vma, struct page *page)
+static void sh7705_flush_icache_page(void *page)
{
__flush_purge_region(page_address(page), PAGE_SIZE);
}
+
+void __init sh7705_cache_init(void)
+{
+ local_flush_icache_range = sh7705_flush_icache_range;
+ local_flush_dcache_page = sh7705_flush_dcache_page;
+ local_flush_cache_all = sh7705_flush_cache_all;
+ local_flush_cache_mm = sh7705_flush_cache_all;
+ local_flush_cache_dup_mm = sh7705_flush_cache_all;
+ local_flush_cache_range = sh7705_flush_cache_all;
+ local_flush_cache_page = sh7705_flush_cache_page;
+ local_flush_icache_page = sh7705_flush_icache_page;
+}
diff --git a/arch/sh/mm/cache-shx3.c b/arch/sh/mm/cache-shx3.c
new file mode 100644
index 00000000000..24c58b7dc02
--- /dev/null
+++ b/arch/sh/mm/cache-shx3.c
@@ -0,0 +1,44 @@
+/*
+ * arch/sh/mm/cache-shx3.c - SH-X3 optimized cache ops
+ *
+ * Copyright (C) 2010 Paul Mundt
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/io.h>
+#include <asm/cache.h>
+
+#define CCR_CACHE_SNM 0x40000 /* Hardware-assisted synonym avoidance */
+#define CCR_CACHE_IBE 0x1000000 /* ICBI broadcast */
+
+void __init shx3_cache_init(void)
+{
+ unsigned int ccr;
+
+ ccr = __raw_readl(SH_CCR);
+
+ /*
+ * If we've got cache aliases, resolve them in hardware.
+ */
+ if (boot_cpu_data.dcache.n_aliases || boot_cpu_data.icache.n_aliases) {
+ ccr |= CCR_CACHE_SNM;
+
+ boot_cpu_data.icache.n_aliases = 0;
+ boot_cpu_data.dcache.n_aliases = 0;
+
+ pr_info("Enabling hardware synonym avoidance\n");
+ }
+
+#ifdef CONFIG_SMP
+ /*
+ * Broadcast I-cache block invalidations by default.
+ */
+ ccr |= CCR_CACHE_IBE;
+#endif
+
+ writel_uncached(ccr, SH_CCR);
+}
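
shx3_cache_init() above is a plain read-modify-write of the cache control register: setting SNM makes the hardware resolve cache synonyms, which is why both n_aliases counts can be zeroed so the generic alias-handling paths in cache.c collapse to the simple case, while IBE broadcasts icbi to the other cores on SMP. The bit composition in isolation, as an illustrative stand-alone helper (the register bit values come from the diff, the helper itself does not):

#define CCR_CACHE_SNM	0x40000		/* hardware-assisted synonym avoidance */
#define CCR_CACHE_IBE	0x1000000	/* ICBI broadcast */

/* Compose the new CCR value; the caller writes it back uncached. */
unsigned int shx3_ccr_compose(unsigned int ccr, int have_aliases, int is_smp)
{
	if (have_aliases)
		ccr |= CCR_CACHE_SNM;
	if (is_smp)
		ccr |= CCR_CACHE_IBE;
	return ccr;
}
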
diff --git a/arch/sh/mm/cache.c b/arch/sh/mm/cache.c
new file mode 100644
index 00000000000..097c2cdd117
--- /dev/null
+++ b/arch/sh/mm/cache.c
@@ -0,0 +1,355 @@
+/*
+ * arch/sh/mm/cache.c
+ *
+ * Copyright (C) 1999, 2000, 2002 Niibe Yutaka
+ * Copyright (C) 2002 - 2010 Paul Mundt
+ *
+ * Released under the terms of the GNU GPL v2.0.
+ */
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/mutex.h>
+#include <linux/fs.h>
+#include <linux/smp.h>
+#include <linux/highmem.h>
+#include <linux/module.h>
+#include <asm/mmu_context.h>
+#include <asm/cacheflush.h>
+
+void (*local_flush_cache_all)(void *args) = cache_noop;
+void (*local_flush_cache_mm)(void *args) = cache_noop;
+void (*local_flush_cache_dup_mm)(void *args) = cache_noop;
+void (*local_flush_cache_page)(void *args) = cache_noop;
+void (*local_flush_cache_range)(void *args) = cache_noop;
+void (*local_flush_dcache_page)(void *args) = cache_noop;
+void (*local_flush_icache_range)(void *args) = cache_noop;
+void (*local_flush_icache_page)(void *args) = cache_noop;
+void (*local_flush_cache_sigtramp)(void *args) = cache_noop;
+
+void (*__flush_wback_region)(void *start, int size);
+EXPORT_SYMBOL(__flush_wback_region);
+void (*__flush_purge_region)(void *start, int size);
+EXPORT_SYMBOL(__flush_purge_region);
+void (*__flush_invalidate_region)(void *start, int size);
+EXPORT_SYMBOL(__flush_invalidate_region);
+
+static inline void noop__flush_region(void *start, int size)
+{
+}
+
+static inline void cacheop_on_each_cpu(void (*func) (void *info), void *info,
+ int wait)
+{
+ preempt_disable();
+
+ /*
+ * It's possible that this gets called early on when IRQs are
+ * still disabled due to ioremapping by the boot CPU, so don't
+ * even attempt IPIs unless there are other CPUs online.
+ */
+ if (num_online_cpus() > 1)
+ smp_call_function(func, info, wait);
+
+ func(info);
+
+ preempt_enable();
+}
+
+void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
+ unsigned long vaddr, void *dst, const void *src,
+ unsigned long len)
+{
+ if (boot_cpu_data.dcache.n_aliases && page_mapped(page) &&
+ test_bit(PG_dcache_clean, &page->flags)) {
+ void *vto = kmap_coherent(page, vaddr) + (vaddr & ~PAGE_MASK);
+ memcpy(vto, src, len);
+ kunmap_coherent(vto);
+ } else {
+ memcpy(dst, src, len);
+ if (boot_cpu_data.dcache.n_aliases)
+ clear_bit(PG_dcache_clean, &page->flags);
+ }
+
+ if (vma->vm_flags & VM_EXEC)
+ flush_cache_page(vma, vaddr, page_to_pfn(page));
+}
+
+void copy_from_user_page(struct vm_area_struct *vma, struct page *page,
+ unsigned long vaddr, void *dst, const void *src,
+ unsigned long len)
+{
+ if (boot_cpu_data.dcache.n_aliases && page_mapped(page) &&
+ test_bit(PG_dcache_clean, &page->flags)) {
+ void *vfrom = kmap_coherent(page, vaddr) + (vaddr & ~PAGE_MASK);
+ memcpy(dst, vfrom, len);
+ kunmap_coherent(vfrom);
+ } else {
+ memcpy(dst, src, len);
+ if (boot_cpu_data.dcache.n_aliases)
+ clear_bit(PG_dcache_clean, &page->flags);
+ }
+}
+
+void copy_user_highpage(struct page *to, struct page *from,
+ unsigned long vaddr, struct vm_area_struct *vma)
+{
+ void *vfrom, *vto;
+
+ vto = kmap_atomic(to);
+
+ if (boot_cpu_data.dcache.n_aliases && page_mapped(from) &&
+ test_bit(PG_dcache_clean, &from->flags)) {
+ vfrom = kmap_coherent(from, vaddr);
+ copy_page(vto, vfrom);
+ kunmap_coherent(vfrom);
+ } else {
+ vfrom = kmap_atomic(from);
+ copy_page(vto, vfrom);
+ kunmap_atomic(vfrom);
+ }
+
+ if (pages_do_alias((unsigned long)vto, vaddr & PAGE_MASK) ||
+ (vma->vm_flags & VM_EXEC))
+ __flush_purge_region(vto, PAGE_SIZE);
+
+ kunmap_atomic(vto);
+ /* Make sure this page is cleared on other CPU's too before using it */
+ smp_wmb();
+}
+EXPORT_SYMBOL(copy_user_highpage);
+
+void clear_user_highpage(struct page *page, unsigned long vaddr)
+{
+ void *kaddr = kmap_atomic(page);
+
+ clear_page(kaddr);
+
+ if (pages_do_alias((unsigned long)kaddr, vaddr & PAGE_MASK))
+ __flush_purge_region(kaddr, PAGE_SIZE);
+
+ kunmap_atomic(kaddr);
+}
+EXPORT_SYMBOL(clear_user_highpage);
+
+void __update_cache(struct vm_area_struct *vma,
+ unsigned long address, pte_t pte)
+{
+ struct page *page;
+ unsigned long pfn = pte_pfn(pte);
+
+ if (!boot_cpu_data.dcache.n_aliases)
+ return;
+
+ page = pfn_to_page(pfn);
+ if (pfn_valid(pfn)) {
+ int dirty = !test_and_set_bit(PG_dcache_clean, &page->flags);
+ if (dirty)
+ __flush_purge_region(page_address(page), PAGE_SIZE);
+ }
+}
+
+void __flush_anon_page(struct page *page, unsigned long vmaddr)
+{
+ unsigned long addr = (unsigned long) page_address(page);
+
+ if (pages_do_alias(addr, vmaddr)) {
+ if (boot_cpu_data.dcache.n_aliases && page_mapped(page) &&
+ test_bit(PG_dcache_clean, &page->flags)) {
+ void *kaddr;
+
+ kaddr = kmap_coherent(page, vmaddr);
+ /* XXX.. For now kunmap_coherent() does a purge */
+ /* __flush_purge_region((void *)kaddr, PAGE_SIZE); */
+ kunmap_coherent(kaddr);
+ } else
+ __flush_purge_region((void *)addr, PAGE_SIZE);
+ }
+}
+
+void flush_cache_all(void)
+{
+ cacheop_on_each_cpu(local_flush_cache_all, NULL, 1);
+}
+EXPORT_SYMBOL(flush_cache_all);
+
+void flush_cache_mm(struct mm_struct *mm)
+{
+ if (boot_cpu_data.dcache.n_aliases == 0)
+ return;
+
+ cacheop_on_each_cpu(local_flush_cache_mm, mm, 1);
+}
+
+void flush_cache_dup_mm(struct mm_struct *mm)
+{
+ if (boot_cpu_data.dcache.n_aliases == 0)
+ return;
+
+ cacheop_on_each_cpu(local_flush_cache_dup_mm, mm, 1);
+}
+
+void flush_cache_page(struct vm_area_struct *vma, unsigned long addr,
+ unsigned long pfn)
+{
+ struct flusher_data data;
+
+ data.vma = vma;
+ data.addr1 = addr;
+ data.addr2 = pfn;
+
+ cacheop_on_each_cpu(local_flush_cache_page, (void *)&data, 1);
+}
+
+void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
+ unsigned long end)
+{
+ struct flusher_data data;
+
+ data.vma = vma;
+ data.addr1 = start;
+ data.addr2 = end;
+
+ cacheop_on_each_cpu(local_flush_cache_range, (void *)&data, 1);
+}
+EXPORT_SYMBOL(flush_cache_range);
+
+void flush_dcache_page(struct page *page)
+{
+ cacheop_on_each_cpu(local_flush_dcache_page, page, 1);
+}
+EXPORT_SYMBOL(flush_dcache_page);
+
+void flush_icache_range(unsigned long start, unsigned long end)
+{
+ struct flusher_data data;
+
+ data.vma = NULL;
+ data.addr1 = start;
+ data.addr2 = end;
+
+ cacheop_on_each_cpu(local_flush_icache_range, (void *)&data, 1);
+}
+
+void flush_icache_page(struct vm_area_struct *vma, struct page *page)
+{
+ /* Nothing uses the VMA, so just pass the struct page along */
+ cacheop_on_each_cpu(local_flush_icache_page, page, 1);
+}
+
+void flush_cache_sigtramp(unsigned long address)
+{
+ cacheop_on_each_cpu(local_flush_cache_sigtramp, (void *)address, 1);
+}
+
+static void compute_alias(struct cache_info *c)
+{
+ c->alias_mask = ((c->sets - 1) << c->entry_shift) & ~(PAGE_SIZE - 1);
+ c->n_aliases = c->alias_mask ? (c->alias_mask >> PAGE_SHIFT) + 1 : 0;
+}
+
+static void __init emit_cache_params(void)
+{
+ printk(KERN_NOTICE "I-cache : n_ways=%d n_sets=%d way_incr=%d\n",
+ boot_cpu_data.icache.ways,
+ boot_cpu_data.icache.sets,
+ boot_cpu_data.icache.way_incr);
+ printk(KERN_NOTICE "I-cache : entry_mask=0x%08x alias_mask=0x%08x n_aliases=%d\n",
+ boot_cpu_data.icache.entry_mask,
+ boot_cpu_data.icache.alias_mask,
+ boot_cpu_data.icache.n_aliases);
+ printk(KERN_NOTICE "D-cache : n_ways=%d n_sets=%d way_incr=%d\n",
+ boot_cpu_data.dcache.ways,
+ boot_cpu_data.dcache.sets,
+ boot_cpu_data.dcache.way_incr);
+ printk(KERN_NOTICE "D-cache : entry_mask=0x%08x alias_mask=0x%08x n_aliases=%d\n",
+ boot_cpu_data.dcache.entry_mask,
+ boot_cpu_data.dcache.alias_mask,
+ boot_cpu_data.dcache.n_aliases);
+
+ /*
+ * Emit Secondary Cache parameters if the CPU has a probed L2.
+ */
+ if (boot_cpu_data.flags & CPU_HAS_L2_CACHE) {
+ printk(KERN_NOTICE "S-cache : n_ways=%d n_sets=%d way_incr=%d\n",
+ boot_cpu_data.scache.ways,
+ boot_cpu_data.scache.sets,
+ boot_cpu_data.scache.way_incr);
+ printk(KERN_NOTICE "S-cache : entry_mask=0x%08x alias_mask=0x%08x n_aliases=%d\n",
+ boot_cpu_data.scache.entry_mask,
+ boot_cpu_data.scache.alias_mask,
+ boot_cpu_data.scache.n_aliases);
+ }
+}
+
+void __init cpu_cache_init(void)
+{
+ unsigned int cache_disabled = 0;
+
+#ifdef SH_CCR
+ cache_disabled = !(__raw_readl(SH_CCR) & CCR_CACHE_ENABLE);
+#endif
+
+ compute_alias(&boot_cpu_data.icache);
+ compute_alias(&boot_cpu_data.dcache);
+ compute_alias(&boot_cpu_data.scache);
+
+ __flush_wback_region = noop__flush_region;
+ __flush_purge_region = noop__flush_region;
+ __flush_invalidate_region = noop__flush_region;
+
+ /*
+ * No flushing is necessary in the disabled cache case so we can
+ * just keep the noop functions in local_flush_..() and __flush_..()
+ */
+ if (unlikely(cache_disabled))
+ goto skip;
+
+ if (boot_cpu_data.family == CPU_FAMILY_SH2) {
+ extern void __weak sh2_cache_init(void);
+
+ sh2_cache_init();
+ }
+
+ if (boot_cpu_data.family == CPU_FAMILY_SH2A) {
+ extern void __weak sh2a_cache_init(void);
+
+ sh2a_cache_init();
+ }
+
+ if (boot_cpu_data.family == CPU_FAMILY_SH3) {
+ extern void __weak sh3_cache_init(void);
+
+ sh3_cache_init();
+
+ if ((boot_cpu_data.type == CPU_SH7705) &&
+ (boot_cpu_data.dcache.sets == 512)) {
+ extern void __weak sh7705_cache_init(void);
+
+ sh7705_cache_init();
+ }
+ }
+
+ if ((boot_cpu_data.family == CPU_FAMILY_SH4) ||
+ (boot_cpu_data.family == CPU_FAMILY_SH4A) ||
+ (boot_cpu_data.family == CPU_FAMILY_SH4AL_DSP)) {
+ extern void __weak sh4_cache_init(void);
+
+ sh4_cache_init();
+
+ if ((boot_cpu_data.type == CPU_SH7786) ||
+ (boot_cpu_data.type == CPU_SHX3)) {
+ extern void __weak shx3_cache_init(void);
+
+ shx3_cache_init();
+ }
+ }
+
+ if (boot_cpu_data.family == CPU_FAMILY_SH5) {
+ extern void __weak sh5_cache_init(void);
+
+ sh5_cache_init();
+ }
+
+skip:
+ emit_cache_params();
+}
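
compute_alias() in the new cache.c above is where the page-colouring parameters come from: a cache way whose indexed span exceeds the page size yields a non-zero alias_mask and n_aliases page colours. A stand-alone illustration of the same arithmetic, using assumed SH7750-class geometry for the numbers:

#include <stdio.h>

#define PAGE_SIZE	4096UL
#define PAGE_SHIFT	12

/* Same arithmetic as compute_alias() above, printed for inspection. */
static void compute_alias(unsigned long sets, unsigned int entry_shift)
{
	unsigned long alias_mask =
		((sets - 1) << entry_shift) & ~(PAGE_SIZE - 1);
	unsigned long n_aliases =
		alias_mask ? (alias_mask >> PAGE_SHIFT) + 1 : 0;

	printf("sets=%lu linesz=%u -> alias_mask=0x%05lx n_aliases=%lu\n",
	       sets, 1u << entry_shift, alias_mask, n_aliases);
}

int main(void)
{
	/* Assumed SH7750-class D-cache: 512 sets of 32-byte lines per way
	 * (16 KiB per way) against 4 KiB pages -> 4 page colours. */
	compute_alias(512, 5);

	/* A 128-set, 16-byte-line way (2 KiB) fits inside one page, so
	 * alias_mask is 0 and the alias-handling paths are skipped. */
	compute_alias(128, 4);
	return 0;
}
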
diff --git a/arch/sh/mm/consistent.c b/arch/sh/mm/consistent.c
index d3c33fc5b1c..b81d9dbf9fe 100644
--- a/arch/sh/mm/consistent.c
+++ b/arch/sh/mm/consistent.c
@@ -10,178 +10,150 @@
* for more details.
*/
#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
#include <linux/dma-mapping.h>
+#include <linux/dma-debug.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/gfp.h>
#include <asm/cacheflush.h>
#include <asm/addrspace.h>
-#include <asm/io.h>
-
-struct dma_coherent_mem {
- void *virt_base;
- u32 device_base;
- int size;
- int flags;
- unsigned long *bitmap;
-};
-
-void *dma_alloc_coherent(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t gfp)
+
+#define PREALLOC_DMA_DEBUG_ENTRIES 4096
+
+struct dma_map_ops *dma_ops;
+EXPORT_SYMBOL(dma_ops);
+
+static int __init dma_init(void)
+{
+ dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
+ return 0;
+}
+fs_initcall(dma_init);
+
+void *dma_generic_alloc_coherent(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, gfp_t gfp,
+ struct dma_attrs *attrs)
{
void *ret, *ret_nocache;
- struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
int order = get_order(size);
- if (mem) {
- int page = bitmap_find_free_region(mem->bitmap, mem->size,
- order);
- if (page >= 0) {
- *dma_handle = mem->device_base + (page << PAGE_SHIFT);
- ret = mem->virt_base + (page << PAGE_SHIFT);
- memset(ret, 0, size);
- return ret;
- }
- if (mem->flags & DMA_MEMORY_EXCLUSIVE)
- return NULL;
- }
+ gfp |= __GFP_ZERO;
ret = (void *)__get_free_pages(gfp, order);
if (!ret)
return NULL;
- memset(ret, 0, size);
/*
* Pages from the page allocator may have data present in
* cache. So flush the cache before using uncached memory.
*/
dma_cache_sync(dev, ret, size, DMA_BIDIRECTIONAL);
- ret_nocache = ioremap_nocache(virt_to_phys(ret), size);
+ ret_nocache = (void __force *)ioremap_nocache(virt_to_phys(ret), size);
if (!ret_nocache) {
free_pages((unsigned long)ret, order);
return NULL;
}
+ split_page(pfn_to_page(virt_to_phys(ret) >> PAGE_SHIFT), order);
+
*dma_handle = virt_to_phys(ret);
+
return ret_nocache;
}
-EXPORT_SYMBOL(dma_alloc_coherent);
-void dma_free_coherent(struct device *dev, size_t size,
- void *vaddr, dma_addr_t dma_handle)
+void dma_generic_free_coherent(struct device *dev, size_t size,
+ void *vaddr, dma_addr_t dma_handle,
+ struct dma_attrs *attrs)
{
- struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
int order = get_order(size);
+ unsigned long pfn = dma_handle >> PAGE_SHIFT;
+ int k;
- if (mem && vaddr >= mem->virt_base && vaddr < (mem->virt_base + (mem->size << PAGE_SHIFT))) {
- int page = (vaddr - mem->virt_base) >> PAGE_SHIFT;
+ for (k = 0; k < (1 << order); k++)
+ __free_pages(pfn_to_page(pfn + k), 0);
- bitmap_release_region(mem->bitmap, page, order);
- } else {
- WARN_ON(irqs_disabled()); /* for portability */
- BUG_ON(mem && mem->flags & DMA_MEMORY_EXCLUSIVE);
- free_pages((unsigned long)phys_to_virt(dma_handle), order);
- iounmap(vaddr);
- }
-}
-EXPORT_SYMBOL(dma_free_coherent);
-
-int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
- dma_addr_t device_addr, size_t size, int flags)
-{
- void __iomem *mem_base = NULL;
- int pages = size >> PAGE_SHIFT;
- int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long);
-
- if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0)
- goto out;
- if (!size)
- goto out;
- if (dev->dma_mem)
- goto out;
-
- /* FIXME: this routine just ignores DMA_MEMORY_INCLUDES_CHILDREN */
-
- mem_base = ioremap_nocache(bus_addr, size);
- if (!mem_base)
- goto out;
-
- dev->dma_mem = kmalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL);
- if (!dev->dma_mem)
- goto out;
- dev->dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
- if (!dev->dma_mem->bitmap)
- goto free1_out;
-
- dev->dma_mem->virt_base = mem_base;
- dev->dma_mem->device_base = device_addr;
- dev->dma_mem->size = pages;
- dev->dma_mem->flags = flags;
-
- if (flags & DMA_MEMORY_MAP)
- return DMA_MEMORY_MAP;
-
- return DMA_MEMORY_IO;
-
- free1_out:
- kfree(dev->dma_mem);
- out:
- if (mem_base)
- iounmap(mem_base);
- return 0;
+ iounmap(vaddr);
}
-EXPORT_SYMBOL(dma_declare_coherent_memory);
-
-void dma_release_declared_memory(struct device *dev)
-{
- struct dma_coherent_mem *mem = dev->dma_mem;
-
- if (!mem)
- return;
- dev->dma_mem = NULL;
- iounmap(mem->virt_base);
- kfree(mem->bitmap);
- kfree(mem);
-}
-EXPORT_SYMBOL(dma_release_declared_memory);
-
-void *dma_mark_declared_memory_occupied(struct device *dev,
- dma_addr_t device_addr, size_t size)
-{
- struct dma_coherent_mem *mem = dev->dma_mem;
- int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1) >> PAGE_SHIFT;
- int pos, err;
-
- if (!mem)
- return ERR_PTR(-EINVAL);
-
- pos = (device_addr - mem->device_base) >> PAGE_SHIFT;
- err = bitmap_allocate_region(mem->bitmap, pos, get_order(pages));
- if (err != 0)
- return ERR_PTR(err);
- return mem->virt_base + (pos << PAGE_SHIFT);
-}
-EXPORT_SYMBOL(dma_mark_declared_memory_occupied);
void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
enum dma_data_direction direction)
{
-#ifdef CONFIG_CPU_SH5
- void *p1addr = vaddr;
-#else
- void *p1addr = (void*) P1SEGADDR((unsigned long)vaddr);
-#endif
+ void *addr;
+
+ addr = __in_29bit_mode() ?
+ (void *)CAC_ADDR((unsigned long)vaddr) : vaddr;
switch (direction) {
case DMA_FROM_DEVICE: /* invalidate only */
- __flush_invalidate_region(p1addr, size);
+ __flush_invalidate_region(addr, size);
break;
case DMA_TO_DEVICE: /* writeback only */
- __flush_wback_region(p1addr, size);
+ __flush_wback_region(addr, size);
break;
case DMA_BIDIRECTIONAL: /* writeback and invalidate */
- __flush_purge_region(p1addr, size);
+ __flush_purge_region(addr, size);
break;
default:
BUG();
}
}
EXPORT_SYMBOL(dma_cache_sync);
+
+static int __init memchunk_setup(char *str)
+{
+ return 1; /* accept anything that begins with "memchunk." */
+}
+__setup("memchunk.", memchunk_setup);
+
+static void __init memchunk_cmdline_override(char *name, unsigned long *sizep)
+{
+ char *p = boot_command_line;
+ int k = strlen(name);
+
+ while ((p = strstr(p, "memchunk."))) {
+ p += 9; /* strlen("memchunk.") */
+ if (!strncmp(name, p, k) && p[k] == '=') {
+ p += k + 1;
+ *sizep = memparse(p, NULL);
+ pr_info("%s: forcing memory chunk size to 0x%08lx\n",
+ name, *sizep);
+ break;
+ }
+ }
+}
+
+int __init platform_resource_setup_memory(struct platform_device *pdev,
+ char *name, unsigned long memsize)
+{
+ struct resource *r;
+ dma_addr_t dma_handle;
+ void *buf;
+
+ r = pdev->resource + pdev->num_resources - 1;
+ if (r->flags) {
+ pr_warning("%s: unable to find empty space for resource\n",
+ name);
+ return -EINVAL;
+ }
+
+ memchunk_cmdline_override(name, &memsize);
+ if (!memsize)
+ return 0;
+
+ buf = dma_alloc_coherent(NULL, memsize, &dma_handle, GFP_KERNEL);
+ if (!buf) {
+ pr_warning("%s: unable to allocate memory\n", name);
+ return -ENOMEM;
+ }
+
+ memset(buf, 0, memsize);
+
+ r->flags = IORESOURCE_MEM;
+ r->start = dma_handle;
+ r->end = r->start + memsize - 1;
+ r->name = name;
+ return 0;
+}
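
The rewritten consistent.c also carries the "memchunk.<name>=<size>" command-line override and the platform_resource_setup_memory() helper, which board code uses to reserve a coherent buffer in a device's last (empty) resource slot. A hedged usage sketch: the "veu0" name, the device definition and the 1 MiB default are made-up examples, only the helper and the boot parameter syntax come from this diff.

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/platform_device.h>

/* Prototype copied from this diff; the real declaration lives in an arch/sh header. */
extern int platform_resource_setup_memory(struct platform_device *pdev,
					   char *name, unsigned long memsize);

static struct resource example_veu_resources[1];	/* last slot left empty on purpose */

static struct platform_device example_veu_device = {
	.name		= "veu",
	.id		= 0,
	.resource	= example_veu_resources,
	.num_resources	= ARRAY_SIZE(example_veu_resources),
};

static int __init example_devices_setup(void)
{
	/*
	 * Reserve 1 MiB of coherent memory by default and publish it as the
	 * device's last resource; booting with "memchunk.veu0=0x200000"
	 * forces 2 MiB instead, and "memchunk.veu0=0" skips the reservation.
	 */
	platform_resource_setup_memory(&example_veu_device, "veu0", 1 << 20);

	return platform_device_register(&example_veu_device);
}
device_initcall(example_devices_setup);
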
diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c
new file mode 100644
index 00000000000..541dc610150
--- /dev/null
+++ b/arch/sh/mm/fault.c
@@ -0,0 +1,517 @@
+/*
+ * Page fault handler for SH with an MMU.
+ *
+ * Copyright (C) 1999 Niibe Yutaka
+ * Copyright (C) 2003 - 2012 Paul Mundt
+ *
+ * Based on linux/arch/i386/mm/fault.c:
+ * Copyright (C) 1995 Linus Torvalds
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/hardirq.h>
+#include <linux/kprobes.h>
+#include <linux/perf_event.h>
+#include <linux/kdebug.h>
+#include <asm/io_trapped.h>
+#include <asm/mmu_context.h>
+#include <asm/tlbflush.h>
+#include <asm/traps.h>
+
+static inline int notify_page_fault(struct pt_regs *regs, int trap)
+{
+ int ret = 0;
+
+ if (kprobes_built_in() && !user_mode(regs)) {
+ preempt_disable();
+ if (kprobe_running() && kprobe_fault_handler(regs, trap))
+ ret = 1;
+ preempt_enable();
+ }
+
+ return ret;
+}
+
+static void
+force_sig_info_fault(int si_signo, int si_code, unsigned long address,
+ struct task_struct *tsk)
+{
+ siginfo_t info;
+
+ info.si_signo = si_signo;
+ info.si_errno = 0;
+ info.si_code = si_code;
+ info.si_addr = (void __user *)address;
+
+ force_sig_info(si_signo, &info, tsk);
+}
+
+/*
+ * This is useful to dump out the page tables associated with
+ * 'addr' in mm 'mm'.
+ */
+static void show_pte(struct mm_struct *mm, unsigned long addr)
+{
+ pgd_t *pgd;
+
+ if (mm) {
+ pgd = mm->pgd;
+ } else {
+ pgd = get_TTB();
+
+ if (unlikely(!pgd))
+ pgd = swapper_pg_dir;
+ }
+
+ printk(KERN_ALERT "pgd = %p\n", pgd);
+ pgd += pgd_index(addr);
+ printk(KERN_ALERT "[%08lx] *pgd=%0*Lx", addr,
+ (u32)(sizeof(*pgd) * 2), (u64)pgd_val(*pgd));
+
+ do {
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+
+ if (pgd_none(*pgd))
+ break;
+
+ if (pgd_bad(*pgd)) {
+ printk("(bad)");
+ break;
+ }
+
+ pud = pud_offset(pgd, addr);
+ if (PTRS_PER_PUD != 1)
+ printk(", *pud=%0*Lx", (u32)(sizeof(*pud) * 2),
+ (u64)pud_val(*pud));
+
+ if (pud_none(*pud))
+ break;
+
+ if (pud_bad(*pud)) {
+ printk("(bad)");
+ break;
+ }
+
+ pmd = pmd_offset(pud, addr);
+ if (PTRS_PER_PMD != 1)
+ printk(", *pmd=%0*Lx", (u32)(sizeof(*pmd) * 2),
+ (u64)pmd_val(*pmd));
+
+ if (pmd_none(*pmd))
+ break;
+
+ if (pmd_bad(*pmd)) {
+ printk("(bad)");
+ break;
+ }
+
+ /* We must not map this if we have highmem enabled */
+ if (PageHighMem(pfn_to_page(pmd_val(*pmd) >> PAGE_SHIFT)))
+ break;
+
+ pte = pte_offset_kernel(pmd, addr);
+ printk(", *pte=%0*Lx", (u32)(sizeof(*pte) * 2),
+ (u64)pte_val(*pte));
+ } while (0);
+
+ printk("\n");
+}
+
+static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
+{
+ unsigned index = pgd_index(address);
+ pgd_t *pgd_k;
+ pud_t *pud, *pud_k;
+ pmd_t *pmd, *pmd_k;
+
+ pgd += index;
+ pgd_k = init_mm.pgd + index;
+
+ if (!pgd_present(*pgd_k))
+ return NULL;
+
+ pud = pud_offset(pgd, address);
+ pud_k = pud_offset(pgd_k, address);
+ if (!pud_present(*pud_k))
+ return NULL;
+
+ if (!pud_present(*pud))
+ set_pud(pud, *pud_k);
+
+ pmd = pmd_offset(pud, address);
+ pmd_k = pmd_offset(pud_k, address);
+ if (!pmd_present(*pmd_k))
+ return NULL;
+
+ if (!pmd_present(*pmd))
+ set_pmd(pmd, *pmd_k);
+ else {
+ /*
+ * The page tables are fully synchronised so there must
+ * be another reason for the fault. Return NULL here to
+ * signal that we have not taken care of the fault.
+ */
+ BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
+ return NULL;
+ }
+
+ return pmd_k;
+}
+
+#ifdef CONFIG_SH_STORE_QUEUES
+#define __FAULT_ADDR_LIMIT P3_ADDR_MAX
+#else
+#define __FAULT_ADDR_LIMIT VMALLOC_END
+#endif
+
+/*
+ * Handle a fault on the vmalloc or module mapping area
+ */
+static noinline int vmalloc_fault(unsigned long address)
+{
+ pgd_t *pgd_k;
+ pmd_t *pmd_k;
+ pte_t *pte_k;
+
+ /* Make sure we are in vmalloc/module/P3 area: */
+ if (!(address >= VMALLOC_START && address < __FAULT_ADDR_LIMIT))
+ return -1;
+
+ /*
+ * Synchronize this task's top level page-table
+ * with the 'reference' page table.
+ *
+ * Do _not_ use "current" here. We might be inside
+ * an interrupt in the middle of a task switch..
+ */
+ pgd_k = get_TTB();
+ pmd_k = vmalloc_sync_one(pgd_k, address);
+ if (!pmd_k)
+ return -1;
+
+ pte_k = pte_offset_kernel(pmd_k, address);
+ if (!pte_present(*pte_k))
+ return -1;
+
+ return 0;
+}
+
+static void
+show_fault_oops(struct pt_regs *regs, unsigned long address)
+{
+ if (!oops_may_print())
+ return;
+
+ printk(KERN_ALERT "BUG: unable to handle kernel ");
+ if (address < PAGE_SIZE)
+ printk(KERN_CONT "NULL pointer dereference");
+ else
+ printk(KERN_CONT "paging request");
+
+ printk(KERN_CONT " at %08lx\n", address);
+ printk(KERN_ALERT "PC:");
+ printk_address(regs->pc, 1);
+
+ show_pte(NULL, address);
+}
+
+static noinline void
+no_context(struct pt_regs *regs, unsigned long error_code,
+ unsigned long address)
+{
+ /* Are we prepared to handle this kernel fault? */
+ if (fixup_exception(regs))
+ return;
+
+ if (handle_trapped_io(regs, address))
+ return;
+
+ /*
+ * Oops. The kernel tried to access some bad page. We'll have to
+ * terminate things with extreme prejudice.
+ */
+ bust_spinlocks(1);
+
+ show_fault_oops(regs, address);
+
+ die("Oops", regs, error_code);
+ bust_spinlocks(0);
+ do_exit(SIGKILL);
+}
+
+static void
+__bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
+ unsigned long address, int si_code)
+{
+ struct task_struct *tsk = current;
+
+ /* User mode accesses just cause a SIGSEGV */
+ if (user_mode(regs)) {
+ /*
+ * It's possible to have interrupts off here:
+ */
+ local_irq_enable();
+
+ force_sig_info_fault(SIGSEGV, si_code, address, tsk);
+
+ return;
+ }
+
+ no_context(regs, error_code, address);
+}
+
+static noinline void
+bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
+ unsigned long address)
+{
+ __bad_area_nosemaphore(regs, error_code, address, SEGV_MAPERR);
+}
+
+static void
+__bad_area(struct pt_regs *regs, unsigned long error_code,
+ unsigned long address, int si_code)
+{
+ struct mm_struct *mm = current->mm;
+
+ /*
+ * Something tried to access memory that isn't in our memory map..
+ * Fix it, but check if it's kernel or user first..
+ */
+ up_read(&mm->mmap_sem);
+
+ __bad_area_nosemaphore(regs, error_code, address, si_code);
+}
+
+static noinline void
+bad_area(struct pt_regs *regs, unsigned long error_code, unsigned long address)
+{
+ __bad_area(regs, error_code, address, SEGV_MAPERR);
+}
+
+static noinline void
+bad_area_access_error(struct pt_regs *regs, unsigned long error_code,
+ unsigned long address)
+{
+ __bad_area(regs, error_code, address, SEGV_ACCERR);
+}
+
+static void
+do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address)
+{
+ struct task_struct *tsk = current;
+ struct mm_struct *mm = tsk->mm;
+
+ up_read(&mm->mmap_sem);
+
+ /* Kernel mode? Handle exceptions or die: */
+ if (!user_mode(regs))
+ no_context(regs, error_code, address);
+
+ force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
+}
+
+static noinline int
+mm_fault_error(struct pt_regs *regs, unsigned long error_code,
+ unsigned long address, unsigned int fault)
+{
+ /*
+ * Pagefault was interrupted by SIGKILL. We have no reason to
+ * continue pagefault.
+ */
+ if (fatal_signal_pending(current)) {
+ if (!(fault & VM_FAULT_RETRY))
+ up_read(&current->mm->mmap_sem);
+ if (!user_mode(regs))
+ no_context(regs, error_code, address);
+ return 1;
+ }
+
+ if (!(fault & VM_FAULT_ERROR))
+ return 0;
+
+ if (fault & VM_FAULT_OOM) {
+ /* Kernel mode? Handle exceptions or die: */
+ if (!user_mode(regs)) {
+ up_read(&current->mm->mmap_sem);
+ no_context(regs, error_code, address);
+ return 1;
+ }
+ up_read(&current->mm->mmap_sem);
+
+ /*
+ * We ran out of memory, call the OOM killer, and return the
+ * userspace (which will retry the fault, or kill us if we got
+ * oom-killed):
+ */
+ pagefault_out_of_memory();
+ } else {
+ if (fault & VM_FAULT_SIGBUS)
+ do_sigbus(regs, error_code, address);
+ else
+ BUG();
+ }
+
+ return 1;
+}
+
+static inline int access_error(int error_code, struct vm_area_struct *vma)
+{
+ if (error_code & FAULT_CODE_WRITE) {
+ /* write, present and write, not present: */
+ if (unlikely(!(vma->vm_flags & VM_WRITE)))
+ return 1;
+ return 0;
+ }
+
+ /* ITLB miss on NX page */
+ if (unlikely((error_code & FAULT_CODE_ITLB) &&
+ !(vma->vm_flags & VM_EXEC)))
+ return 1;
+
+ /* read, not present: */
+ if (unlikely(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))))
+ return 1;
+
+ return 0;
+}
+
+static int fault_in_kernel_space(unsigned long address)
+{
+ return address >= TASK_SIZE;
+}
+
+/*
+ * This routine handles page faults. It determines the address,
+ * and the problem, and then passes it off to one of the appropriate
+ * routines.
+ */
+asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
+ unsigned long error_code,
+ unsigned long address)
+{
+ unsigned long vec;
+ struct task_struct *tsk;
+ struct mm_struct *mm;
+ struct vm_area_struct * vma;
+ int fault;
+ unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
+
+ tsk = current;
+ mm = tsk->mm;
+ vec = lookup_exception_vector();
+
+ /*
+ * We fault-in kernel-space virtual memory on-demand. The
+ * 'reference' page table is init_mm.pgd.
+ *
+ * NOTE! We MUST NOT take any locks for this case. We may
+ * be in an interrupt or a critical region, and should
+ * only copy the information from the master page table,
+ * nothing more.
+ */
+ if (unlikely(fault_in_kernel_space(address))) {
+ if (vmalloc_fault(address) >= 0)
+ return;
+ if (notify_page_fault(regs, vec))
+ return;
+
+ bad_area_nosemaphore(regs, error_code, address);
+ return;
+ }
+
+ if (unlikely(notify_page_fault(regs, vec)))
+ return;
+
+ /* Only enable interrupts if they were on before the fault */
+ if ((regs->sr & SR_IMASK) != SR_IMASK)
+ local_irq_enable();
+
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
+
+ /*
+ * If we're in an interrupt, have no user context or are running
+ * in an atomic region then we must not take the fault:
+ */
+ if (unlikely(in_atomic() || !mm)) {
+ bad_area_nosemaphore(regs, error_code, address);
+ return;
+ }
+
+retry:
+ down_read(&mm->mmap_sem);
+
+ vma = find_vma(mm, address);
+ if (unlikely(!vma)) {
+ bad_area(regs, error_code, address);
+ return;
+ }
+ if (likely(vma->vm_start <= address))
+ goto good_area;
+ if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) {
+ bad_area(regs, error_code, address);
+ return;
+ }
+ if (unlikely(expand_stack(vma, address))) {
+ bad_area(regs, error_code, address);
+ return;
+ }
+
+ /*
+ * Ok, we have a good vm_area for this memory access, so
+ * we can handle it..
+ */
+good_area:
+ if (unlikely(access_error(error_code, vma))) {
+ bad_area_access_error(regs, error_code, address);
+ return;
+ }
+
+ set_thread_fault_code(error_code);
+
+ if (user_mode(regs))
+ flags |= FAULT_FLAG_USER;
+ if (error_code & FAULT_CODE_WRITE)
+ flags |= FAULT_FLAG_WRITE;
+
+ /*
+ * If for any reason at all we couldn't handle the fault,
+ * make sure we exit gracefully rather than endlessly redo
+ * the fault.
+ */
+ fault = handle_mm_fault(mm, vma, address, flags);
+
+ if (unlikely(fault & (VM_FAULT_RETRY | VM_FAULT_ERROR)))
+ if (mm_fault_error(regs, error_code, address, fault))
+ return;
+
+ if (flags & FAULT_FLAG_ALLOW_RETRY) {
+ if (fault & VM_FAULT_MAJOR) {
+ tsk->maj_flt++;
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
+ regs, address);
+ } else {
+ tsk->min_flt++;
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
+ regs, address);
+ }
+ if (fault & VM_FAULT_RETRY) {
+ flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ flags |= FAULT_FLAG_TRIED;
+
+ /*
+ * No need to up_read(&mm->mmap_sem) as we would
+ * have already released it in __lock_page_or_retry
+ * in mm/filemap.c.
+ */
+ goto retry;
+ }
+ }
+
+ up_read(&mm->mmap_sem);
+}
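
The consolidated do_page_fault() above follows the generic retry discipline: the first handle_mm_fault() call may drop mmap_sem and return VM_FAULT_RETRY, after which the fault is retried exactly once with FAULT_FLAG_TRIED set. A stand-alone model of that state machine; the flag values below are stand-ins, the real constants live in <linux/mm.h>:

#include <stdio.h>

#define FAULT_FLAG_ALLOW_RETRY	0x01	/* illustrative values only */
#define FAULT_FLAG_TRIED	0x02
#define VM_FAULT_RETRY		0x04

/* Stand-in for handle_mm_fault(): report RETRY once, then succeed. */
static unsigned int fake_handle_mm_fault(int *calls)
{
	return (++(*calls) == 1) ? VM_FAULT_RETRY : 0;
}

int main(void)
{
	unsigned int flags = FAULT_FLAG_ALLOW_RETRY;
	unsigned int fault;
	int calls = 0;

retry:
	fault = fake_handle_mm_fault(&calls);

	if ((flags & FAULT_FLAG_ALLOW_RETRY) && (fault & VM_FAULT_RETRY)) {
		/* mmap_sem was already dropped for us; allow one retry only,
		 * recording that we tried, just as do_page_fault() does. */
		flags &= ~FAULT_FLAG_ALLOW_RETRY;
		flags |= FAULT_FLAG_TRIED;
		goto retry;
	}

	printf("handle_mm_fault called %d time(s), final flags 0x%x\n",
	       calls, flags);
	return 0;
}
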
diff --git a/arch/sh/mm/fault_32.c b/arch/sh/mm/fault_32.c
deleted file mode 100644
index d1fa27594c6..00000000000
--- a/arch/sh/mm/fault_32.c
+++ /dev/null
@@ -1,314 +0,0 @@
-/*
- * Page fault handler for SH with an MMU.
- *
- * Copyright (C) 1999 Niibe Yutaka
- * Copyright (C) 2003 - 2007 Paul Mundt
- *
- * Based on linux/arch/i386/mm/fault.c:
- * Copyright (C) 1995 Linus Torvalds
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/hardirq.h>
-#include <linux/kprobes.h>
-#include <asm/io_trapped.h>
-#include <asm/system.h>
-#include <asm/mmu_context.h>
-#include <asm/tlbflush.h>
-#include <asm/kgdb.h>
-
-/*
- * This routine handles page faults. It determines the address,
- * and the problem, and then passes it off to one of the appropriate
- * routines.
- */
-asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
- unsigned long writeaccess,
- unsigned long address)
-{
- struct task_struct *tsk;
- struct mm_struct *mm;
- struct vm_area_struct * vma;
- int si_code;
- int fault;
- siginfo_t info;
-
- trace_hardirqs_on();
- local_irq_enable();
-
-#ifdef CONFIG_SH_KGDB
- if (kgdb_nofault && kgdb_bus_err_hook)
- kgdb_bus_err_hook();
-#endif
-
- tsk = current;
- mm = tsk->mm;
- si_code = SEGV_MAPERR;
-
- if (unlikely(address >= TASK_SIZE)) {
- /*
- * Synchronize this task's top level page-table
- * with the 'reference' page table.
- *
- * Do _not_ use "tsk" here. We might be inside
- * an interrupt in the middle of a task switch..
- */
- int offset = pgd_index(address);
- pgd_t *pgd, *pgd_k;
- pud_t *pud, *pud_k;
- pmd_t *pmd, *pmd_k;
-
- pgd = get_TTB() + offset;
- pgd_k = swapper_pg_dir + offset;
-
- /* This will never happen with the folded page table. */
- if (!pgd_present(*pgd)) {
- if (!pgd_present(*pgd_k))
- goto bad_area_nosemaphore;
- set_pgd(pgd, *pgd_k);
- return;
- }
-
- pud = pud_offset(pgd, address);
- pud_k = pud_offset(pgd_k, address);
- if (pud_present(*pud) || !pud_present(*pud_k))
- goto bad_area_nosemaphore;
- set_pud(pud, *pud_k);
-
- pmd = pmd_offset(pud, address);
- pmd_k = pmd_offset(pud_k, address);
- if (pmd_present(*pmd) || !pmd_present(*pmd_k))
- goto bad_area_nosemaphore;
- set_pmd(pmd, *pmd_k);
-
- return;
- }
-
- /*
- * If we're in an interrupt or have no user
- * context, we must not take the fault..
- */
- if (in_atomic() || !mm)
- goto no_context;
-
- down_read(&mm->mmap_sem);
-
- vma = find_vma(mm, address);
- if (!vma)
- goto bad_area;
- if (vma->vm_start <= address)
- goto good_area;
- if (!(vma->vm_flags & VM_GROWSDOWN))
- goto bad_area;
- if (expand_stack(vma, address))
- goto bad_area;
-/*
- * Ok, we have a good vm_area for this memory access, so
- * we can handle it..
- */
-good_area:
- si_code = SEGV_ACCERR;
- if (writeaccess) {
- if (!(vma->vm_flags & VM_WRITE))
- goto bad_area;
- } else {
- if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
- goto bad_area;
- }
-
- /*
- * If for any reason at all we couldn't handle the fault,
- * make sure we exit gracefully rather than endlessly redo
- * the fault.
- */
-survive:
- fault = handle_mm_fault(mm, vma, address, writeaccess);
- if (unlikely(fault & VM_FAULT_ERROR)) {
- if (fault & VM_FAULT_OOM)
- goto out_of_memory;
- else if (fault & VM_FAULT_SIGBUS)
- goto do_sigbus;
- BUG();
- }
- if (fault & VM_FAULT_MAJOR)
- tsk->maj_flt++;
- else
- tsk->min_flt++;
-
- up_read(&mm->mmap_sem);
- return;
-
-/*
- * Something tried to access memory that isn't in our memory map..
- * Fix it, but check if it's kernel or user first..
- */
-bad_area:
- up_read(&mm->mmap_sem);
-
-bad_area_nosemaphore:
- if (user_mode(regs)) {
- info.si_signo = SIGSEGV;
- info.si_errno = 0;
- info.si_code = si_code;
- info.si_addr = (void *) address;
- force_sig_info(SIGSEGV, &info, tsk);
- return;
- }
-
-no_context:
- /* Are we prepared to handle this kernel fault? */
- if (fixup_exception(regs))
- return;
-
- if (handle_trapped_io(regs, address))
- return;
-/*
- * Oops. The kernel tried to access some bad page. We'll have to
- * terminate things with extreme prejudice.
- *
- */
-
- bust_spinlocks(1);
-
- if (oops_may_print()) {
- unsigned long page;
-
- if (address < PAGE_SIZE)
- printk(KERN_ALERT "Unable to handle kernel NULL "
- "pointer dereference");
- else
- printk(KERN_ALERT "Unable to handle kernel paging "
- "request");
- printk(" at virtual address %08lx\n", address);
- printk(KERN_ALERT "pc = %08lx\n", regs->pc);
- page = (unsigned long)get_TTB();
- if (page) {
- page = ((__typeof__(page) *)page)[address >> PGDIR_SHIFT];
- printk(KERN_ALERT "*pde = %08lx\n", page);
- if (page & _PAGE_PRESENT) {
- page &= PAGE_MASK;
- address &= 0x003ff000;
- page = ((__typeof__(page) *)
- __va(page))[address >>
- PAGE_SHIFT];
- printk(KERN_ALERT "*pte = %08lx\n", page);
- }
- }
- }
-
- die("Oops", regs, writeaccess);
- bust_spinlocks(0);
- do_exit(SIGKILL);
-
-/*
- * We ran out of memory, or some other thing happened to us that made
- * us unable to handle the page fault gracefully.
- */
-out_of_memory:
- up_read(&mm->mmap_sem);
- if (is_global_init(current)) {
- yield();
- down_read(&mm->mmap_sem);
- goto survive;
- }
- printk("VM: killing process %s\n", tsk->comm);
- if (user_mode(regs))
- do_group_exit(SIGKILL);
- goto no_context;
-
-do_sigbus:
- up_read(&mm->mmap_sem);
-
- /*
- * Send a sigbus, regardless of whether we were in kernel
- * or user mode.
- */
- info.si_signo = SIGBUS;
- info.si_errno = 0;
- info.si_code = BUS_ADRERR;
- info.si_addr = (void *)address;
- force_sig_info(SIGBUS, &info, tsk);
-
- /* Kernel mode? Handle exceptions or die */
- if (!user_mode(regs))
- goto no_context;
-}
-
-#ifdef CONFIG_SH_STORE_QUEUES
-/*
- * This is a special case for the SH-4 store queues, as pages for this
- * space still need to be faulted in before it's possible to flush the
- * store queue cache for writeout to the remapped region.
- */
-#define P3_ADDR_MAX (P4SEG_STORE_QUE + 0x04000000)
-#else
-#define P3_ADDR_MAX P4SEG
-#endif
-
-/*
- * Called with interrupts disabled.
- */
-asmlinkage int __kprobes __do_page_fault(struct pt_regs *regs,
- unsigned long writeaccess,
- unsigned long address)
-{
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
- pte_t entry;
-
-#ifdef CONFIG_SH_KGDB
- if (kgdb_nofault && kgdb_bus_err_hook)
- kgdb_bus_err_hook();
-#endif
-
- /*
- * We don't take page faults for P1, P2, and parts of P4, these
- * are always mapped, whether it be due to legacy behaviour in
- * 29-bit mode, or due to PMB configuration in 32-bit mode.
- */
- if (address >= P3SEG && address < P3_ADDR_MAX) {
- pgd = pgd_offset_k(address);
- } else {
- if (unlikely(address >= TASK_SIZE || !current->mm))
- return 1;
-
- pgd = pgd_offset(current->mm, address);
- }
-
- pud = pud_offset(pgd, address);
- if (pud_none_or_clear_bad(pud))
- return 1;
- pmd = pmd_offset(pud, address);
- if (pmd_none_or_clear_bad(pmd))
- return 1;
-
- pte = pte_offset_kernel(pmd, address);
- entry = *pte;
- if (unlikely(pte_none(entry) || pte_not_present(entry)))
- return 1;
- if (unlikely(writeaccess && !pte_write(entry)))
- return 1;
-
- if (writeaccess)
- entry = pte_mkdirty(entry);
- entry = pte_mkyoung(entry);
-
-#if defined(CONFIG_CPU_SH4) && !defined(CONFIG_SMP)
- /*
- * ITLB is not affected by "ldtlb" instruction.
- * So, we need to flush the entry by ourselves.
- */
- local_flush_tlb_one(get_asid(), address & PAGE_MASK);
-#endif
-
- set_pte(pte, entry);
- update_mmu_cache(NULL, address, entry);
-
- return 0;
-}
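
The deleted __do_page_fault() above is a software TLB-refill path: it walks pgd/pud/pmd/pte, bails out to the slow path if any level is missing, and otherwise marks the PTE young (and dirty on a write) before reloading it. Below is a minimal userspace sketch of that walk-and-bail pattern, using a hypothetical two-level layout rather than the kernel's page-table API.

/*
 * Sketch of the "walk the software page table and bail at the first
 * missing level" pattern. The types and the two-level layout are
 * hypothetical stand-ins, not the kernel's pgd/pud/pmd/pte interface.
 */
#include <stdio.h>
#include <stdint.h>

#define LEVEL_ENTRIES 4
#define PRESENT 0x1u

struct pte { uint32_t val; };
struct pmd { struct pte *ptes; };              /* NULL == level missing */
struct pgd { struct pmd *pmds[LEVEL_ENTRIES]; };

/* Return the PTE for (pgd_idx, pte_idx), or NULL if a level is missing. */
static struct pte *walk(struct pgd *pgd, unsigned pgd_idx, unsigned pte_idx)
{
	struct pmd *pmd = pgd->pmds[pgd_idx];
	if (!pmd || !pmd->ptes)
		return NULL;                   /* punt to the slow path */
	return &pmd->ptes[pte_idx];
}

int main(void)
{
	struct pte ptes[LEVEL_ENTRIES] = { [2] = { .val = PRESENT } };
	struct pmd pmd = { .ptes = ptes };
	struct pgd pgd = { .pmds = { [1] = &pmd } };

	struct pte *pte = walk(&pgd, 1, 2);
	if (pte && (pte->val & PRESENT))
		printf("fast path: refill from pte 0x%x\n", (unsigned)pte->val);
	else
		printf("slow path: take the full fault handler\n");
	return 0;
}
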
diff --git a/arch/sh/mm/fault_64.c b/arch/sh/mm/fault_64.c
deleted file mode 100644
index 399d53710d2..00000000000
--- a/arch/sh/mm/fault_64.c
+++ /dev/null
@@ -1,275 +0,0 @@
-/*
- * The SH64 TLB miss.
- *
- * Original code from fault.c
- * Copyright (C) 2000, 2001 Paolo Alberelli
- *
- * Fast PTE->TLB refill path
- * Copyright (C) 2003 Richard.Curnow@superh.com
- *
- * IMPORTANT NOTES :
- * The do_fast_page_fault function is called from a context in entry.S
- * where very few registers have been saved. In particular, the code in
- * this file must be compiled not to use ANY caller-save registers that
- * are not part of the restricted save set. Also, it means that code in
- * this file must not make calls to functions elsewhere in the kernel, or
- * else the excepting context will see corruption in its caller-save
- * registers. Plus, the entry.S save area is non-reentrant, so this code
- * has to run with SR.BL==1, i.e. no interrupts taken inside it and panic
- * on any exception.
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-#include <linux/signal.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/types.h>
-#include <linux/ptrace.h>
-#include <linux/mman.h>
-#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/interrupt.h>
-#include <asm/system.h>
-#include <asm/tlb.h>
-#include <asm/io.h>
-#include <asm/uaccess.h>
-#include <asm/pgalloc.h>
-#include <asm/mmu_context.h>
-#include <asm/cpu/registers.h>
-
-/* Callable from fault.c, so not static */
-inline void __do_tlb_refill(unsigned long address,
- unsigned long long is_text_not_data, pte_t *pte)
-{
- unsigned long long ptel;
- unsigned long long pteh=0;
- struct tlb_info *tlbp;
- unsigned long long next;
-
- /* Get PTEL first */
- ptel = pte_val(*pte);
-
- /*
- * Set PTEH register
- */
- pteh = address & MMU_VPN_MASK;
-
- /* Sign extend based on neff. */
-#if (NEFF == 32)
- /* Faster sign extension */
- pteh = (unsigned long long)(signed long long)(signed long)pteh;
-#else
- /* General case */
- pteh = (pteh & NEFF_SIGN) ? (pteh | NEFF_MASK) : pteh;
-#endif
-
- /* Set the ASID. */
- pteh |= get_asid() << PTEH_ASID_SHIFT;
- pteh |= PTEH_VALID;
-
- /* Set PTEL register, set_pte has performed the sign extension */
- ptel &= _PAGE_FLAGS_HARDWARE_MASK; /* drop software flags */
-
- tlbp = is_text_not_data ? &(cpu_data->itlb) : &(cpu_data->dtlb);
- next = tlbp->next;
- __flush_tlb_slot(next);
- asm volatile ("putcfg %0,1,%2\n\n\t"
- "putcfg %0,0,%1\n"
- : : "r" (next), "r" (pteh), "r" (ptel) );
-
- next += TLB_STEP;
- if (next > tlbp->last) next = tlbp->first;
- tlbp->next = next;
-
-}
-
-static int handle_vmalloc_fault(struct mm_struct *mm,
- unsigned long protection_flags,
- unsigned long long textaccess,
- unsigned long address)
-{
- pgd_t *dir;
- pud_t *pud;
- pmd_t *pmd;
- static pte_t *pte;
- pte_t entry;
-
- dir = pgd_offset_k(address);
-
- pud = pud_offset(dir, address);
- if (pud_none_or_clear_bad(pud))
- return 0;
-
- pmd = pmd_offset(pud, address);
- if (pmd_none_or_clear_bad(pmd))
- return 0;
-
- pte = pte_offset_kernel(pmd, address);
- entry = *pte;
-
- if (pte_none(entry) || !pte_present(entry))
- return 0;
- if ((pte_val(entry) & protection_flags) != protection_flags)
- return 0;
-
- __do_tlb_refill(address, textaccess, pte);
-
- return 1;
-}
-
-static int handle_tlbmiss(struct mm_struct *mm,
- unsigned long long protection_flags,
- unsigned long long textaccess,
- unsigned long address)
-{
- pgd_t *dir;
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
- pte_t entry;
-
- /* NB. The PGD currently only contains a single entry - there is no
- page table tree stored for the top half of the address space since
- virtual pages in that region should never be mapped in user mode.
- (In kernel mode, the only things in that region are the 512Mb super
- page (locked in), and vmalloc (modules) + I/O device pages (handled
- by handle_vmalloc_fault), so no PGD for the upper half is required
- by kernel mode either).
-
- See how mm->pgd is allocated and initialised in pgd_alloc to see why
- the next test is necessary. - RPC */
- if (address >= (unsigned long) TASK_SIZE)
- /* upper half - never has page table entries. */
- return 0;
-
- dir = pgd_offset(mm, address);
- if (pgd_none(*dir) || !pgd_present(*dir))
- return 0;
- if (!pgd_present(*dir))
- return 0;
-
- pud = pud_offset(dir, address);
- if (pud_none(*pud) || !pud_present(*pud))
- return 0;
-
- pmd = pmd_offset(pud, address);
- if (pmd_none(*pmd) || !pmd_present(*pmd))
- return 0;
-
- pte = pte_offset_kernel(pmd, address);
- entry = *pte;
-
- if (pte_none(entry) || !pte_present(entry))
- return 0;
-
- /*
- * If the page doesn't have sufficient protection bits set to
- * service the kind of fault being handled, there's not much
- * point doing the TLB refill. Punt the fault to the general
- * handler.
- */
- if ((pte_val(entry) & protection_flags) != protection_flags)
- return 0;
-
- __do_tlb_refill(address, textaccess, pte);
-
- return 1;
-}
-
-/*
- * Put all this information into one structure so that everything is just
- * arithmetic relative to a single base address. This reduces the number
- * of movi/shori pairs needed just to load addresses of static data.
- */
-struct expevt_lookup {
- unsigned short protection_flags[8];
- unsigned char is_text_access[8];
- unsigned char is_write_access[8];
-};
-
-#define PRU (1<<9)
-#define PRW (1<<8)
-#define PRX (1<<7)
-#define PRR (1<<6)
-
-#define DIRTY (_PAGE_DIRTY | _PAGE_ACCESSED)
-#define YOUNG (_PAGE_ACCESSED)
-
-/* Sized as 8 rather than 4 to allow checking the PTE's PRU bit against whether
- the fault happened in user mode or privileged mode. */
-static struct expevt_lookup expevt_lookup_table = {
- .protection_flags = {PRX, PRX, 0, 0, PRR, PRR, PRW, PRW},
- .is_text_access = {1, 1, 0, 0, 0, 0, 0, 0}
-};
-
-/*
- This routine handles page faults that can be serviced just by refilling a
- TLB entry from an existing page table entry. (This case represents a very
- large majority of page faults.) Return 1 if the fault was successfully
- handled. Return 0 if the fault could not be handled. (This leads into the
- general fault handling in fault.c which deals with mapping file-backed
- pages, stack growth, segmentation faults, swapping etc etc)
- */
-asmlinkage int do_fast_page_fault(unsigned long long ssr_md,
- unsigned long long expevt,
- unsigned long address)
-{
- struct task_struct *tsk;
- struct mm_struct *mm;
- unsigned long long textaccess;
- unsigned long long protection_flags;
- unsigned long long index;
- unsigned long long expevt4;
-
- /* The next few lines implement a way of hashing EXPEVT into a
- * small array index which can be used to lookup parameters
- * specific to the type of TLBMISS being handled.
- *
- * Note:
- * ITLBMISS has EXPEVT==0xa40
- * RTLBMISS has EXPEVT==0x040
- * WTLBMISS has EXPEVT==0x060
- */
- expevt4 = (expevt >> 4);
- /* TODO : xor ssr_md into this expression too. Then we can check
- * that PRU is set when it needs to be. */
- index = expevt4 ^ (expevt4 >> 5);
- index &= 7;
- protection_flags = expevt_lookup_table.protection_flags[index];
- textaccess = expevt_lookup_table.is_text_access[index];
-
- /* SIM
- * Note this is now called with interrupts still disabled
- * This is to cope with being called for a missing IO port
- * address with interrupts disabled. This should be fixed as
- * soon as we have a better 'fast path' miss handler.
- *
- * Plus take care how you try and debug this stuff.
- * For example, writing debug data to a port which you
- * have just faulted on is not going to work.
- */
-
- tsk = current;
- mm = tsk->mm;
-
- if ((address >= VMALLOC_START && address < VMALLOC_END) ||
- (address >= IOBASE_VADDR && address < IOBASE_END)) {
- if (ssr_md)
- /*
- * Process-contexts can never have this address
- * range mapped
- */
- if (handle_vmalloc_fault(mm, protection_flags,
- textaccess, address))
- return 1;
- } else if (!in_interrupt() && mm) {
- if (handle_tlbmiss(mm, protection_flags, textaccess, address))
- return 1;
- }
-
- return 0;
-}
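
The EXPEVT hashing in the deleted do_fast_page_fault() maps the three TLB-miss exception codes onto distinct slots of the 8-entry lookup table with (expevt >> 4) ^ ((expevt >> 4) >> 5), masked to 3 bits. Here is a standalone sketch that reproduces that arithmetic and the PRX/PRR/PRW constants from the table above; everything else in it is purely illustrative.

/* Standalone check of the EXPEVT -> lookup-table index hash. */
#include <stdio.h>

#define PRW (1 << 8)
#define PRX (1 << 7)
#define PRR (1 << 6)

static const unsigned short protection_flags[8] =
	{ PRX, PRX, 0, 0, PRR, PRR, PRW, PRW };

static unsigned expevt_index(unsigned long expevt)
{
	unsigned long e4 = expevt >> 4;
	return (e4 ^ (e4 >> 5)) & 7;
}

int main(void)
{
	const struct { const char *name; unsigned long expevt; } miss[] = {
		{ "ITLBMISS", 0xa40 },	/* instruction fetch miss */
		{ "RTLBMISS", 0x040 },	/* data read miss */
		{ "WTLBMISS", 0x060 },	/* data write miss */
	};

	for (int i = 0; i < 3; i++) {
		unsigned idx = expevt_index(miss[i].expevt);
		printf("%s -> index %u, protection flags 0x%x\n",
		       miss[i].name, idx, protection_flags[idx]);
	}
	return 0;
}

This prints index 1 (PRX) for ITLBMISS, 4 (PRR) for RTLBMISS and 6 (PRW) for WTLBMISS, matching the table layout above.
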
diff --git a/arch/sh/mm/flush-sh4.c b/arch/sh/mm/flush-sh4.c
new file mode 100644
index 00000000000..0b85dd9dd3a
--- /dev/null
+++ b/arch/sh/mm/flush-sh4.c
@@ -0,0 +1,110 @@
+#include <linux/mm.h>
+#include <asm/mmu_context.h>
+#include <asm/cache_insns.h>
+#include <asm/cacheflush.h>
+#include <asm/traps.h>
+
+/*
+ * Write back the dirty D-caches, but do not invalidate them.
+ *
+ * START: Virtual Address (U0, P1, or P3)
+ * SIZE: Size of the region.
+ */
+static void sh4__flush_wback_region(void *start, int size)
+{
+ reg_size_t aligned_start, v, cnt, end;
+
+ aligned_start = register_align(start);
+ v = aligned_start & ~(L1_CACHE_BYTES-1);
+ end = (aligned_start + size + L1_CACHE_BYTES-1)
+ & ~(L1_CACHE_BYTES-1);
+ cnt = (end - v) / L1_CACHE_BYTES;
+
+ while (cnt >= 8) {
+ __ocbwb(v); v += L1_CACHE_BYTES;
+ __ocbwb(v); v += L1_CACHE_BYTES;
+ __ocbwb(v); v += L1_CACHE_BYTES;
+ __ocbwb(v); v += L1_CACHE_BYTES;
+ __ocbwb(v); v += L1_CACHE_BYTES;
+ __ocbwb(v); v += L1_CACHE_BYTES;
+ __ocbwb(v); v += L1_CACHE_BYTES;
+ __ocbwb(v); v += L1_CACHE_BYTES;
+ cnt -= 8;
+ }
+
+ while (cnt) {
+ __ocbwb(v); v += L1_CACHE_BYTES;
+ cnt--;
+ }
+}
+
+/*
+ * Write back the dirty D-caches and invalidate them.
+ *
+ * START: Virtual Address (U0, P1, or P3)
+ * SIZE: Size of the region.
+ */
+static void sh4__flush_purge_region(void *start, int size)
+{
+ reg_size_t aligned_start, v, cnt, end;
+
+ aligned_start = register_align(start);
+ v = aligned_start & ~(L1_CACHE_BYTES-1);
+ end = (aligned_start + size + L1_CACHE_BYTES-1)
+ & ~(L1_CACHE_BYTES-1);
+ cnt = (end - v) / L1_CACHE_BYTES;
+
+ while (cnt >= 8) {
+ __ocbp(v); v += L1_CACHE_BYTES;
+ __ocbp(v); v += L1_CACHE_BYTES;
+ __ocbp(v); v += L1_CACHE_BYTES;
+ __ocbp(v); v += L1_CACHE_BYTES;
+ __ocbp(v); v += L1_CACHE_BYTES;
+ __ocbp(v); v += L1_CACHE_BYTES;
+ __ocbp(v); v += L1_CACHE_BYTES;
+ __ocbp(v); v += L1_CACHE_BYTES;
+ cnt -= 8;
+ }
+ while (cnt) {
+ __ocbp(v); v += L1_CACHE_BYTES;
+ cnt--;
+ }
+}
+
+/*
+ * Invalidate the D-cache lines for the region without writing back dirty data.
+ */
+static void sh4__flush_invalidate_region(void *start, int size)
+{
+ reg_size_t aligned_start, v, cnt, end;
+
+ aligned_start = register_align(start);
+ v = aligned_start & ~(L1_CACHE_BYTES-1);
+ end = (aligned_start + size + L1_CACHE_BYTES-1)
+ & ~(L1_CACHE_BYTES-1);
+ cnt = (end - v) / L1_CACHE_BYTES;
+
+ while (cnt >= 8) {
+ __ocbi(v); v += L1_CACHE_BYTES;
+ __ocbi(v); v += L1_CACHE_BYTES;
+ __ocbi(v); v += L1_CACHE_BYTES;
+ __ocbi(v); v += L1_CACHE_BYTES;
+ __ocbi(v); v += L1_CACHE_BYTES;
+ __ocbi(v); v += L1_CACHE_BYTES;
+ __ocbi(v); v += L1_CACHE_BYTES;
+ __ocbi(v); v += L1_CACHE_BYTES;
+ cnt -= 8;
+ }
+
+ while (cnt) {
+ __ocbi(v); v += L1_CACHE_BYTES;
+ cnt--;
+ }
+}
+
+void __init sh4__flush_region_init(void)
+{
+ __flush_wback_region = sh4__flush_wback_region;
+ __flush_invalidate_region = sh4__flush_invalidate_region;
+ __flush_purge_region = sh4__flush_purge_region;
+}
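
All three routines above share the same line-alignment arithmetic: round the start down and the end up to L1_CACHE_BYTES, then issue one cache operation per line, unrolled by eight. A userspace sketch of just that arithmetic follows, assuming a 32-byte line size and using a stub op() in place of ocbwb/ocbp/ocbi.

/* Sketch of the cache-line alignment used by the flush routines above. */
#include <stdio.h>
#include <stdint.h>

#define L1_CACHE_BYTES 32UL

static void op(uintptr_t line)
{
	printf("  op on line 0x%lx\n", (unsigned long)line);
}

static void flush_region(void *start, int size)
{
	uintptr_t v   = (uintptr_t)start & ~(L1_CACHE_BYTES - 1);
	uintptr_t end = ((uintptr_t)start + size + L1_CACHE_BYTES - 1)
			& ~(L1_CACHE_BYTES - 1);
	unsigned long cnt = (end - v) / L1_CACHE_BYTES;

	/* The kernel code unrolls this by 8 to cut loop overhead;
	 * a plain loop is equivalent for illustration. */
	while (cnt--) {
		op(v);
		v += L1_CACHE_BYTES;
	}
}

int main(void)
{
	char buf[128];

	/* 100 bytes starting mid-line: every line overlapping the
	 * region is touched, rounded out to line boundaries. */
	flush_region(buf + 5, 100);
	return 0;
}
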
diff --git a/arch/sh/mm/gup.c b/arch/sh/mm/gup.c
new file mode 100644
index 00000000000..bf8daf9d9c9
--- /dev/null
+++ b/arch/sh/mm/gup.c
@@ -0,0 +1,273 @@
+/*
+ * Lockless get_user_pages_fast for SuperH
+ *
+ * Copyright (C) 2009 - 2010 Paul Mundt
+ *
+ * Cloned from the x86 and PowerPC versions, by:
+ *
+ * Copyright (C) 2008 Nick Piggin
+ * Copyright (C) 2008 Novell Inc.
+ */
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/vmstat.h>
+#include <linux/highmem.h>
+#include <asm/pgtable.h>
+
+static inline pte_t gup_get_pte(pte_t *ptep)
+{
+#ifndef CONFIG_X2TLB
+ return ACCESS_ONCE(*ptep);
+#else
+ /*
+ * With get_user_pages_fast, we walk down the pagetables without
+ * taking any locks. For this we would like to load the pointers
+ * atomically, but that is not possible with 64-bit PTEs. What
+ * we do have is the guarantee that a pte will only either go
+ * from not present to present, or present to not present or both
+ * -- it will not switch to a completely different present page
+ * without a TLB flush in between; something that we are blocking
+ * by holding interrupts off.
+ *
+ * Setting ptes from not present to present goes:
+ * ptep->pte_high = h;
+ * smp_wmb();
+ * ptep->pte_low = l;
+ *
+ * And present to not present goes:
+ * ptep->pte_low = 0;
+ * smp_wmb();
+ * ptep->pte_high = 0;
+ *
+ * We must ensure here that the load of pte_low sees l iff pte_high
+ * sees h. We load pte_high *after* loading pte_low, which ensures we
+ * don't see an older value of pte_high. *Then* we recheck pte_low,
+ * which ensures that we haven't picked up a changed pte high. We might
+ * have got rubbish values from pte_low and pte_high, but we are
+ * guaranteed that pte_low will not have the present bit set *unless*
+ * it is 'l'. And get_user_pages_fast only operates on present ptes, so
+ * we're safe.
+ *
+ * gup_get_pte should not be used or copied outside gup.c without being
+ * very careful -- it does not atomically load the pte or anything that
+ * is likely to be useful for you.
+ */
+ pte_t pte;
+
+retry:
+ pte.pte_low = ptep->pte_low;
+ smp_rmb();
+ pte.pte_high = ptep->pte_high;
+ smp_rmb();
+ if (unlikely(pte.pte_low != ptep->pte_low))
+ goto retry;
+
+ return pte;
+#endif
+}
+
+/*
+ * The performance critical leaf functions are made noinline otherwise gcc
+ * inlines everything into a single function which results in too much
+ * register pressure.
+ */
+static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
+ unsigned long end, int write, struct page **pages, int *nr)
+{
+ u64 mask, result;
+ pte_t *ptep;
+
+#ifdef CONFIG_X2TLB
+ result = _PAGE_PRESENT | _PAGE_EXT(_PAGE_EXT_KERN_READ | _PAGE_EXT_USER_READ);
+ if (write)
+ result |= _PAGE_EXT(_PAGE_EXT_KERN_WRITE | _PAGE_EXT_USER_WRITE);
+#elif defined(CONFIG_SUPERH64)
+ result = _PAGE_PRESENT | _PAGE_USER | _PAGE_READ;
+ if (write)
+ result |= _PAGE_WRITE;
+#else
+ result = _PAGE_PRESENT | _PAGE_USER;
+ if (write)
+ result |= _PAGE_RW;
+#endif
+
+ mask = result | _PAGE_SPECIAL;
+
+ ptep = pte_offset_map(&pmd, addr);
+ do {
+ pte_t pte = gup_get_pte(ptep);
+ struct page *page;
+
+ if ((pte_val(pte) & mask) != result) {
+ pte_unmap(ptep);
+ return 0;
+ }
+ VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
+ page = pte_page(pte);
+ get_page(page);
+ pages[*nr] = page;
+ (*nr)++;
+
+ } while (ptep++, addr += PAGE_SIZE, addr != end);
+ pte_unmap(ptep - 1);
+
+ return 1;
+}
+
+static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
+ int write, struct page **pages, int *nr)
+{
+ unsigned long next;
+ pmd_t *pmdp;
+
+ pmdp = pmd_offset(&pud, addr);
+ do {
+ pmd_t pmd = *pmdp;
+
+ next = pmd_addr_end(addr, end);
+ if (pmd_none(pmd))
+ return 0;
+ if (!gup_pte_range(pmd, addr, next, write, pages, nr))
+ return 0;
+ } while (pmdp++, addr = next, addr != end);
+
+ return 1;
+}
+
+static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
+ int write, struct page **pages, int *nr)
+{
+ unsigned long next;
+ pud_t *pudp;
+
+ pudp = pud_offset(&pgd, addr);
+ do {
+ pud_t pud = *pudp;
+
+ next = pud_addr_end(addr, end);
+ if (pud_none(pud))
+ return 0;
+ if (!gup_pmd_range(pud, addr, next, write, pages, nr))
+ return 0;
+ } while (pudp++, addr = next, addr != end);
+
+ return 1;
+}
+
+/*
+ * Like get_user_pages_fast() except it's IRQ-safe in that it won't fall
+ * back to the regular GUP.
+ */
+int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
+ struct page **pages)
+{
+ struct mm_struct *mm = current->mm;
+ unsigned long addr, len, end;
+ unsigned long next;
+ unsigned long flags;
+ pgd_t *pgdp;
+ int nr = 0;
+
+ start &= PAGE_MASK;
+ addr = start;
+ len = (unsigned long) nr_pages << PAGE_SHIFT;
+ end = start + len;
+ if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
+ (void __user *)start, len)))
+ return 0;
+
+ /*
+ * This doesn't prevent pagetable teardown, but does prevent
+ * the pagetables and pages from being freed.
+ */
+ local_irq_save(flags);
+ pgdp = pgd_offset(mm, addr);
+ do {
+ pgd_t pgd = *pgdp;
+
+ next = pgd_addr_end(addr, end);
+ if (pgd_none(pgd))
+ break;
+ if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
+ break;
+ } while (pgdp++, addr = next, addr != end);
+ local_irq_restore(flags);
+
+ return nr;
+}
+
+/**
+ * get_user_pages_fast() - pin user pages in memory
+ * @start: starting user address
+ * @nr_pages: number of pages from start to pin
+ * @write: whether pages will be written to
+ * @pages: array that receives pointers to the pages pinned.
+ * Should be at least nr_pages long.
+ *
+ * Attempt to pin user pages in memory without taking mm->mmap_sem.
+ * If not successful, it will fall back to taking the lock and
+ * calling get_user_pages().
+ *
+ * Returns number of pages pinned. This may be fewer than the number
+ * requested. If nr_pages is 0 or negative, returns 0. If no pages
+ * were pinned, returns -errno.
+ */
+int get_user_pages_fast(unsigned long start, int nr_pages, int write,
+ struct page **pages)
+{
+ struct mm_struct *mm = current->mm;
+ unsigned long addr, len, end;
+ unsigned long next;
+ pgd_t *pgdp;
+ int nr = 0;
+
+ start &= PAGE_MASK;
+ addr = start;
+ len = (unsigned long) nr_pages << PAGE_SHIFT;
+
+ end = start + len;
+ if (end < start)
+ goto slow_irqon;
+
+ local_irq_disable();
+ pgdp = pgd_offset(mm, addr);
+ do {
+ pgd_t pgd = *pgdp;
+
+ next = pgd_addr_end(addr, end);
+ if (pgd_none(pgd))
+ goto slow;
+ if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
+ goto slow;
+ } while (pgdp++, addr = next, addr != end);
+ local_irq_enable();
+
+ VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT);
+ return nr;
+
+ {
+ int ret;
+
+slow:
+ local_irq_enable();
+slow_irqon:
+ /* Try to get the remaining pages with get_user_pages */
+ start += nr << PAGE_SHIFT;
+ pages += nr;
+
+ down_read(&mm->mmap_sem);
+ ret = get_user_pages(current, mm, start,
+ (end - start) >> PAGE_SHIFT, write, 0, pages, NULL);
+ up_read(&mm->mmap_sem);
+
+ /* Have to be a bit careful with return values */
+ if (nr > 0) {
+ if (ret < 0)
+ ret = nr;
+ else
+ ret += nr;
+ }
+
+ return ret;
+ }
+}
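
The comment in gup_get_pte() describes the split 64-bit PTE load: read the low half, read the high half, then re-check the low half and retry if it changed, relying on the writer ordering the two halves with barriers. Below is a userspace sketch of that retry protocol, with C11 atomics standing in for smp_wmb()/smp_rmb() and a hypothetical pte64 type in place of the kernel's pte_t.

/* Sketch of the split-load retry for a 64-bit PTE made of two 32-bit halves. */
#include <stdio.h>
#include <stdint.h>
#include <stdatomic.h>

struct pte64 {
	_Atomic uint32_t low;	/* holds the present bit */
	_Atomic uint32_t high;
};

static uint64_t pte_read(struct pte64 *p)
{
	uint32_t lo, hi;

	do {
		lo = atomic_load_explicit(&p->low, memory_order_acquire);
		hi = atomic_load_explicit(&p->high, memory_order_acquire);
		/* Retry if the low half moved under us. */
	} while (lo != atomic_load_explicit(&p->low, memory_order_relaxed));

	return ((uint64_t)hi << 32) | lo;
}

int main(void)
{
	struct pte64 pte;

	/* "not present -> present" store order: high half first, then low. */
	atomic_store_explicit(&pte.high, 0x00000012u, memory_order_relaxed);
	atomic_store_explicit(&pte.low, 0x00001025u, memory_order_release);

	printf("pte = 0x%016llx\n", (unsigned long long)pte_read(&pte));
	return 0;
}
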
diff --git a/arch/sh/mm/hugetlbpage.c b/arch/sh/mm/hugetlbpage.c
index ae8c321d6e2..d7762349ea4 100644
--- a/arch/sh/mm/hugetlbpage.c
+++ b/arch/sh/mm/hugetlbpage.c
@@ -13,7 +13,6 @@
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
-#include <linux/slab.h>
#include <linux/sysctl.h>
#include <asm/mman.h>
@@ -22,7 +21,8 @@
#include <asm/tlbflush.h>
#include <asm/cacheflush.h>
-pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
+pte_t *huge_pte_alloc(struct mm_struct *mm,
+ unsigned long addr, unsigned long sz)
{
pgd_t *pgd;
pud_t *pud;
@@ -35,7 +35,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
if (pud) {
pmd = pmd_alloc(mm, pud, addr);
if (pmd)
- pte = pte_alloc_map(mm, pmd, addr);
+ pte = pte_alloc_map(mm, NULL, pmd, addr);
}
}
@@ -78,6 +78,11 @@ int pmd_huge(pmd_t pmd)
return 0;
}
+int pud_huge(pud_t pud)
+{
+ return 0;
+}
+
struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
pmd_t *pmd, int write)
{
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index 53dde060736..2d089fe2cba 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -2,7 +2,7 @@
* linux/arch/sh/mm/init.c
*
* Copyright (C) 1999 Niibe Yutaka
- * Copyright (C) 2002 - 2007 Paul Mundt
+ * Copyright (C) 2002 - 2011 Paul Mundt
*
* Based on linux/arch/i386/mm/init.c:
* Copyright (C) 1995 Linus Torvalds
@@ -10,115 +10,95 @@
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/init.h>
+#include <linux/gfp.h>
#include <linux/bootmem.h>
#include <linux/proc_fs.h>
#include <linux/pagemap.h>
#include <linux/percpu.h>
#include <linux/io.h>
+#include <linux/memblock.h>
+#include <linux/dma-mapping.h>
+#include <linux/export.h>
#include <asm/mmu_context.h>
+#include <asm/mmzone.h>
+#include <asm/kexec.h>
#include <asm/tlb.h>
#include <asm/cacheflush.h>
#include <asm/sections.h>
+#include <asm/setup.h>
#include <asm/cache.h>
+#include <asm/sizes.h>
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
pgd_t swapper_pg_dir[PTRS_PER_PGD];
-unsigned long cached_to_uncached = 0;
-void show_mem(void)
+void __init generic_mem_init(void)
{
- int total = 0, reserved = 0, free = 0;
- int shared = 0, cached = 0, slab = 0;
- pg_data_t *pgdat;
-
- printk("Mem-info:\n");
- show_free_areas();
-
- for_each_online_pgdat(pgdat) {
- unsigned long flags, i;
-
- pgdat_resize_lock(pgdat, &flags);
- for (i = 0; i < pgdat->node_spanned_pages; i++) {
- struct page *page = pgdat_page_nr(pgdat, i);
- total++;
- if (PageReserved(page))
- reserved++;
- else if (PageSwapCache(page))
- cached++;
- else if (PageSlab(page))
- slab++;
- else if (!page_count(page))
- free++;
- else
- shared += page_count(page) - 1;
- }
- pgdat_resize_unlock(pgdat, &flags);
- }
+ memblock_add(__MEMORY_START, __MEMORY_SIZE);
+}
- printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
- printk("%d pages of RAM\n", total);
- printk("%d free pages\n", free);
- printk("%d reserved pages\n", reserved);
- printk("%d slab pages\n", slab);
- printk("%d pages shared\n", shared);
- printk("%d pages swap cached\n", cached);
- printk(KERN_INFO "Total of %ld pages in page table cache\n",
- quicklist_total_size());
+void __init __weak plat_mem_setup(void)
+{
+ /* Nothing to see here, move along. */
}
#ifdef CONFIG_MMU
-static void set_pte_phys(unsigned long addr, unsigned long phys, pgprot_t prot)
+static pte_t *__get_pte_phys(unsigned long addr)
{
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
- pte_t *pte;
pgd = pgd_offset_k(addr);
if (pgd_none(*pgd)) {
pgd_ERROR(*pgd);
- return;
+ return NULL;
}
pud = pud_alloc(NULL, pgd, addr);
if (unlikely(!pud)) {
pud_ERROR(*pud);
- return;
+ return NULL;
}
pmd = pmd_alloc(NULL, pud, addr);
if (unlikely(!pmd)) {
pmd_ERROR(*pmd);
- return;
+ return NULL;
}
- pte = pte_offset_kernel(pmd, addr);
+ return pte_offset_kernel(pmd, addr);
+}
+
+static void set_pte_phys(unsigned long addr, unsigned long phys, pgprot_t prot)
+{
+ pte_t *pte;
+
+ pte = __get_pte_phys(addr);
if (!pte_none(*pte)) {
pte_ERROR(*pte);
return;
}
set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, prot));
+ local_flush_tlb_one(get_asid(), addr);
- if (cached_to_uncached)
- flush_tlb_one(get_asid(), addr);
+ if (pgprot_val(prot) & _PAGE_WIRED)
+ tlb_wire_entry(NULL, addr, *pte);
+}
+
+static void clear_pte_phys(unsigned long addr, pgprot_t prot)
+{
+ pte_t *pte;
+
+ pte = __get_pte_phys(addr);
+
+ if (pgprot_val(prot) & _PAGE_WIRED)
+ tlb_unwire_entry();
+
+ set_pte(pte, pfn_pte(0, __pgprot(0)));
+ local_flush_tlb_one(get_asid(), addr);
}
-/*
- * As a performance optimization, other platforms preserve the fixmap mapping
- * across a context switch, we don't presently do this, but this could be done
- * in a similar fashion as to the wired TLB interface that sh64 uses (by way
- * of the memory mapped UTLB configuration) -- this unfortunately forces us to
- * give up a TLB entry for each mapping we want to preserve. While this may be
- * viable for a small number of fixmaps, it's not particularly useful for
- * everything and needs to be carefully evaluated. (ie, we may want this for
- * the vsyscall page).
- *
- * XXX: Perhaps add a _PAGE_WIRED flag or something similar that we can pass
- * in at __set_fixmap() time to determine the appropriate behavior to follow.
- *
- * -- PFM.
- */
void __set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
{
unsigned long address = __fix_to_virt(idx);
@@ -131,46 +111,251 @@ void __set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
set_pte_phys(address, phys, prot);
}
+void __clear_fixmap(enum fixed_addresses idx, pgprot_t prot)
+{
+ unsigned long address = __fix_to_virt(idx);
+
+ if (idx >= __end_of_fixed_addresses) {
+ BUG();
+ return;
+ }
+
+ clear_pte_phys(address, prot);
+}
+
+static pmd_t * __init one_md_table_init(pud_t *pud)
+{
+ if (pud_none(*pud)) {
+ pmd_t *pmd;
+
+ pmd = alloc_bootmem_pages(PAGE_SIZE);
+ pud_populate(&init_mm, pud, pmd);
+ BUG_ON(pmd != pmd_offset(pud, 0));
+ }
+
+ return pmd_offset(pud, 0);
+}
+
+static pte_t * __init one_page_table_init(pmd_t *pmd)
+{
+ if (pmd_none(*pmd)) {
+ pte_t *pte;
+
+ pte = alloc_bootmem_pages(PAGE_SIZE);
+ pmd_populate_kernel(&init_mm, pmd, pte);
+ BUG_ON(pte != pte_offset_kernel(pmd, 0));
+ }
+
+ return pte_offset_kernel(pmd, 0);
+}
+
+static pte_t * __init page_table_kmap_check(pte_t *pte, pmd_t *pmd,
+ unsigned long vaddr, pte_t *lastpte)
+{
+ return pte;
+}
+
void __init page_table_range_init(unsigned long start, unsigned long end,
pgd_t *pgd_base)
{
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
- int pgd_idx;
+ pte_t *pte = NULL;
+ int i, j, k;
unsigned long vaddr;
- vaddr = start & PMD_MASK;
- end = (end + PMD_SIZE - 1) & PMD_MASK;
- pgd_idx = pgd_index(vaddr);
- pgd = pgd_base + pgd_idx;
-
- for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) {
- BUG_ON(pgd_none(*pgd));
- pud = pud_offset(pgd, 0);
- BUG_ON(pud_none(*pud));
- pmd = pmd_offset(pud, 0);
-
- if (!pmd_present(*pmd)) {
- pte_t *pte_table;
- pte_table = (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE);
- memset(pte_table, 0, PAGE_SIZE);
- pmd_populate_kernel(&init_mm, pmd, pte_table);
+ vaddr = start;
+ i = __pgd_offset(vaddr);
+ j = __pud_offset(vaddr);
+ k = __pmd_offset(vaddr);
+ pgd = pgd_base + i;
+
+ for ( ; (i < PTRS_PER_PGD) && (vaddr != end); pgd++, i++) {
+ pud = (pud_t *)pgd;
+ for ( ; (j < PTRS_PER_PUD) && (vaddr != end); pud++, j++) {
+ pmd = one_md_table_init(pud);
+#ifndef __PAGETABLE_PMD_FOLDED
+ pmd += k;
+#endif
+ for (; (k < PTRS_PER_PMD) && (vaddr != end); pmd++, k++) {
+ pte = page_table_kmap_check(one_page_table_init(pmd),
+ pmd, vaddr, pte);
+ vaddr += PMD_SIZE;
+ }
+ k = 0;
}
-
- vaddr += PMD_SIZE;
+ j = 0;
}
}
#endif /* CONFIG_MMU */
-/*
- * paging_init() sets up the page tables
- */
+void __init allocate_pgdat(unsigned int nid)
+{
+ unsigned long start_pfn, end_pfn;
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+ unsigned long phys;
+#endif
+
+ get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
+
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+ phys = __memblock_alloc_base(sizeof(struct pglist_data),
+ SMP_CACHE_BYTES, end_pfn << PAGE_SHIFT);
+ /* Retry with all of system memory */
+ if (!phys)
+ phys = __memblock_alloc_base(sizeof(struct pglist_data),
+ SMP_CACHE_BYTES, memblock_end_of_DRAM());
+ if (!phys)
+ panic("Can't allocate pgdat for node %d\n", nid);
+
+ NODE_DATA(nid) = __va(phys);
+ memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
+
+ NODE_DATA(nid)->bdata = &bootmem_node_data[nid];
+#endif
+
+ NODE_DATA(nid)->node_start_pfn = start_pfn;
+ NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn;
+}
+
+static void __init bootmem_init_one_node(unsigned int nid)
+{
+ unsigned long total_pages, paddr;
+ unsigned long end_pfn;
+ struct pglist_data *p;
+
+ p = NODE_DATA(nid);
+
+ /* Nothing to do.. */
+ if (!p->node_spanned_pages)
+ return;
+
+ end_pfn = pgdat_end_pfn(p);
+
+ total_pages = bootmem_bootmap_pages(p->node_spanned_pages);
+
+ paddr = memblock_alloc(total_pages << PAGE_SHIFT, PAGE_SIZE);
+ if (!paddr)
+ panic("Can't allocate bootmap for nid[%d]\n", nid);
+
+ init_bootmem_node(p, paddr >> PAGE_SHIFT, p->node_start_pfn, end_pfn);
+
+ free_bootmem_with_active_regions(nid, end_pfn);
+
+ /*
+ * XXX Handle initial reservations for the system memory node
+ * only for the moment, we'll refactor this later for handling
+ * reservations in other nodes.
+ */
+ if (nid == 0) {
+ struct memblock_region *reg;
+
+ /* Reserve the sections we're already using. */
+ for_each_memblock(reserved, reg) {
+ reserve_bootmem(reg->base, reg->size, BOOTMEM_DEFAULT);
+ }
+ }
+
+ sparse_memory_present_with_active_regions(nid);
+}
+
+static void __init do_init_bootmem(void)
+{
+ struct memblock_region *reg;
+ int i;
+
+ /* Add active regions with valid PFNs. */
+ for_each_memblock(memory, reg) {
+ unsigned long start_pfn, end_pfn;
+ start_pfn = memblock_region_memory_base_pfn(reg);
+ end_pfn = memblock_region_memory_end_pfn(reg);
+ __add_active_range(0, start_pfn, end_pfn);
+ }
+
+ /* All of system RAM sits in node 0 for the non-NUMA case */
+ allocate_pgdat(0);
+ node_set_online(0);
+
+ plat_mem_setup();
+
+ for_each_online_node(i)
+ bootmem_init_one_node(i);
+
+ sparse_init();
+}
+
+static void __init early_reserve_mem(void)
+{
+ unsigned long start_pfn;
+ u32 zero_base = (u32)__MEMORY_START + (u32)PHYSICAL_OFFSET;
+ u32 start = zero_base + (u32)CONFIG_ZERO_PAGE_OFFSET;
+
+ /*
+ * Partially used pages are not usable - thus
+ * we are rounding upwards:
+ */
+ start_pfn = PFN_UP(__pa(_end));
+
+ /*
+	 * Reserve the kernel text and reserve the bootmem bitmap. We do
+ * this in two steps (first step was init_bootmem()), because
+ * this catches the (definitely buggy) case of us accidentally
+ * initializing the bootmem allocator with an invalid RAM area.
+ */
+ memblock_reserve(start, (PFN_PHYS(start_pfn) + PAGE_SIZE - 1) - start);
+
+ /*
+ * Reserve physical pages below CONFIG_ZERO_PAGE_OFFSET.
+ */
+ if (CONFIG_ZERO_PAGE_OFFSET != 0)
+ memblock_reserve(zero_base, CONFIG_ZERO_PAGE_OFFSET);
+
+ /*
+ * Handle additional early reservations
+ */
+ check_for_initrd();
+ reserve_crashkernel();
+}
+
void __init paging_init(void)
{
unsigned long max_zone_pfns[MAX_NR_ZONES];
+ unsigned long vaddr, end;
int nid;
+ sh_mv.mv_mem_init();
+
+ early_reserve_mem();
+
+ /*
+ * Once the early reservations are out of the way, give the
+ * platforms a chance to kick out some memory.
+ */
+ if (sh_mv.mv_mem_reserve)
+ sh_mv.mv_mem_reserve();
+
+ memblock_enforce_memory_limit(memory_limit);
+ memblock_allow_resize();
+
+ memblock_dump_all();
+
+ /*
+ * Determine low and high memory ranges:
+ */
+ max_low_pfn = max_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
+ min_low_pfn = __MEMORY_START >> PAGE_SHIFT;
+
+ nodes_clear(node_online_map);
+
+ memory_start = (unsigned long)__va(__MEMORY_START);
+ memory_end = memory_start + (memory_limit ?: memblock_phys_mem_size());
+
+ uncached_init();
+ pmb_init();
+ do_init_bootmem();
+ ioremap_fixed_init();
+
/* We don't need to map the kernel through the TLB, as
 	 * it is permanently mapped using P1. So clear the
* entire pgd. */
@@ -180,10 +365,16 @@ void __init paging_init(void)
* check for a null value. */
set_TTB(swapper_pg_dir);
- /* Populate the relevant portions of swapper_pg_dir so that
+ /*
+ * Populate the relevant portions of swapper_pg_dir so that
* we can use the fixmap entries without calling kmalloc.
- * pte's will be filled in by __set_fixmap(). */
- page_table_range_init(FIXADDR_START, FIXADDR_TOP, swapper_pg_dir);
+ * pte's will be filled in by __set_fixmap().
+ */
+ vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
+ end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK;
+ page_table_range_init(vaddr, end, swapper_pg_dir);
+
+ kmap_coherent_init();
memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
@@ -191,7 +382,7 @@ void __init paging_init(void)
pg_data_t *pgdat = NODE_DATA(nid);
unsigned long low, start_pfn;
- start_pfn = pgdat->bdata->node_boot_start >> PAGE_SHIFT;
+ start_pfn = pgdat->bdata->node_min_pfn;
low = pgdat->bdata->node_low_pfn;
if (max_zone_pfns[ZONE_NORMAL] < low)
@@ -202,120 +393,98 @@ void __init paging_init(void)
}
free_area_init_nodes(max_zone_pfns);
+}
-#ifdef CONFIG_SUPERH32
- /* Set up the uncached fixmap */
- set_fixmap_nocache(FIX_UNCACHED, __pa(&__uncached_start));
-
-#ifdef CONFIG_29BIT
- /*
- * Handle trivial transitions between cached and uncached
- * segments, making use of the 1:1 mapping relationship in
- * 512MB lowmem.
- */
- cached_to_uncached = P2SEG - P1SEG;
-#endif
-#endif
+/*
+ * Early initialization for any I/O MMUs we might have.
+ */
+static void __init iommu_init(void)
+{
+ no_iommu_init();
}
-static struct kcore_list kcore_mem, kcore_vmalloc;
-int after_bootmem = 0;
+unsigned int mem_init_done = 0;
void __init mem_init(void)
{
- int codesize, datasize, initsize;
- int nid;
-
- num_physpages = 0;
- high_memory = NULL;
-
- for_each_online_node(nid) {
- pg_data_t *pgdat = NODE_DATA(nid);
- unsigned long node_pages = 0;
- void *node_high_memory;
+ pg_data_t *pgdat;
- num_physpages += pgdat->node_present_pages;
+ iommu_init();
- if (pgdat->node_spanned_pages)
- node_pages = free_all_bootmem_node(pgdat);
+ high_memory = NULL;
+ for_each_online_pgdat(pgdat)
+ high_memory = max_t(void *, high_memory,
+ __va(pgdat_end_pfn(pgdat) << PAGE_SHIFT));
- totalram_pages += node_pages;
+ free_all_bootmem();
- node_high_memory = (void *)__va((pgdat->node_start_pfn +
- pgdat->node_spanned_pages) <<
- PAGE_SHIFT);
- if (node_high_memory > high_memory)
- high_memory = node_high_memory;
- }
+ /* Set this up early, so we can take care of the zero page */
+ cpu_cache_init();
/* clear the zero-page */
memset(empty_zero_page, 0, PAGE_SIZE);
__flush_wback_region(empty_zero_page, PAGE_SIZE);
- after_bootmem = 1;
+ vsyscall_init();
- codesize = (unsigned long) &_etext - (unsigned long) &_text;
- datasize = (unsigned long) &_edata - (unsigned long) &_etext;
- initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;
+ mem_init_print_info(NULL);
+ pr_info("virtual kernel memory layout:\n"
+ " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
+#ifdef CONFIG_HIGHMEM
+ " pkmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
+#endif
+ " vmalloc : 0x%08lx - 0x%08lx (%4ld MB)\n"
+ " lowmem : 0x%08lx - 0x%08lx (%4ld MB) (cached)\n"
+#ifdef CONFIG_UNCACHED_MAPPING
+ " : 0x%08lx - 0x%08lx (%4ld MB) (uncached)\n"
+#endif
+ " .init : 0x%08lx - 0x%08lx (%4ld kB)\n"
+ " .data : 0x%08lx - 0x%08lx (%4ld kB)\n"
+ " .text : 0x%08lx - 0x%08lx (%4ld kB)\n",
+ FIXADDR_START, FIXADDR_TOP,
+ (FIXADDR_TOP - FIXADDR_START) >> 10,
+
+#ifdef CONFIG_HIGHMEM
+ PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE,
+ (LAST_PKMAP*PAGE_SIZE) >> 10,
+#endif
- kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT);
- kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
- VMALLOC_END - VMALLOC_START);
+ (unsigned long)VMALLOC_START, VMALLOC_END,
+ (VMALLOC_END - VMALLOC_START) >> 20,
- printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, "
- "%dk data, %dk init)\n",
- (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
- num_physpages << (PAGE_SHIFT-10),
- codesize >> 10,
- datasize >> 10,
- initsize >> 10);
+ (unsigned long)memory_start, (unsigned long)high_memory,
+ ((unsigned long)high_memory - (unsigned long)memory_start) >> 20,
- p3_cache_init();
+#ifdef CONFIG_UNCACHED_MAPPING
+ uncached_start, uncached_end, uncached_size >> 20,
+#endif
- /* Initialize the vDSO */
- vsyscall_init();
+ (unsigned long)&__init_begin, (unsigned long)&__init_end,
+ ((unsigned long)&__init_end -
+ (unsigned long)&__init_begin) >> 10,
+
+ (unsigned long)&_etext, (unsigned long)&_edata,
+ ((unsigned long)&_edata - (unsigned long)&_etext) >> 10,
+
+ (unsigned long)&_text, (unsigned long)&_etext,
+ ((unsigned long)&_etext - (unsigned long)&_text) >> 10);
+
+ mem_init_done = 1;
}
void free_initmem(void)
{
- unsigned long addr;
-
- addr = (unsigned long)(&__init_begin);
- for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
- ClearPageReserved(virt_to_page(addr));
- init_page_count(virt_to_page(addr));
- free_page(addr);
- totalram_pages++;
- }
- printk("Freeing unused kernel memory: %ldk freed\n",
- ((unsigned long)&__init_end -
- (unsigned long)&__init_begin) >> 10);
+ free_initmem_default(-1);
}
#ifdef CONFIG_BLK_DEV_INITRD
void free_initrd_mem(unsigned long start, unsigned long end)
{
- unsigned long p;
- for (p = start; p < end; p += PAGE_SIZE) {
- ClearPageReserved(virt_to_page(p));
- init_page_count(virt_to_page(p));
- free_page(p);
- totalram_pages++;
- }
- printk("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
+ free_reserved_area((void *)start, (void *)end, -1, "initrd");
}
#endif
#ifdef CONFIG_MEMORY_HOTPLUG
-void online_page(struct page *page)
-{
- ClearPageReserved(page);
- init_page_count(page);
- __free_page(page);
- totalram_pages++;
- num_physpages++;
-}
-
int arch_add_memory(int nid, u64 start, u64 size)
{
pg_data_t *pgdat;
@@ -326,7 +495,8 @@ int arch_add_memory(int nid, u64 start, u64 size)
pgdat = NODE_DATA(nid);
/* We only have ZONE_NORMAL, so this is easy.. */
- ret = __add_pages(pgdat->node_zones + ZONE_NORMAL, start_pfn, nr_pages);
+ ret = __add_pages(nid, pgdat->node_zones + ZONE_NORMAL,
+ start_pfn, nr_pages);
if (unlikely(ret))
printk("%s: Failed, __add_pages() == %d\n", __func__, ret);
@@ -342,4 +512,22 @@ int memory_add_physaddr_to_nid(u64 addr)
}
EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
#endif
+
+#ifdef CONFIG_MEMORY_HOTREMOVE
+int arch_remove_memory(u64 start, u64 size)
+{
+ unsigned long start_pfn = start >> PAGE_SHIFT;
+ unsigned long nr_pages = size >> PAGE_SHIFT;
+ struct zone *zone;
+ int ret;
+
+ zone = page_zone(pfn_to_page(start_pfn));
+ ret = __remove_pages(zone, start_pfn, nr_pages);
+ if (unlikely(ret))
+ pr_warn("%s: Failed, __remove_pages() == %d\n", __func__,
+ ret);
+
+ return ret;
+}
#endif
+#endif /* CONFIG_MEMORY_HOTPLUG */
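
early_reserve_mem() above keeps the kernel image reserved by rounding __pa(_end) up to the next page frame with PFN_UP(), so a partially used final page is not handed back to the allocator. A small sketch of that rounding follows, assuming 4 KiB pages and a made-up __pa(_end) value.

/* Sketch of the PFN rounding used when reserving the kernel image. */
#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

#define PFN_UP(x)   (((x) + PAGE_SIZE - 1) >> PAGE_SHIFT)
#define PFN_PHYS(x) ((x) << PAGE_SHIFT)

int main(void)
{
	unsigned long kernel_end = 0x0c2f3456UL;	/* hypothetical __pa(_end) */
	unsigned long start_pfn = PFN_UP(kernel_end);

	printf("kernel image ends at 0x%08lx\n", kernel_end);
	printf("first free pfn is   0x%05lx (phys 0x%08lx)\n",
	       start_pfn, PFN_PHYS(start_pfn));
	return 0;
}
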
diff --git a/arch/sh/mm/ioremap.c b/arch/sh/mm/ioremap.c
new file mode 100644
index 00000000000..0c99ec2e7ed
--- /dev/null
+++ b/arch/sh/mm/ioremap.c
@@ -0,0 +1,137 @@
+/*
+ * arch/sh/mm/ioremap.c
+ *
+ * (C) Copyright 1995 1996 Linus Torvalds
+ * (C) Copyright 2005 - 2010 Paul Mundt
+ *
+ * Re-map IO memory to kernel address space so that we can access it.
+ * This is needed for high PCI addresses that aren't mapped in the
+ * 640k-1MB IO memory area on PCs
+ *
+ * This file is subject to the terms and conditions of the GNU General
+ * Public License. See the file "COPYING" in the main directory of this
+ * archive for more details.
+ */
+#include <linux/vmalloc.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/pci.h>
+#include <linux/io.h>
+#include <asm/page.h>
+#include <asm/pgalloc.h>
+#include <asm/addrspace.h>
+#include <asm/cacheflush.h>
+#include <asm/tlbflush.h>
+#include <asm/mmu.h>
+
+/*
+ * Remap an arbitrary physical address space into the kernel virtual
+ * address space. Needed when the kernel wants to access high addresses
+ * directly.
+ *
+ * NOTE! We need to allow non-page-aligned mappings too: we will obviously
+ * have to convert them into an offset in a page-aligned mapping, but the
+ * caller shouldn't need to know that small detail.
+ */
+void __iomem * __init_refok
+__ioremap_caller(phys_addr_t phys_addr, unsigned long size,
+ pgprot_t pgprot, void *caller)
+{
+ struct vm_struct *area;
+ unsigned long offset, last_addr, addr, orig_addr;
+ void __iomem *mapped;
+
+ /* Don't allow wraparound or zero size */
+ last_addr = phys_addr + size - 1;
+ if (!size || last_addr < phys_addr)
+ return NULL;
+
+ /*
+ * If we can't yet use the regular approach, go the fixmap route.
+ */
+ if (!mem_init_done)
+ return ioremap_fixed(phys_addr, size, pgprot);
+
+ /*
+ * First try to remap through the PMB.
+ * PMB entries are all pre-faulted.
+ */
+ mapped = pmb_remap_caller(phys_addr, size, pgprot, caller);
+ if (mapped && !IS_ERR(mapped))
+ return mapped;
+
+ /*
+ * Mappings have to be page-aligned
+ */
+ offset = phys_addr & ~PAGE_MASK;
+ phys_addr &= PAGE_MASK;
+ size = PAGE_ALIGN(last_addr+1) - phys_addr;
+
+ /*
+ * Ok, go for it..
+ */
+ area = get_vm_area_caller(size, VM_IOREMAP, caller);
+ if (!area)
+ return NULL;
+ area->phys_addr = phys_addr;
+ orig_addr = addr = (unsigned long)area->addr;
+
+ if (ioremap_page_range(addr, addr + size, phys_addr, pgprot)) {
+ vunmap((void *)orig_addr);
+ return NULL;
+ }
+
+ return (void __iomem *)(offset + (char *)orig_addr);
+}
+EXPORT_SYMBOL(__ioremap_caller);
+
+/*
+ * Simple checks for non-translatable mappings.
+ */
+static inline int iomapping_nontranslatable(unsigned long offset)
+{
+#ifdef CONFIG_29BIT
+ /*
+ * In 29-bit mode this includes the fixed P1/P2 areas, as well as
+ * parts of P3.
+ */
+ if (PXSEG(offset) < P3SEG || offset >= P3_ADDR_MAX)
+ return 1;
+#endif
+
+ return 0;
+}
+
+void __iounmap(void __iomem *addr)
+{
+ unsigned long vaddr = (unsigned long __force)addr;
+ struct vm_struct *p;
+
+ /*
+ * Nothing to do if there is no translatable mapping.
+ */
+ if (iomapping_nontranslatable(vaddr))
+ return;
+
+ /*
+ * There's no VMA if it's from an early fixed mapping.
+ */
+ if (iounmap_fixed(addr) == 0)
+ return;
+
+ /*
+ * If the PMB handled it, there's nothing else to do.
+ */
+ if (pmb_unmap(addr) == 0)
+ return;
+
+ p = remove_vm_area((void *)(vaddr & PAGE_MASK));
+ if (!p) {
+ printk(KERN_ERR "%s: bad address %p\n", __func__, addr);
+ return;
+ }
+
+ kfree(p);
+}
+EXPORT_SYMBOL(__iounmap);
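
__ioremap_caller() accepts unaligned physical addresses: it remembers the sub-page offset, page-aligns the address and size for the actual mapping, and adds the offset back to the returned cookie. Here is a standalone sketch of that bookkeeping, assuming 4 KiB pages and with map_pages() as a hypothetical stand-in for ioremap_page_range().

/* Sketch of the offset/alignment handling done before mapping. */
#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE 4096UL
#define PAGE_MASK (~(PAGE_SIZE - 1))
#define PAGE_ALIGN(x) (((x) + PAGE_SIZE - 1) & PAGE_MASK)

static uintptr_t map_pages(uintptr_t phys, unsigned long size)
{
	printf("mapping phys 0x%08lx, %lu bytes\n", (unsigned long)phys, size);
	return 0xc0000000UL;	/* pretend virtual base of the new mapping */
}

int main(void)
{
	uintptr_t phys_addr = 0xfd000123UL;	/* unaligned device register */
	unsigned long size = 0x20;
	uintptr_t last_addr = phys_addr + size - 1;

	unsigned long offset = phys_addr & ~PAGE_MASK;
	phys_addr &= PAGE_MASK;
	size = PAGE_ALIGN(last_addr + 1) - phys_addr;

	uintptr_t virt = map_pages(phys_addr, size) + offset;
	printf("caller sees 0x%08lx\n", (unsigned long)virt);
	return 0;
}
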
diff --git a/arch/sh/mm/ioremap_32.c b/arch/sh/mm/ioremap_32.c
deleted file mode 100644
index 882a32ebc6b..00000000000
--- a/arch/sh/mm/ioremap_32.c
+++ /dev/null
@@ -1,150 +0,0 @@
-/*
- * arch/sh/mm/ioremap.c
- *
- * Re-map IO memory to kernel address space so that we can access it.
- * This is needed for high PCI addresses that aren't mapped in the
- * 640k-1MB IO memory area on PC's
- *
- * (C) Copyright 1995 1996 Linus Torvalds
- * (C) Copyright 2005, 2006 Paul Mundt
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License. See the file "COPYING" in the main directory of this
- * archive for more details.
- */
-#include <linux/vmalloc.h>
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/pci.h>
-#include <linux/io.h>
-#include <asm/page.h>
-#include <asm/pgalloc.h>
-#include <asm/addrspace.h>
-#include <asm/cacheflush.h>
-#include <asm/tlbflush.h>
-#include <asm/mmu.h>
-
-/*
- * Remap an arbitrary physical address space into the kernel virtual
- * address space. Needed when the kernel wants to access high addresses
- * directly.
- *
- * NOTE! We need to allow non-page-aligned mappings too: we will obviously
- * have to convert them into an offset in a page-aligned mapping, but the
- * caller shouldn't need to know that small detail.
- */
-void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
- unsigned long flags)
-{
- struct vm_struct * area;
- unsigned long offset, last_addr, addr, orig_addr;
- pgprot_t pgprot;
-
- /* Don't allow wraparound or zero size */
- last_addr = phys_addr + size - 1;
- if (!size || last_addr < phys_addr)
- return NULL;
-
- /*
- * If we're on an SH7751 or SH7780 PCI controller, PCI memory is
- * mapped at the end of the address space (typically 0xfd000000)
- * in a non-translatable area, so mapping through page tables for
- * this area is not only pointless, but also fundamentally
- * broken. Just return the physical address instead.
- *
- * For boards that map a small PCI memory aperture somewhere in
- * P1/P2 space, ioremap() will already do the right thing,
- * and we'll never get this far.
- */
- if (is_pci_memaddr(phys_addr) && is_pci_memaddr(last_addr))
- return (void __iomem *)phys_addr;
-
- /*
- * Don't allow anybody to remap normal RAM that we're using..
- */
- if (phys_addr < virt_to_phys(high_memory))
- return NULL;
-
- /*
- * Mappings have to be page-aligned
- */
- offset = phys_addr & ~PAGE_MASK;
- phys_addr &= PAGE_MASK;
- size = PAGE_ALIGN(last_addr+1) - phys_addr;
-
- /*
- * Ok, go for it..
- */
- area = get_vm_area(size, VM_IOREMAP);
- if (!area)
- return NULL;
- area->phys_addr = phys_addr;
- orig_addr = addr = (unsigned long)area->addr;
-
-#ifdef CONFIG_32BIT
- /*
- * First try to remap through the PMB once a valid VMA has been
- * established. Smaller allocations (or the rest of the size
- * remaining after a PMB mapping due to the size not being
- * perfectly aligned on a PMB size boundary) are then mapped
- * through the UTLB using conventional page tables.
- *
- * PMB entries are all pre-faulted.
- */
- if (unlikely(size >= 0x1000000)) {
- unsigned long mapped = pmb_remap(addr, phys_addr, size, flags);
-
- if (likely(mapped)) {
- addr += mapped;
- phys_addr += mapped;
- size -= mapped;
- }
- }
-#endif
-
- pgprot = __pgprot(pgprot_val(PAGE_KERNEL_NOCACHE) | flags);
- if (likely(size))
- if (ioremap_page_range(addr, addr + size, phys_addr, pgprot)) {
- vunmap((void *)orig_addr);
- return NULL;
- }
-
- return (void __iomem *)(offset + (char *)orig_addr);
-}
-EXPORT_SYMBOL(__ioremap);
-
-void __iounmap(void __iomem *addr)
-{
- unsigned long vaddr = (unsigned long __force)addr;
- struct vm_struct *p;
-
- if (PXSEG(vaddr) < P3SEG || is_pci_memaddr(vaddr))
- return;
-
-#ifdef CONFIG_32BIT
- /*
- * Purge any PMB entries that may have been established for this
- * mapping, then proceed with conventional VMA teardown.
- *
- * XXX: Note that due to the way that remove_vm_area() does
- * matching of the resultant VMA, we aren't able to fast-forward
- * the address past the PMB space until the end of the VMA where
- * the page tables reside. As such, unmap_vm_area() will be
- * forced to linearly scan over the area until it finds the page
- * tables where PTEs that need to be unmapped actually reside,
- * which is far from optimal. Perhaps we need to use a separate
- * VMA for the PMB mappings?
- * -- PFM.
- */
- pmb_unmap(vaddr);
-#endif
-
- p = remove_vm_area((void *)(vaddr & PAGE_MASK));
- if (!p) {
- printk(KERN_ERR "%s: bad address %p\n", __func__, addr);
- return;
- }
-
- kfree(p);
-}
-EXPORT_SYMBOL(__iounmap);
diff --git a/arch/sh/mm/ioremap_64.c b/arch/sh/mm/ioremap_64.c
deleted file mode 100644
index cea224c3e49..00000000000
--- a/arch/sh/mm/ioremap_64.c
+++ /dev/null
@@ -1,404 +0,0 @@
-/*
- * arch/sh/mm/ioremap_64.c
- *
- * Copyright (C) 2000, 2001 Paolo Alberelli
- * Copyright (C) 2003 - 2007 Paul Mundt
- *
- * Mostly derived from arch/sh/mm/ioremap.c which, in turn is mostly
- * derived from arch/i386/mm/ioremap.c .
- *
- * (C) Copyright 1995 1996 Linus Torvalds
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-#include <linux/vmalloc.h>
-#include <linux/ioport.h>
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/io.h>
-#include <linux/bootmem.h>
-#include <linux/proc_fs.h>
-#include <asm/page.h>
-#include <asm/pgalloc.h>
-#include <asm/addrspace.h>
-#include <asm/cacheflush.h>
-#include <asm/tlbflush.h>
-#include <asm/mmu.h>
-
-static void shmedia_mapioaddr(unsigned long, unsigned long);
-static unsigned long shmedia_ioremap(struct resource *, u32, int);
-
-/*
- * Generic mapping function (not visible outside):
- */
-
-/*
- * Remap an arbitrary physical address space into the kernel virtual
- * address space. Needed when the kernel wants to access high addresses
- * directly.
- *
- * NOTE! We need to allow non-page-aligned mappings too: we will obviously
- * have to convert them into an offset in a page-aligned mapping, but the
- * caller shouldn't need to know that small detail.
- */
-void *__ioremap(unsigned long phys_addr, unsigned long size,
- unsigned long flags)
-{
- void * addr;
- struct vm_struct * area;
- unsigned long offset, last_addr;
- pgprot_t pgprot;
-
- /* Don't allow wraparound or zero size */
- last_addr = phys_addr + size - 1;
- if (!size || last_addr < phys_addr)
- return NULL;
-
- pgprot = __pgprot(_PAGE_PRESENT | _PAGE_READ |
- _PAGE_WRITE | _PAGE_DIRTY |
- _PAGE_ACCESSED | _PAGE_SHARED | flags);
-
- /*
- * Mappings have to be page-aligned
- */
- offset = phys_addr & ~PAGE_MASK;
- phys_addr &= PAGE_MASK;
- size = PAGE_ALIGN(last_addr + 1) - phys_addr;
-
- /*
- * Ok, go for it..
- */
- area = get_vm_area(size, VM_IOREMAP);
- pr_debug("Get vm_area returns %p addr %p\n",area,area->addr);
- if (!area)
- return NULL;
- area->phys_addr = phys_addr;
- addr = area->addr;
- if (ioremap_page_range((unsigned long)addr, (unsigned long)addr + size,
- phys_addr, pgprot)) {
- vunmap(addr);
- return NULL;
- }
- return (void *) (offset + (char *)addr);
-}
-EXPORT_SYMBOL(__ioremap);
-
-void __iounmap(void *addr)
-{
- struct vm_struct *area;
-
- vfree((void *) (PAGE_MASK & (unsigned long) addr));
- area = remove_vm_area((void *) (PAGE_MASK & (unsigned long) addr));
- if (!area) {
- printk(KERN_ERR "iounmap: bad address %p\n", addr);
- return;
- }
-
- kfree(area);
-}
-EXPORT_SYMBOL(__iounmap);
-
-static struct resource shmedia_iomap = {
- .name = "shmedia_iomap",
- .start = IOBASE_VADDR + PAGE_SIZE,
- .end = IOBASE_END - 1,
-};
-
-static void shmedia_mapioaddr(unsigned long pa, unsigned long va);
-static void shmedia_unmapioaddr(unsigned long vaddr);
-static unsigned long shmedia_ioremap(struct resource *res, u32 pa, int sz);
-
-/*
- * We have the same problem as the SPARC, so lets have the same comment:
- * Our mini-allocator...
- * Boy this is gross! We need it because we must map I/O for
- * timers and interrupt controller before the kmalloc is available.
- */
-
-#define XNMLN 15
-#define XNRES 10
-
-struct xresource {
- struct resource xres; /* Must be first */
- int xflag; /* 1 == used */
- char xname[XNMLN+1];
-};
-
-static struct xresource xresv[XNRES];
-
-static struct xresource *xres_alloc(void)
-{
- struct xresource *xrp;
- int n;
-
- xrp = xresv;
- for (n = 0; n < XNRES; n++) {
- if (xrp->xflag == 0) {
- xrp->xflag = 1;
- return xrp;
- }
- xrp++;
- }
- return NULL;
-}
-
-static void xres_free(struct xresource *xrp)
-{
- xrp->xflag = 0;
-}
-
-static struct resource *shmedia_find_resource(struct resource *root,
- unsigned long vaddr)
-{
- struct resource *res;
-
- for (res = root->child; res; res = res->sibling)
- if (res->start <= vaddr && res->end >= vaddr)
- return res;
-
- return NULL;
-}
-
-static unsigned long shmedia_alloc_io(unsigned long phys, unsigned long size,
- const char *name)
-{
- static int printed_full = 0;
- struct xresource *xres;
- struct resource *res;
- char *tack;
- int tlen;
-
- if (name == NULL) name = "???";
-
- if ((xres = xres_alloc()) != 0) {
- tack = xres->xname;
- res = &xres->xres;
- } else {
- if (!printed_full) {
- printk("%s: done with statics, switching to kmalloc\n",
- __func__);
- printed_full = 1;
- }
- tlen = strlen(name);
- tack = kmalloc(sizeof (struct resource) + tlen + 1, GFP_KERNEL);
- if (!tack)
- return -ENOMEM;
- memset(tack, 0, sizeof(struct resource));
- res = (struct resource *) tack;
- tack += sizeof (struct resource);
- }
-
- strncpy(tack, name, XNMLN);
- tack[XNMLN] = 0;
- res->name = tack;
-
- return shmedia_ioremap(res, phys, size);
-}
-
-static unsigned long shmedia_ioremap(struct resource *res, u32 pa, int sz)
-{
- unsigned long offset = ((unsigned long) pa) & (~PAGE_MASK);
- unsigned long round_sz = (offset + sz + PAGE_SIZE-1) & PAGE_MASK;
- unsigned long va;
- unsigned int psz;
-
- if (allocate_resource(&shmedia_iomap, res, round_sz,
- shmedia_iomap.start, shmedia_iomap.end,
- PAGE_SIZE, NULL, NULL) != 0) {
- panic("alloc_io_res(%s): cannot occupy\n",
- (res->name != NULL)? res->name: "???");
- }
-
- va = res->start;
- pa &= PAGE_MASK;
-
- psz = (res->end - res->start + (PAGE_SIZE - 1)) / PAGE_SIZE;
-
- /* log at boot time ... */
- printk("mapioaddr: %6s [%2d page%s] va 0x%08lx pa 0x%08x\n",
- ((res->name != NULL) ? res->name : "???"),
- psz, psz == 1 ? " " : "s", va, pa);
-
- for (psz = res->end - res->start + 1; psz != 0; psz -= PAGE_SIZE) {
- shmedia_mapioaddr(pa, va);
- va += PAGE_SIZE;
- pa += PAGE_SIZE;
- }
-
- res->start += offset;
- res->end = res->start + sz - 1; /* not strictly necessary.. */
-
- return res->start;
-}
-
-static void shmedia_free_io(struct resource *res)
-{
- unsigned long len = res->end - res->start + 1;
-
- BUG_ON((len & (PAGE_SIZE - 1)) != 0);
-
- while (len) {
- len -= PAGE_SIZE;
- shmedia_unmapioaddr(res->start + len);
- }
-
- release_resource(res);
-}
-
-static __init_refok void *sh64_get_page(void)
-{
- extern int after_bootmem;
- void *page;
-
- if (after_bootmem) {
- page = (void *)get_zeroed_page(GFP_ATOMIC);
- } else {
- page = alloc_bootmem_pages(PAGE_SIZE);
- }
-
- if (!page || ((unsigned long)page & ~PAGE_MASK))
- panic("sh64_get_page: Out of memory already?\n");
-
- return page;
-}
-
-static void shmedia_mapioaddr(unsigned long pa, unsigned long va)
-{
- pgd_t *pgdp;
- pud_t *pudp;
- pmd_t *pmdp;
- pte_t *ptep, pte;
- pgprot_t prot;
- unsigned long flags = 1; /* 1 = CB0-1 device */
-
- pr_debug("shmedia_mapiopage pa %08lx va %08lx\n", pa, va);
-
- pgdp = pgd_offset_k(va);
- if (pgd_none(*pgdp) || !pgd_present(*pgdp)) {
- pudp = (pud_t *)sh64_get_page();
- set_pgd(pgdp, __pgd((unsigned long)pudp | _KERNPG_TABLE));
- }
-
- pudp = pud_offset(pgdp, va);
- if (pud_none(*pudp) || !pud_present(*pudp)) {
- pmdp = (pmd_t *)sh64_get_page();
- set_pud(pudp, __pud((unsigned long)pmdp | _KERNPG_TABLE));
- }
-
- pmdp = pmd_offset(pudp, va);
- if (pmd_none(*pmdp) || !pmd_present(*pmdp) ) {
- ptep = (pte_t *)sh64_get_page();
- set_pmd(pmdp, __pmd((unsigned long)ptep + _PAGE_TABLE));
- }
-
- prot = __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE |
- _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_SHARED | flags);
-
- pte = pfn_pte(pa >> PAGE_SHIFT, prot);
- ptep = pte_offset_kernel(pmdp, va);
-
- if (!pte_none(*ptep) &&
- pte_val(*ptep) != pte_val(pte))
- pte_ERROR(*ptep);
-
- set_pte(ptep, pte);
-
- flush_tlb_kernel_range(va, PAGE_SIZE);
-}
-
-static void shmedia_unmapioaddr(unsigned long vaddr)
-{
- pgd_t *pgdp;
- pud_t *pudp;
- pmd_t *pmdp;
- pte_t *ptep;
-
- pgdp = pgd_offset_k(vaddr);
- if (pgd_none(*pgdp) || pgd_bad(*pgdp))
- return;
-
- pudp = pud_offset(pgdp, vaddr);
- if (pud_none(*pudp) || pud_bad(*pudp))
- return;
-
- pmdp = pmd_offset(pudp, vaddr);
- if (pmd_none(*pmdp) || pmd_bad(*pmdp))
- return;
-
- ptep = pte_offset_kernel(pmdp, vaddr);
-
- if (pte_none(*ptep) || !pte_present(*ptep))
- return;
-
- clear_page((void *)ptep);
- pte_clear(&init_mm, vaddr, ptep);
-}
-
-unsigned long onchip_remap(unsigned long phys, unsigned long size, const char *name)
-{
- if (size < PAGE_SIZE)
- size = PAGE_SIZE;
-
- return shmedia_alloc_io(phys, size, name);
-}
-
-void onchip_unmap(unsigned long vaddr)
-{
- struct resource *res;
- unsigned int psz;
-
- res = shmedia_find_resource(&shmedia_iomap, vaddr);
- if (!res) {
- printk(KERN_ERR "%s: Failed to free 0x%08lx\n",
- __func__, vaddr);
- return;
- }
-
- psz = (res->end - res->start + (PAGE_SIZE - 1)) / PAGE_SIZE;
-
- printk(KERN_DEBUG "unmapioaddr: %6s [%2d page%s] freed\n",
- res->name, psz, psz == 1 ? " " : "s");
-
- shmedia_free_io(res);
-
- if ((char *)res >= (char *)xresv &&
- (char *)res < (char *)&xresv[XNRES]) {
- xres_free((struct xresource *)res);
- } else {
- kfree(res);
- }
-}
-
-#ifdef CONFIG_PROC_FS
-static int
-ioremap_proc_info(char *buf, char **start, off_t fpos, int length, int *eof,
- void *data)
-{
- char *p = buf, *e = buf + length;
- struct resource *r;
- const char *nm;
-
- for (r = ((struct resource *)data)->child; r != NULL; r = r->sibling) {
- if (p + 32 >= e) /* Better than nothing */
- break;
- if ((nm = r->name) == 0) nm = "???";
- p += sprintf(p, "%08lx-%08lx: %s\n",
- (unsigned long)r->start,
- (unsigned long)r->end, nm);
- }
-
- return p-buf;
-}
-#endif /* CONFIG_PROC_FS */
-
-static int __init register_proc_onchip(void)
-{
-#ifdef CONFIG_PROC_FS
- create_proc_read_entry("io_map",0,0, ioremap_proc_info, &shmedia_iomap);
-#endif
- return 0;
-}
-
-__initcall(register_proc_onchip);
diff --git a/arch/sh/mm/ioremap_fixed.c b/arch/sh/mm/ioremap_fixed.c
new file mode 100644
index 00000000000..efbe84af998
--- /dev/null
+++ b/arch/sh/mm/ioremap_fixed.c
@@ -0,0 +1,134 @@
+/*
+ * Re-map IO memory to kernel address space so that we can access it.
+ *
+ * These functions should only be used when it is necessary to map a
+ * physical address space into the kernel address space before ioremap()
+ * can be used, e.g. early in boot before paging_init().
+ *
+ * Copyright (C) 2009 Matt Fleming
+ */
+
+#include <linux/vmalloc.h>
+#include <linux/ioport.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/io.h>
+#include <linux/bootmem.h>
+#include <linux/proc_fs.h>
+#include <asm/fixmap.h>
+#include <asm/page.h>
+#include <asm/pgalloc.h>
+#include <asm/addrspace.h>
+#include <asm/cacheflush.h>
+#include <asm/tlbflush.h>
+#include <asm/mmu.h>
+#include <asm/mmu_context.h>
+
+struct ioremap_map {
+ void __iomem *addr;
+ unsigned long size;
+ unsigned long fixmap_addr;
+};
+
+static struct ioremap_map ioremap_maps[FIX_N_IOREMAPS];
+
+void __init ioremap_fixed_init(void)
+{
+ struct ioremap_map *map;
+ int i;
+
+ for (i = 0; i < FIX_N_IOREMAPS; i++) {
+ map = &ioremap_maps[i];
+ map->fixmap_addr = __fix_to_virt(FIX_IOREMAP_BEGIN + i);
+ }
+}
+
+void __init __iomem *
+ioremap_fixed(phys_addr_t phys_addr, unsigned long size, pgprot_t prot)
+{
+ enum fixed_addresses idx0, idx;
+ struct ioremap_map *map;
+ unsigned int nrpages;
+ unsigned long offset;
+ int i, slot;
+
+ /*
+ * Mappings have to be page-aligned
+ */
+ offset = phys_addr & ~PAGE_MASK;
+ phys_addr &= PAGE_MASK;
+ size = PAGE_ALIGN(phys_addr + size) - phys_addr;
+
+ slot = -1;
+ for (i = 0; i < FIX_N_IOREMAPS; i++) {
+ map = &ioremap_maps[i];
+ if (!map->addr) {
+ map->size = size;
+ slot = i;
+ break;
+ }
+ }
+
+ if (slot < 0)
+ return NULL;
+
+ /*
+ * Mappings have to fit in the FIX_IOREMAP area.
+ */
+ nrpages = size >> PAGE_SHIFT;
+ if (nrpages > FIX_N_IOREMAPS)
+ return NULL;
+
+ /*
+ * Ok, go for it..
+ */
+ idx0 = FIX_IOREMAP_BEGIN + slot;
+ idx = idx0;
+ while (nrpages > 0) {
+ pgprot_val(prot) |= _PAGE_WIRED;
+ __set_fixmap(idx, phys_addr, prot);
+ phys_addr += PAGE_SIZE;
+ idx++;
+ --nrpages;
+ }
+
+ map->addr = (void __iomem *)(offset + map->fixmap_addr);
+ return map->addr;
+}
+
+int iounmap_fixed(void __iomem *addr)
+{
+ enum fixed_addresses idx;
+ struct ioremap_map *map;
+ unsigned int nrpages;
+ int i, slot;
+
+ slot = -1;
+ for (i = 0; i < FIX_N_IOREMAPS; i++) {
+ map = &ioremap_maps[i];
+ if (map->addr == addr) {
+ slot = i;
+ break;
+ }
+ }
+
+ /*
+ * If we don't match, it's not for us.
+ */
+ if (slot < 0)
+ return -EINVAL;
+
+ nrpages = map->size >> PAGE_SHIFT;
+
+ idx = FIX_IOREMAP_BEGIN + slot + nrpages - 1;
+ while (nrpages > 0) {
+ __clear_fixmap(idx, __pgprot(_PAGE_WIRED));
+ --idx;
+ --nrpages;
+ }
+
+ map->size = 0;
+ map->addr = NULL;
+
+ return 0;
+}
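
For reference, a minimal sketch (not part of this patch) of how an early-boot caller might use the fixed-slot mapping above before ioremap() proper is available; the register address and the function name are invented for illustration, and ioremap_fixed_init() is assumed to have already run:

#include <linux/io.h>
#include <asm/fixmap.h>
#include <asm/page.h>
#include <asm/pgtable.h>

static void __init early_board_setup(void)
{
	void __iomem *regs;

	/* Map one page of (hypothetical) platform registers, uncached. */
	regs = ioremap_fixed(0xffc00000, PAGE_SIZE, PAGE_KERNEL_NOCACHE);
	if (!regs)
		return;

	__raw_writel(0, regs);		/* example register access */

	/* Give the fixmap slot back once we are done with it. */
	iounmap_fixed(regs);
}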
diff --git a/arch/sh/mm/kmap.c b/arch/sh/mm/kmap.c
new file mode 100644
index 00000000000..ec29e14ec5a
--- /dev/null
+++ b/arch/sh/mm/kmap.c
@@ -0,0 +1,67 @@
+/*
+ * arch/sh/mm/kmap.c
+ *
+ * Copyright (C) 1999, 2000, 2002 Niibe Yutaka
+ * Copyright (C) 2002 - 2009 Paul Mundt
+ *
+ * Released under the terms of the GNU GPL v2.0.
+ */
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/mutex.h>
+#include <linux/fs.h>
+#include <linux/highmem.h>
+#include <linux/module.h>
+#include <asm/mmu_context.h>
+#include <asm/cacheflush.h>
+
+#define kmap_get_fixmap_pte(vaddr) \
+ pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr)), (vaddr))
+
+static pte_t *kmap_coherent_pte;
+
+void __init kmap_coherent_init(void)
+{
+ unsigned long vaddr;
+
+ /* cache the first coherent kmap pte */
+ vaddr = __fix_to_virt(FIX_CMAP_BEGIN);
+ kmap_coherent_pte = kmap_get_fixmap_pte(vaddr);
+}
+
+void *kmap_coherent(struct page *page, unsigned long addr)
+{
+ enum fixed_addresses idx;
+ unsigned long vaddr;
+
+ BUG_ON(!test_bit(PG_dcache_clean, &page->flags));
+
+ pagefault_disable();
+
+ idx = FIX_CMAP_END -
+ (((addr >> PAGE_SHIFT) & (FIX_N_COLOURS - 1)) +
+ (FIX_N_COLOURS * smp_processor_id()));
+
+ vaddr = __fix_to_virt(idx);
+
+ BUG_ON(!pte_none(*(kmap_coherent_pte - idx)));
+ set_pte(kmap_coherent_pte - idx, mk_pte(page, PAGE_KERNEL));
+
+ return (void *)vaddr;
+}
+
+void kunmap_coherent(void *kvaddr)
+{
+ if (kvaddr >= (void *)FIXADDR_START) {
+ unsigned long vaddr = (unsigned long)kvaddr & PAGE_MASK;
+ enum fixed_addresses idx = __virt_to_fix(vaddr);
+
+ /* XXX.. Kill this later, here for sanity at the moment.. */
+ __flush_purge_region((void *)vaddr, PAGE_SIZE);
+
+ pte_clear(&init_mm, vaddr, kmap_coherent_pte - idx);
+ local_flush_tlb_one(get_asid(), vaddr);
+ }
+
+ pagefault_enable();
+}
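
To make the fixmap slot arithmetic in kmap_coherent() above concrete, here is a stand-alone sketch (not from the patch) that mirrors the calculation, assuming eight cache colours, 4KB pages, and an arbitrary value for the last CMAP fixmap slot:

#include <stdio.h>

#define N_COLOURS	8	/* assumed FIX_N_COLOURS */
#define PAGE_SHIFT_	12	/* assumed 4KB pages */
#define CMAP_END_	31	/* assumed index of FIX_CMAP_END */

/* Pick the per-CPU slot that matches the colour of the user address. */
static unsigned int cmap_slot(unsigned long uaddr, unsigned int cpu)
{
	unsigned int colour = (uaddr >> PAGE_SHIFT_) & (N_COLOURS - 1);

	return CMAP_END_ - (colour + N_COLOURS * cpu);
}

int main(void)
{
	/* colour 3 on CPU 1: 31 - (3 + 8 * 1) = 20 */
	printf("slot = %u\n", cmap_slot(0x00403000UL, 1));
	return 0;
}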
diff --git a/arch/sh/mm/mmap.c b/arch/sh/mm/mmap.c
new file mode 100644
index 00000000000..6777177807c
--- /dev/null
+++ b/arch/sh/mm/mmap.c
@@ -0,0 +1,163 @@
+/*
+ * arch/sh/mm/mmap.c
+ *
+ * Copyright (C) 2008 - 2009 Paul Mundt
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/io.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/module.h>
+#include <asm/page.h>
+#include <asm/processor.h>
+
+unsigned long shm_align_mask = PAGE_SIZE - 1; /* Sane caches */
+EXPORT_SYMBOL(shm_align_mask);
+
+#ifdef CONFIG_MMU
+/*
+ * To avoid cache aliases, we map the shared page with the same colour.
+ */
+static inline unsigned long COLOUR_ALIGN(unsigned long addr,
+ unsigned long pgoff)
+{
+ unsigned long base = (addr + shm_align_mask) & ~shm_align_mask;
+ unsigned long off = (pgoff << PAGE_SHIFT) & shm_align_mask;
+
+ return base + off;
+}
+
+unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
+ unsigned long len, unsigned long pgoff, unsigned long flags)
+{
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+ int do_colour_align;
+ struct vm_unmapped_area_info info;
+
+ if (flags & MAP_FIXED) {
+ /* We do not accept a shared mapping if it would violate
+ * cache aliasing constraints.
+ */
+ if ((flags & MAP_SHARED) &&
+ ((addr - (pgoff << PAGE_SHIFT)) & shm_align_mask))
+ return -EINVAL;
+ return addr;
+ }
+
+ if (unlikely(len > TASK_SIZE))
+ return -ENOMEM;
+
+ do_colour_align = 0;
+ if (filp || (flags & MAP_SHARED))
+ do_colour_align = 1;
+
+ if (addr) {
+ if (do_colour_align)
+ addr = COLOUR_ALIGN(addr, pgoff);
+ else
+ addr = PAGE_ALIGN(addr);
+
+ vma = find_vma(mm, addr);
+ if (TASK_SIZE - len >= addr &&
+ (!vma || addr + len <= vma->vm_start))
+ return addr;
+ }
+
+ info.flags = 0;
+ info.length = len;
+ info.low_limit = TASK_UNMAPPED_BASE;
+ info.high_limit = TASK_SIZE;
+ info.align_mask = do_colour_align ? (PAGE_MASK & shm_align_mask) : 0;
+ info.align_offset = pgoff << PAGE_SHIFT;
+ return vm_unmapped_area(&info);
+}
+
+unsigned long
+arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
+ const unsigned long len, const unsigned long pgoff,
+ const unsigned long flags)
+{
+ struct vm_area_struct *vma;
+ struct mm_struct *mm = current->mm;
+ unsigned long addr = addr0;
+ int do_colour_align;
+ struct vm_unmapped_area_info info;
+
+ if (flags & MAP_FIXED) {
+ /* We do not accept a shared mapping if it would violate
+ * cache aliasing constraints.
+ */
+ if ((flags & MAP_SHARED) &&
+ ((addr - (pgoff << PAGE_SHIFT)) & shm_align_mask))
+ return -EINVAL;
+ return addr;
+ }
+
+ if (unlikely(len > TASK_SIZE))
+ return -ENOMEM;
+
+ do_colour_align = 0;
+ if (filp || (flags & MAP_SHARED))
+ do_colour_align = 1;
+
+ /* requesting a specific address */
+ if (addr) {
+ if (do_colour_align)
+ addr = COLOUR_ALIGN(addr, pgoff);
+ else
+ addr = PAGE_ALIGN(addr);
+
+ vma = find_vma(mm, addr);
+ if (TASK_SIZE - len >= addr &&
+ (!vma || addr + len <= vma->vm_start))
+ return addr;
+ }
+
+ info.flags = VM_UNMAPPED_AREA_TOPDOWN;
+ info.length = len;
+ info.low_limit = PAGE_SIZE;
+ info.high_limit = mm->mmap_base;
+ info.align_mask = do_colour_align ? (PAGE_MASK & shm_align_mask) : 0;
+ info.align_offset = pgoff << PAGE_SHIFT;
+ addr = vm_unmapped_area(&info);
+
+ /*
+ * A failed mmap() very likely causes application failure,
+ * so fall back to the bottom-up function here. This scenario
+ * can happen with large stack limits and large mmap()
+ * allocations.
+ */
+ if (addr & ~PAGE_MASK) {
+ VM_BUG_ON(addr != -ENOMEM);
+ info.flags = 0;
+ info.low_limit = TASK_UNMAPPED_BASE;
+ info.high_limit = TASK_SIZE;
+ addr = vm_unmapped_area(&info);
+ }
+
+ return addr;
+}
+#endif /* CONFIG_MMU */
+
+/*
+ * You really shouldn't be using read() or write() on /dev/mem. This
+ * might go away in the future.
+ */
+int valid_phys_addr_range(phys_addr_t addr, size_t count)
+{
+ if (addr < __MEMORY_START)
+ return 0;
+ if (addr + count > __pa(high_memory))
+ return 0;
+
+ return 1;
+}
+
+int valid_mmap_phys_addr_range(unsigned long pfn, size_t size)
+{
+ return 1;
+}
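
As a worked example of the COLOUR_ALIGN() arithmetic used by both allocators above, the following stand-alone sketch (not part of the patch) assumes a 16KB alias mask and 4KB pages; the addresses are made up:

#include <stdio.h>

#define SHM_ALIGN_MASK_	0x3fffUL	/* assumed: 4 colours x 4KB pages - 1 */
#define PAGE_SHIFT_	12

static unsigned long colour_align(unsigned long addr, unsigned long pgoff)
{
	unsigned long base = (addr + SHM_ALIGN_MASK_) & ~SHM_ALIGN_MASK_;
	unsigned long off  = (pgoff << PAGE_SHIFT_) & SHM_ALIGN_MASK_;

	return base + off;
}

int main(void)
{
	/*
	 * A hint of 0x10001000 rounds up to 0x10004000, and a file offset
	 * of three pages then selects colour 3, giving 0x10007000.
	 */
	printf("0x%lx\n", colour_align(0x10001000UL, 3));
	return 0;
}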
diff --git a/arch/sh/mm/tlb-nommu.c b/arch/sh/mm/nommu.c
index 15111bc7ddd..36312d254fa 100644
--- a/arch/sh/mm/tlb-nommu.c
+++ b/arch/sh/mm/nommu.c
@@ -1,19 +1,41 @@
/*
- * arch/sh/mm/tlb-nommu.c
+ * arch/sh/mm/nommu.c
*
- * TLB Operations for MMUless SH.
+ * Various helper routines and stubs for MMUless SH.
*
- * Copyright (C) 2002 Paul Mundt
+ * Copyright (C) 2002 - 2009 Paul Mundt
*
* Released under the terms of the GNU GPL v2.0.
*/
#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/string.h>
#include <linux/mm.h>
#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
+#include <asm/page.h>
+#include <asm/uaccess.h>
/*
* Nothing too terribly exciting here ..
*/
+void copy_page(void *to, void *from)
+{
+ memcpy(to, from, PAGE_SIZE);
+}
+
+__kernel_size_t __copy_user(void *to, const void *from, __kernel_size_t n)
+{
+ memcpy(to, from, n);
+ return 0;
+}
+
+__kernel_size_t __clear_user(void *to, __kernel_size_t n)
+{
+ memset(to, 0, n);
+ return 0;
+}
+
void local_flush_tlb_all(void)
{
BUG();
@@ -45,8 +67,25 @@ void local_flush_tlb_kernel_range(unsigned long start, unsigned long end)
BUG();
}
-void update_mmu_cache(struct vm_area_struct * vma,
- unsigned long address, pte_t pte)
+void __flush_tlb_global(void)
+{
+}
+
+void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t pte)
+{
+}
+
+void __init kmap_coherent_init(void)
+{
+}
+
+void *kmap_coherent(struct page *page, unsigned long addr)
+{
+ BUG();
+ return NULL;
+}
+
+void kunmap_coherent(void *kvaddr)
{
BUG();
}
@@ -59,3 +98,7 @@ void __init page_table_range_init(unsigned long start, unsigned long end,
void __set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
{
}
+
+void pgtable_cache_init(void)
+{
+}
diff --git a/arch/sh/mm/numa.c b/arch/sh/mm/numa.c
index 2de7302724f..3d85225b9e9 100644
--- a/arch/sh/mm/numa.c
+++ b/arch/sh/mm/numa.c
@@ -9,12 +9,12 @@
*/
#include <linux/module.h>
#include <linux/bootmem.h>
+#include <linux/memblock.h>
#include <linux/mm.h>
#include <linux/numa.h>
#include <linux/pfn.h>
#include <asm/sections.h>
-static bootmem_data_t plat_node_bdata[MAX_NUMNODES];
struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
EXPORT_SYMBOL_GPL(node_data);
@@ -24,64 +24,47 @@ EXPORT_SYMBOL_GPL(node_data);
* latency. Each node's pgdat is node-local at the beginning of the node,
* immediately followed by the node mem map.
*/
-void __init setup_memory(void)
-{
- unsigned long free_pfn = PFN_UP(__pa(_end));
-
- /*
- * Node 0 sets up its pgdat at the first available pfn,
- * and bumps it up before setting up the bootmem allocator.
- */
- NODE_DATA(0) = pfn_to_kaddr(free_pfn);
- memset(NODE_DATA(0), 0, sizeof(struct pglist_data));
- free_pfn += PFN_UP(sizeof(struct pglist_data));
- NODE_DATA(0)->bdata = &plat_node_bdata[0];
-
- /* Set up node 0 */
- setup_bootmem_allocator(free_pfn);
-
- /* Give the platforms a chance to hook up their nodes */
- plat_mem_setup();
-}
-
void __init setup_bootmem_node(int nid, unsigned long start, unsigned long end)
{
- unsigned long bootmap_pages, bootmap_start, bootmap_size;
- unsigned long start_pfn, free_pfn, end_pfn;
+ unsigned long bootmap_pages;
+ unsigned long start_pfn, end_pfn;
+ unsigned long bootmem_paddr;
/* Don't allow bogus node assignment */
- BUG_ON(nid > MAX_NUMNODES || nid == 0);
+ BUG_ON(nid > MAX_NUMNODES || nid <= 0);
- /*
- * The free pfn starts at the beginning of the range, and is
- * advanced as necessary for pgdat and node map allocations.
- */
- free_pfn = start_pfn = start >> PAGE_SHIFT;
+ start_pfn = start >> PAGE_SHIFT;
end_pfn = end >> PAGE_SHIFT;
- add_active_range(nid, start_pfn, end_pfn);
+ pmb_bolt_mapping((unsigned long)__va(start), start, end - start,
+ PAGE_KERNEL);
+
+ memblock_add(start, end - start);
+
+ __add_active_range(nid, start_pfn, end_pfn);
/* Node-local pgdat */
- NODE_DATA(nid) = pfn_to_kaddr(free_pfn);
- free_pfn += PFN_UP(sizeof(struct pglist_data));
+ NODE_DATA(nid) = __va(memblock_alloc_base(sizeof(struct pglist_data),
+ SMP_CACHE_BYTES, end));
memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
- NODE_DATA(nid)->bdata = &plat_node_bdata[nid];
+ NODE_DATA(nid)->bdata = &bootmem_node_data[nid];
NODE_DATA(nid)->node_start_pfn = start_pfn;
NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn;
/* Node-local bootmap */
bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
- bootmap_start = (unsigned long)pfn_to_kaddr(free_pfn);
- bootmap_size = init_bootmem_node(NODE_DATA(nid), free_pfn, start_pfn,
- end_pfn);
+ bootmem_paddr = memblock_alloc_base(bootmap_pages << PAGE_SHIFT,
+ PAGE_SIZE, end);
+ init_bootmem_node(NODE_DATA(nid), bootmem_paddr >> PAGE_SHIFT,
+ start_pfn, end_pfn);
free_bootmem_with_active_regions(nid, end_pfn);
/* Reserve the pgdat and bootmap space with the bootmem allocator */
reserve_bootmem_node(NODE_DATA(nid), start_pfn << PAGE_SHIFT,
sizeof(struct pglist_data), BOOTMEM_DEFAULT);
- reserve_bootmem_node(NODE_DATA(nid), free_pfn << PAGE_SHIFT,
+ reserve_bootmem_node(NODE_DATA(nid), bootmem_paddr,
bootmap_pages << PAGE_SHIFT, BOOTMEM_DEFAULT);
/* It's up */
diff --git a/arch/sh/mm/pg-nommu.c b/arch/sh/mm/pg-nommu.c
deleted file mode 100644
index 677dd57f087..00000000000
--- a/arch/sh/mm/pg-nommu.c
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * arch/sh/mm/pg-nommu.c
- *
- * clear_page()/copy_page() implementation for MMUless SH.
- *
- * Copyright (C) 2003 Paul Mundt
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <asm/page.h>
-
-void copy_page(void *to, void *from)
-{
- memcpy(to, from, PAGE_SIZE);
-}
-
-void clear_page(void *to)
-{
- memset(to, 0, PAGE_SIZE);
-}
-
-__kernel_size_t __copy_user(void *to, const void *from, __kernel_size_t n)
-{
- memcpy(to, from, n);
- return 0;
-}
-
-__kernel_size_t __clear_user(void *to, __kernel_size_t n)
-{
- memset(to, 0, n);
- return 0;
-}
diff --git a/arch/sh/mm/pg-sh4.c b/arch/sh/mm/pg-sh4.c
deleted file mode 100644
index 8c7a9ca7987..00000000000
--- a/arch/sh/mm/pg-sh4.c
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- * arch/sh/mm/pg-sh4.c
- *
- * Copyright (C) 1999, 2000, 2002 Niibe Yutaka
- * Copyright (C) 2002 - 2007 Paul Mundt
- *
- * Released under the terms of the GNU GPL v2.0.
- */
-#include <linux/mm.h>
-#include <linux/mutex.h>
-#include <linux/fs.h>
-#include <linux/highmem.h>
-#include <linux/module.h>
-#include <asm/mmu_context.h>
-#include <asm/cacheflush.h>
-
-#define CACHE_ALIAS (current_cpu_data.dcache.alias_mask)
-
-static inline void *kmap_coherent(struct page *page, unsigned long addr)
-{
- enum fixed_addresses idx;
- unsigned long vaddr, flags;
- pte_t pte;
-
- inc_preempt_count();
-
- idx = (addr & current_cpu_data.dcache.alias_mask) >> PAGE_SHIFT;
- vaddr = __fix_to_virt(FIX_CMAP_END - idx);
- pte = mk_pte(page, PAGE_KERNEL);
-
- local_irq_save(flags);
- flush_tlb_one(get_asid(), vaddr);
- local_irq_restore(flags);
-
- update_mmu_cache(NULL, vaddr, pte);
-
- return (void *)vaddr;
-}
-
-static inline void kunmap_coherent(struct page *page)
-{
- dec_preempt_count();
- preempt_check_resched();
-}
-
-/*
- * clear_user_page
- * @to: P1 address
- * @address: U0 address to be mapped
- * @page: page (virt_to_page(to))
- */
-void clear_user_page(void *to, unsigned long address, struct page *page)
-{
- __set_bit(PG_mapped, &page->flags);
-
- clear_page(to);
- if ((((address & PAGE_MASK) ^ (unsigned long)to) & CACHE_ALIAS))
- __flush_wback_region(to, PAGE_SIZE);
-}
-
-void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
- unsigned long vaddr, void *dst, const void *src,
- unsigned long len)
-{
- void *vto;
-
- __set_bit(PG_mapped, &page->flags);
-
- vto = kmap_coherent(page, vaddr) + (vaddr & ~PAGE_MASK);
- memcpy(vto, src, len);
- kunmap_coherent(vto);
-
- if (vma->vm_flags & VM_EXEC)
- flush_cache_page(vma, vaddr, page_to_pfn(page));
-}
-
-void copy_from_user_page(struct vm_area_struct *vma, struct page *page,
- unsigned long vaddr, void *dst, const void *src,
- unsigned long len)
-{
- void *vfrom;
-
- __set_bit(PG_mapped, &page->flags);
-
- vfrom = kmap_coherent(page, vaddr) + (vaddr & ~PAGE_MASK);
- memcpy(dst, vfrom, len);
- kunmap_coherent(vfrom);
-}
-
-void copy_user_highpage(struct page *to, struct page *from,
- unsigned long vaddr, struct vm_area_struct *vma)
-{
- void *vfrom, *vto;
-
- __set_bit(PG_mapped, &to->flags);
-
- vto = kmap_atomic(to, KM_USER1);
- vfrom = kmap_coherent(from, vaddr);
- copy_page(vto, vfrom);
- kunmap_coherent(vfrom);
-
- if (((vaddr ^ (unsigned long)vto) & CACHE_ALIAS))
- __flush_wback_region(vto, PAGE_SIZE);
-
- kunmap_atomic(vto, KM_USER1);
- /* Make sure this page is cleared on other CPU's too before using it */
- smp_wmb();
-}
-EXPORT_SYMBOL(copy_user_highpage);
-
-/*
- * For SH-4, we have our own implementation for ptep_get_and_clear
- */
-inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
-{
- pte_t pte = *ptep;
-
- pte_clear(mm, addr, ptep);
- if (!pte_not_present(pte)) {
- unsigned long pfn = pte_pfn(pte);
- if (pfn_valid(pfn)) {
- struct page *page = pfn_to_page(pfn);
- struct address_space *mapping = page_mapping(page);
- if (!mapping || !mapping_writably_mapped(mapping))
- __clear_bit(PG_mapped, &page->flags);
- }
- }
- return pte;
-}
diff --git a/arch/sh/mm/pg-sh7705.c b/arch/sh/mm/pg-sh7705.c
deleted file mode 100644
index 7f885b7f8af..00000000000
--- a/arch/sh/mm/pg-sh7705.c
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * arch/sh/mm/pg-sh7705.c
- *
- * Copyright (C) 1999, 2000 Niibe Yutaka
- * Copyright (C) 2004 Alex Song
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- */
-
-#include <linux/init.h>
-#include <linux/mman.h>
-#include <linux/mm.h>
-#include <linux/threads.h>
-#include <linux/fs.h>
-#include <asm/addrspace.h>
-#include <asm/page.h>
-#include <asm/pgtable.h>
-#include <asm/processor.h>
-#include <asm/cache.h>
-#include <asm/io.h>
-#include <asm/uaccess.h>
-#include <asm/pgalloc.h>
-#include <asm/mmu_context.h>
-#include <asm/cacheflush.h>
-
-static inline void __flush_purge_virtual_region(void *p1, void *virt, int size)
-{
- unsigned long v;
- unsigned long begin, end;
- unsigned long p1_begin;
-
-
- begin = L1_CACHE_ALIGN((unsigned long)virt);
- end = L1_CACHE_ALIGN((unsigned long)virt + size);
-
- p1_begin = (unsigned long)p1 & ~(L1_CACHE_BYTES - 1);
-
- /* do this the slow way as we may not have TLB entries
- * for virt yet. */
- for (v = begin; v < end; v += L1_CACHE_BYTES) {
- unsigned long p;
- unsigned long ways, addr;
-
- p = __pa(p1_begin);
-
- ways = current_cpu_data.dcache.ways;
- addr = CACHE_OC_ADDRESS_ARRAY;
-
- do {
- unsigned long data;
-
- addr |= (v & current_cpu_data.dcache.entry_mask);
-
- data = ctrl_inl(addr);
- if ((data & CACHE_PHYSADDR_MASK) ==
- (p & CACHE_PHYSADDR_MASK)) {
- data &= ~(SH_CACHE_UPDATED|SH_CACHE_VALID);
- ctrl_outl(data, addr);
- }
-
- addr += current_cpu_data.dcache.way_incr;
- } while (--ways);
-
- p1_begin += L1_CACHE_BYTES;
- }
-}
-
-/*
- * clear_user_page
- * @to: P1 address
- * @address: U0 address to be mapped
- */
-void clear_user_page(void *to, unsigned long address, struct page *pg)
-{
- struct page *page = virt_to_page(to);
-
- __set_bit(PG_mapped, &page->flags);
- if (((address ^ (unsigned long)to) & CACHE_ALIAS) == 0) {
- clear_page(to);
- __flush_wback_region(to, PAGE_SIZE);
- } else {
- __flush_purge_virtual_region(to,
- (void *)(address & 0xfffff000),
- PAGE_SIZE);
- clear_page(to);
- __flush_wback_region(to, PAGE_SIZE);
- }
-}
-
-/*
- * copy_user_page
- * @to: P1 address
- * @from: P1 address
- * @address: U0 address to be mapped
- */
-void copy_user_page(void *to, void *from, unsigned long address, struct page *pg)
-{
- struct page *page = virt_to_page(to);
-
-
- __set_bit(PG_mapped, &page->flags);
- if (((address ^ (unsigned long)to) & CACHE_ALIAS) == 0) {
- copy_page(to, from);
- __flush_wback_region(to, PAGE_SIZE);
- } else {
- __flush_purge_virtual_region(to,
- (void *)(address & 0xfffff000),
- PAGE_SIZE);
- copy_page(to, from);
- __flush_wback_region(to, PAGE_SIZE);
- }
-}
-
-/*
- * For SH7705, we have our own implementation for ptep_get_and_clear
- * Copied from pg-sh4.c
- */
-inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
-{
- pte_t pte = *ptep;
-
- pte_clear(mm, addr, ptep);
- if (!pte_not_present(pte)) {
- unsigned long pfn = pte_pfn(pte);
- if (pfn_valid(pfn)) {
- struct page *page = pfn_to_page(pfn);
- struct address_space *mapping = page_mapping(page);
- if (!mapping || !mapping_writably_mapped(mapping))
- __clear_bit(PG_mapped, &page->flags);
- }
- }
-
- return pte;
-}
-
diff --git a/arch/sh/mm/pgtable.c b/arch/sh/mm/pgtable.c
new file mode 100644
index 00000000000..26e03a1f7ca
--- /dev/null
+++ b/arch/sh/mm/pgtable.c
@@ -0,0 +1,57 @@
+#include <linux/mm.h>
+#include <linux/slab.h>
+
+#define PGALLOC_GFP GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO
+
+static struct kmem_cache *pgd_cachep;
+#if PAGETABLE_LEVELS > 2
+static struct kmem_cache *pmd_cachep;
+#endif
+
+void pgd_ctor(void *x)
+{
+ pgd_t *pgd = x;
+
+ memcpy(pgd + USER_PTRS_PER_PGD,
+ swapper_pg_dir + USER_PTRS_PER_PGD,
+ (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
+}
+
+void pgtable_cache_init(void)
+{
+ pgd_cachep = kmem_cache_create("pgd_cache",
+ PTRS_PER_PGD * (1<<PTE_MAGNITUDE),
+ PAGE_SIZE, SLAB_PANIC, pgd_ctor);
+#if PAGETABLE_LEVELS > 2
+ pmd_cachep = kmem_cache_create("pmd_cache",
+ PTRS_PER_PMD * (1<<PTE_MAGNITUDE),
+ PAGE_SIZE, SLAB_PANIC, NULL);
+#endif
+}
+
+pgd_t *pgd_alloc(struct mm_struct *mm)
+{
+ return kmem_cache_alloc(pgd_cachep, PGALLOC_GFP);
+}
+
+void pgd_free(struct mm_struct *mm, pgd_t *pgd)
+{
+ kmem_cache_free(pgd_cachep, pgd);
+}
+
+#if PAGETABLE_LEVELS > 2
+void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
+{
+ set_pud(pud, __pud((unsigned long)pmd));
+}
+
+pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
+{
+ return kmem_cache_alloc(pmd_cachep, PGALLOC_GFP);
+}
+
+void pmd_free(struct mm_struct *mm, pmd_t *pmd)
+{
+ kmem_cache_free(pmd_cachep, pmd);
+}
+#endif /* PAGETABLE_LEVELS > 2 */
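
For context, a brief sketch (not from the patch) of how the slab-backed PGD cache above is consumed: pgd_alloc() hands out an object whose kernel half has already been filled in by pgd_ctor(), and pgd_free() returns it to the cache. The wrapper names here are invented; the generic mm code performs the equivalent calls when an address space is created and destroyed:

#include <linux/mm.h>
#include <linux/errno.h>
#include <asm/pgalloc.h>

static int example_mm_init_pgd(struct mm_struct *mm)
{
	mm->pgd = pgd_alloc(mm);	/* kernel PGD entries are pre-copied */
	if (unlikely(!mm->pgd))
		return -ENOMEM;

	return 0;
}

static void example_mm_free_pgd(struct mm_struct *mm)
{
	pgd_free(mm, mm->pgd);
}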
diff --git a/arch/sh/mm/pmb.c b/arch/sh/mm/pmb.c
index ab81c602295..7160c9fd6fe 100644
--- a/arch/sh/mm/pmb.c
+++ b/arch/sh/mm/pmb.c
@@ -3,11 +3,8 @@
*
* Privileged Space Mapping Buffer (PMB) Support.
*
- * Copyright (C) 2005, 2006, 2007 Paul Mundt
- *
- * P1/P2 Section mapping definitions from map32.h, which was:
- *
- * Copyright 2003 (c) Lineo Solutions,Inc.
+ * Copyright (C) 2005 - 2011 Paul Mundt
+ * Copyright (C) 2010 Matt Fleming
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
@@ -15,331 +12,805 @@
*/
#include <linux/init.h>
#include <linux/kernel.h>
+#include <linux/syscore_ops.h>
+#include <linux/cpu.h>
#include <linux/module.h>
-#include <linux/slab.h>
#include <linux/bitops.h>
#include <linux/debugfs.h>
#include <linux/fs.h>
#include <linux/seq_file.h>
#include <linux/err.h>
-#include <asm/system.h>
+#include <linux/io.h>
+#include <linux/spinlock.h>
+#include <linux/vmalloc.h>
+#include <asm/cacheflush.h>
+#include <asm/sizes.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
+#include <asm/page.h>
#include <asm/mmu.h>
-#include <asm/io.h>
#include <asm/mmu_context.h>
-#define NR_PMB_ENTRIES 16
-
-static struct kmem_cache *pmb_cache;
-static unsigned long pmb_map;
-
-static struct pmb_entry pmb_init_map[] = {
- /* vpn ppn flags (ub/sz/c/wt) */
-
- /* P1 Section Mappings */
- { 0x80000000, 0x00000000, PMB_SZ_64M | PMB_C, },
- { 0x84000000, 0x04000000, PMB_SZ_64M | PMB_C, },
- { 0x88000000, 0x08000000, PMB_SZ_128M | PMB_C, },
- { 0x90000000, 0x10000000, PMB_SZ_64M | PMB_C, },
- { 0x94000000, 0x14000000, PMB_SZ_64M | PMB_C, },
- { 0x98000000, 0x18000000, PMB_SZ_64M | PMB_C, },
-
- /* P2 Section Mappings */
- { 0xa0000000, 0x00000000, PMB_UB | PMB_SZ_64M | PMB_WT, },
- { 0xa4000000, 0x04000000, PMB_UB | PMB_SZ_64M | PMB_WT, },
- { 0xa8000000, 0x08000000, PMB_UB | PMB_SZ_128M | PMB_WT, },
- { 0xb0000000, 0x10000000, PMB_UB | PMB_SZ_64M | PMB_WT, },
- { 0xb4000000, 0x14000000, PMB_UB | PMB_SZ_64M | PMB_WT, },
- { 0xb8000000, 0x18000000, PMB_UB | PMB_SZ_64M | PMB_WT, },
+struct pmb_entry;
+
+struct pmb_entry {
+ unsigned long vpn;
+ unsigned long ppn;
+ unsigned long flags;
+ unsigned long size;
+
+ raw_spinlock_t lock;
+
+ /*
+ * 0 .. NR_PMB_ENTRIES for specific entry selection, or
+ * PMB_NO_ENTRY to search for a free one
+ */
+ int entry;
+
+ /* Adjacent entry link for contiguous multi-entry mappings */
+ struct pmb_entry *link;
};
-static inline unsigned long mk_pmb_entry(unsigned int entry)
+static struct {
+ unsigned long size;
+ int flag;
+} pmb_sizes[] = {
+ { .size = SZ_512M, .flag = PMB_SZ_512M, },
+ { .size = SZ_128M, .flag = PMB_SZ_128M, },
+ { .size = SZ_64M, .flag = PMB_SZ_64M, },
+ { .size = SZ_16M, .flag = PMB_SZ_16M, },
+};
+
+static void pmb_unmap_entry(struct pmb_entry *, int depth);
+
+static DEFINE_RWLOCK(pmb_rwlock);
+static struct pmb_entry pmb_entry_list[NR_PMB_ENTRIES];
+static DECLARE_BITMAP(pmb_map, NR_PMB_ENTRIES);
+
+static unsigned int pmb_iomapping_enabled;
+
+static __always_inline unsigned long mk_pmb_entry(unsigned int entry)
{
return (entry & PMB_E_MASK) << PMB_E_SHIFT;
}
-static inline unsigned long mk_pmb_addr(unsigned int entry)
+static __always_inline unsigned long mk_pmb_addr(unsigned int entry)
{
return mk_pmb_entry(entry) | PMB_ADDR;
}
-static inline unsigned long mk_pmb_data(unsigned int entry)
+static __always_inline unsigned long mk_pmb_data(unsigned int entry)
{
return mk_pmb_entry(entry) | PMB_DATA;
}
-static DEFINE_SPINLOCK(pmb_list_lock);
-static struct pmb_entry *pmb_list;
+static __always_inline unsigned int pmb_ppn_in_range(unsigned long ppn)
+{
+ return ppn >= __pa(memory_start) && ppn < __pa(memory_end);
+}
+
+/*
+ * Ensure that the PMB entries match our cache configuration.
+ *
+ * When we are in 32-bit address extended mode, CCR.CB becomes
+ * invalid, so care must be taken to manually adjust cacheable
+ * translations.
+ */
+static __always_inline unsigned long pmb_cache_flags(void)
+{
+ unsigned long flags = 0;
+
+#if defined(CONFIG_CACHE_OFF)
+ flags |= PMB_WT | PMB_UB;
+#elif defined(CONFIG_CACHE_WRITETHROUGH)
+ flags |= PMB_C | PMB_WT | PMB_UB;
+#elif defined(CONFIG_CACHE_WRITEBACK)
+ flags |= PMB_C;
+#endif
+
+ return flags;
+}
-static inline void pmb_list_add(struct pmb_entry *pmbe)
+/*
+ * Convert typical pgprot value to the PMB equivalent
+ */
+static inline unsigned long pgprot_to_pmb_flags(pgprot_t prot)
{
- struct pmb_entry **p, *tmp;
+ unsigned long pmb_flags = 0;
+ u64 flags = pgprot_val(prot);
- p = &pmb_list;
- while ((tmp = *p) != NULL)
- p = &tmp->next;
+ if (flags & _PAGE_CACHABLE)
+ pmb_flags |= PMB_C;
+ if (flags & _PAGE_WT)
+ pmb_flags |= PMB_WT | PMB_UB;
- pmbe->next = tmp;
- *p = pmbe;
+ return pmb_flags;
}
-static inline void pmb_list_del(struct pmb_entry *pmbe)
+static inline bool pmb_can_merge(struct pmb_entry *a, struct pmb_entry *b)
{
- struct pmb_entry **p, *tmp;
+ return (b->vpn == (a->vpn + a->size)) &&
+ (b->ppn == (a->ppn + a->size)) &&
+ (b->flags == a->flags);
+}
+
+static bool pmb_mapping_exists(unsigned long vaddr, phys_addr_t phys,
+ unsigned long size)
+{
+ int i;
+
+ read_lock(&pmb_rwlock);
+
+ for (i = 0; i < ARRAY_SIZE(pmb_entry_list); i++) {
+ struct pmb_entry *pmbe, *iter;
+ unsigned long span;
- for (p = &pmb_list; (tmp = *p); p = &tmp->next)
- if (tmp == pmbe) {
- *p = tmp->next;
- return;
+ if (!test_bit(i, pmb_map))
+ continue;
+
+ pmbe = &pmb_entry_list[i];
+
+ /*
+ * See if VPN and PPN are bounded by an existing mapping.
+ */
+ if ((vaddr < pmbe->vpn) || (vaddr >= (pmbe->vpn + pmbe->size)))
+ continue;
+ if ((phys < pmbe->ppn) || (phys >= (pmbe->ppn + pmbe->size)))
+ continue;
+
+ /*
+ * Now see if we're in range of a simple mapping.
+ */
+ if (size <= pmbe->size) {
+ read_unlock(&pmb_rwlock);
+ return true;
+ }
+
+ span = pmbe->size;
+
+ /*
+ * Finally for sizes that involve compound mappings, walk
+ * the chain.
+ */
+ for (iter = pmbe->link; iter; iter = iter->link)
+ span += iter->size;
+
+ /*
+ * Nothing else to do if the range requirements are met.
+ */
+ if (size <= span) {
+ read_unlock(&pmb_rwlock);
+ return true;
}
+ }
+
+ read_unlock(&pmb_rwlock);
+ return false;
}
-struct pmb_entry *pmb_alloc(unsigned long vpn, unsigned long ppn,
- unsigned long flags)
+static bool pmb_size_valid(unsigned long size)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(pmb_sizes); i++)
+ if (pmb_sizes[i].size == size)
+ return true;
+
+ return false;
+}
+
+static inline bool pmb_addr_valid(unsigned long addr, unsigned long size)
+{
+ return (addr >= P1SEG && (addr + size - 1) < P3SEG);
+}
+
+static inline bool pmb_prot_valid(pgprot_t prot)
+{
+ return (pgprot_val(prot) & _PAGE_USER) == 0;
+}
+
+static int pmb_size_to_flags(unsigned long size)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(pmb_sizes); i++)
+ if (pmb_sizes[i].size == size)
+ return pmb_sizes[i].flag;
+
+ return 0;
+}
+
+static int pmb_alloc_entry(void)
+{
+ int pos;
+
+ pos = find_first_zero_bit(pmb_map, NR_PMB_ENTRIES);
+ if (pos >= 0 && pos < NR_PMB_ENTRIES)
+ __set_bit(pos, pmb_map);
+ else
+ pos = -ENOSPC;
+
+ return pos;
+}
+
+static struct pmb_entry *pmb_alloc(unsigned long vpn, unsigned long ppn,
+ unsigned long flags, int entry)
{
struct pmb_entry *pmbe;
+ unsigned long irqflags;
+ void *ret = NULL;
+ int pos;
+
+ write_lock_irqsave(&pmb_rwlock, irqflags);
+
+ if (entry == PMB_NO_ENTRY) {
+ pos = pmb_alloc_entry();
+ if (unlikely(pos < 0)) {
+ ret = ERR_PTR(pos);
+ goto out;
+ }
+ } else {
+ if (__test_and_set_bit(entry, pmb_map)) {
+ ret = ERR_PTR(-ENOSPC);
+ goto out;
+ }
+
+ pos = entry;
+ }
+
+ write_unlock_irqrestore(&pmb_rwlock, irqflags);
+
+ pmbe = &pmb_entry_list[pos];
+
+ memset(pmbe, 0, sizeof(struct pmb_entry));
- pmbe = kmem_cache_alloc(pmb_cache, GFP_KERNEL);
- if (!pmbe)
- return ERR_PTR(-ENOMEM);
+ raw_spin_lock_init(&pmbe->lock);
pmbe->vpn = vpn;
pmbe->ppn = ppn;
pmbe->flags = flags;
-
- spin_lock_irq(&pmb_list_lock);
- pmb_list_add(pmbe);
- spin_unlock_irq(&pmb_list_lock);
+ pmbe->entry = pos;
return pmbe;
+
+out:
+ write_unlock_irqrestore(&pmb_rwlock, irqflags);
+ return ret;
}
-void pmb_free(struct pmb_entry *pmbe)
+static void pmb_free(struct pmb_entry *pmbe)
{
- spin_lock_irq(&pmb_list_lock);
- pmb_list_del(pmbe);
- spin_unlock_irq(&pmb_list_lock);
+ __clear_bit(pmbe->entry, pmb_map);
- kmem_cache_free(pmb_cache, pmbe);
+ pmbe->entry = PMB_NO_ENTRY;
+ pmbe->link = NULL;
}
/*
- * Must be in P2 for __set_pmb_entry()
+ * Must be run uncached.
*/
-int __set_pmb_entry(unsigned long vpn, unsigned long ppn,
- unsigned long flags, int *entry)
+static void __set_pmb_entry(struct pmb_entry *pmbe)
{
- unsigned int pos = *entry;
+ unsigned long addr, data;
- if (unlikely(pos == PMB_NO_ENTRY))
- pos = find_first_zero_bit(&pmb_map, NR_PMB_ENTRIES);
+ addr = mk_pmb_addr(pmbe->entry);
+ data = mk_pmb_data(pmbe->entry);
-repeat:
- if (unlikely(pos > NR_PMB_ENTRIES))
- return -ENOSPC;
+ jump_to_uncached();
- if (test_and_set_bit(pos, &pmb_map)) {
- pos = find_first_zero_bit(&pmb_map, NR_PMB_ENTRIES);
- goto repeat;
- }
+ /* Set V-bit */
+ __raw_writel(pmbe->vpn | PMB_V, addr);
+ __raw_writel(pmbe->ppn | pmbe->flags | PMB_V, data);
- ctrl_outl(vpn | PMB_V, mk_pmb_addr(pos));
+ back_to_cached();
+}
-#ifdef CONFIG_CACHE_WRITETHROUGH
- /*
- * When we are in 32-bit address extended mode, CCR.CB becomes
- * invalid, so care must be taken to manually adjust cacheable
- * translations.
- */
- if (likely(flags & PMB_C))
- flags |= PMB_WT;
-#endif
+static void __clear_pmb_entry(struct pmb_entry *pmbe)
+{
+ unsigned long addr, data;
+ unsigned long addr_val, data_val;
- ctrl_outl(ppn | flags | PMB_V, mk_pmb_data(pos));
+ addr = mk_pmb_addr(pmbe->entry);
+ data = mk_pmb_data(pmbe->entry);
- *entry = pos;
+ addr_val = __raw_readl(addr);
+ data_val = __raw_readl(data);
- return 0;
+ /* Clear V-bit */
+ writel_uncached(addr_val & ~PMB_V, addr);
+ writel_uncached(data_val & ~PMB_V, data);
}
-int __uses_jump_to_uncached set_pmb_entry(struct pmb_entry *pmbe)
+#ifdef CONFIG_PM
+static void set_pmb_entry(struct pmb_entry *pmbe)
{
- int ret;
+ unsigned long flags;
- jump_to_uncached();
- ret = __set_pmb_entry(pmbe->vpn, pmbe->ppn, pmbe->flags, &pmbe->entry);
- back_to_cached();
+ raw_spin_lock_irqsave(&pmbe->lock, flags);
+ __set_pmb_entry(pmbe);
+ raw_spin_unlock_irqrestore(&pmbe->lock, flags);
+}
+#endif /* CONFIG_PM */
- return ret;
+int pmb_bolt_mapping(unsigned long vaddr, phys_addr_t phys,
+ unsigned long size, pgprot_t prot)
+{
+ struct pmb_entry *pmbp, *pmbe;
+ unsigned long orig_addr, orig_size;
+ unsigned long flags, pmb_flags;
+ int i, mapped;
+
+ if (size < SZ_16M)
+ return -EINVAL;
+ if (!pmb_addr_valid(vaddr, size))
+ return -EFAULT;
+ if (pmb_mapping_exists(vaddr, phys, size))
+ return 0;
+
+ orig_addr = vaddr;
+ orig_size = size;
+
+ flush_tlb_kernel_range(vaddr, vaddr + size);
+
+ pmb_flags = pgprot_to_pmb_flags(prot);
+ pmbp = NULL;
+
+ do {
+ for (i = mapped = 0; i < ARRAY_SIZE(pmb_sizes); i++) {
+ if (size < pmb_sizes[i].size)
+ continue;
+
+ pmbe = pmb_alloc(vaddr, phys, pmb_flags |
+ pmb_sizes[i].flag, PMB_NO_ENTRY);
+ if (IS_ERR(pmbe)) {
+ pmb_unmap_entry(pmbp, mapped);
+ return PTR_ERR(pmbe);
+ }
+
+ raw_spin_lock_irqsave(&pmbe->lock, flags);
+
+ pmbe->size = pmb_sizes[i].size;
+
+ __set_pmb_entry(pmbe);
+
+ phys += pmbe->size;
+ vaddr += pmbe->size;
+ size -= pmbe->size;
+
+ /*
+ * Link adjacent entries that span multiple PMB
+ * entries for easier tear-down.
+ */
+ if (likely(pmbp)) {
+ raw_spin_lock_nested(&pmbp->lock,
+ SINGLE_DEPTH_NESTING);
+ pmbp->link = pmbe;
+ raw_spin_unlock(&pmbp->lock);
+ }
+
+ pmbp = pmbe;
+
+ /*
+ * Instead of trying smaller sizes on every
+ * iteration (even if we succeed in allocating
+ * space), try using pmb_sizes[i].size again.
+ */
+ i--;
+ mapped++;
+
+ raw_spin_unlock_irqrestore(&pmbe->lock, flags);
+ }
+ } while (size >= SZ_16M);
+
+ flush_cache_vmap(orig_addr, orig_addr + orig_size);
+
+ return 0;
}
-void __uses_jump_to_uncached clear_pmb_entry(struct pmb_entry *pmbe)
+void __iomem *pmb_remap_caller(phys_addr_t phys, unsigned long size,
+ pgprot_t prot, void *caller)
{
- unsigned int entry = pmbe->entry;
- unsigned long addr;
+ unsigned long vaddr;
+ phys_addr_t offset, last_addr;
+ phys_addr_t align_mask;
+ unsigned long aligned;
+ struct vm_struct *area;
+ int i, ret;
+
+ if (!pmb_iomapping_enabled)
+ return NULL;
/*
- * Don't allow clearing of wired init entries, P1 or P2 access
- * without a corresponding mapping in the PMB will lead to reset
- * by the TLB.
+ * Small mappings need to go through the TLB.
*/
- if (unlikely(entry < ARRAY_SIZE(pmb_init_map) ||
- entry >= NR_PMB_ENTRIES))
- return;
+ if (size < SZ_16M)
+ return ERR_PTR(-EINVAL);
+ if (!pmb_prot_valid(prot))
+ return ERR_PTR(-EINVAL);
- jump_to_uncached();
+ for (i = 0; i < ARRAY_SIZE(pmb_sizes); i++)
+ if (size >= pmb_sizes[i].size)
+ break;
- /* Clear V-bit */
- addr = mk_pmb_addr(entry);
- ctrl_outl(ctrl_inl(addr) & ~PMB_V, addr);
+ last_addr = phys + size;
+ align_mask = ~(pmb_sizes[i].size - 1);
+ offset = phys & ~align_mask;
+ phys &= align_mask;
+ aligned = ALIGN(last_addr, pmb_sizes[i].size) - phys;
- addr = mk_pmb_data(entry);
- ctrl_outl(ctrl_inl(addr) & ~PMB_V, addr);
+ /*
+ * XXX: This should really start from uncached_end, but this
+ * causes the MMU to reset, so for now we restrict it to the
+ * 0xb000...0xc000 range.
+ */
+ area = __get_vm_area_caller(aligned, VM_IOREMAP, 0xb0000000,
+ P3SEG, caller);
+ if (!area)
+ return NULL;
- back_to_cached();
+ area->phys_addr = phys;
+ vaddr = (unsigned long)area->addr;
+
+ ret = pmb_bolt_mapping(vaddr, phys, size, prot);
+ if (unlikely(ret != 0))
+ return ERR_PTR(ret);
- clear_bit(entry, &pmb_map);
+ return (void __iomem *)(offset + (char *)vaddr);
}
+int pmb_unmap(void __iomem *addr)
+{
+ struct pmb_entry *pmbe = NULL;
+ unsigned long vaddr = (unsigned long __force)addr;
+ int i, found = 0;
+
+ read_lock(&pmb_rwlock);
+
+ for (i = 0; i < ARRAY_SIZE(pmb_entry_list); i++) {
+ if (test_bit(i, pmb_map)) {
+ pmbe = &pmb_entry_list[i];
+ if (pmbe->vpn == vaddr) {
+ found = 1;
+ break;
+ }
+ }
+ }
-static struct {
- unsigned long size;
- int flag;
-} pmb_sizes[] = {
- { .size = 0x20000000, .flag = PMB_SZ_512M, },
- { .size = 0x08000000, .flag = PMB_SZ_128M, },
- { .size = 0x04000000, .flag = PMB_SZ_64M, },
- { .size = 0x01000000, .flag = PMB_SZ_16M, },
-};
+ read_unlock(&pmb_rwlock);
+
+ if (found) {
+ pmb_unmap_entry(pmbe, NR_PMB_ENTRIES);
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+static void __pmb_unmap_entry(struct pmb_entry *pmbe, int depth)
+{
+ do {
+ struct pmb_entry *pmblink = pmbe;
+
+ /*
+ * We may be called before this pmb_entry has been
+ * entered into the PMB table via set_pmb_entry(), but
+ * that's OK because we've allocated a unique slot for
+ * this entry in pmb_alloc() (even if we haven't filled
+ * it yet).
+ *
+ * Therefore, calling __clear_pmb_entry() is safe as no
+ * other mapping can be using that slot.
+ */
+ __clear_pmb_entry(pmbe);
+
+ flush_cache_vunmap(pmbe->vpn, pmbe->vpn + pmbe->size);
+
+ pmbe = pmblink->link;
+
+ pmb_free(pmblink);
+ } while (pmbe && --depth);
+}
-long pmb_remap(unsigned long vaddr, unsigned long phys,
- unsigned long size, unsigned long flags)
+static void pmb_unmap_entry(struct pmb_entry *pmbe, int depth)
{
- struct pmb_entry *pmbp;
- unsigned long wanted;
- int pmb_flags, i;
+ unsigned long flags;
- /* Convert typical pgprot value to the PMB equivalent */
- if (flags & _PAGE_CACHABLE) {
- if (flags & _PAGE_WT)
- pmb_flags = PMB_WT;
- else
- pmb_flags = PMB_C;
- } else
- pmb_flags = PMB_WT | PMB_UB;
+ if (unlikely(!pmbe))
+ return;
- pmbp = NULL;
- wanted = size;
+ write_lock_irqsave(&pmb_rwlock, flags);
+ __pmb_unmap_entry(pmbe, depth);
+ write_unlock_irqrestore(&pmb_rwlock, flags);
+}
+
+static void __init pmb_notify(void)
+{
+ int i;
-again:
- for (i = 0; i < ARRAY_SIZE(pmb_sizes); i++) {
+ pr_info("PMB: boot mappings:\n");
+
+ read_lock(&pmb_rwlock);
+
+ for (i = 0; i < ARRAY_SIZE(pmb_entry_list); i++) {
struct pmb_entry *pmbe;
- int ret;
- if (size < pmb_sizes[i].size)
+ if (!test_bit(i, pmb_map))
continue;
- pmbe = pmb_alloc(vaddr, phys, pmb_flags | pmb_sizes[i].flag);
- if (IS_ERR(pmbe))
- return PTR_ERR(pmbe);
+ pmbe = &pmb_entry_list[i];
- ret = set_pmb_entry(pmbe);
- if (ret != 0) {
- pmb_free(pmbe);
- return -EBUSY;
- }
+ pr_info(" 0x%08lx -> 0x%08lx [ %4ldMB %2scached ]\n",
+ pmbe->vpn >> PAGE_SHIFT, pmbe->ppn >> PAGE_SHIFT,
+ pmbe->size >> 20, (pmbe->flags & PMB_C) ? "" : "un");
+ }
+
+ read_unlock(&pmb_rwlock);
+}
+
+/*
+ * Sync our software copy of the PMB mappings with those in hardware. The
+ * mappings in the hardware PMB were either set up by the bootloader or
+ * very early on by the kernel.
+ */
+static void __init pmb_synchronize(void)
+{
+ struct pmb_entry *pmbp = NULL;
+ int i, j;
+
+ /*
+ * Run through the initial boot mappings, log the established
+ * ones, and blow away anything that falls outside of the valid
+ * PPN range. Specifically, we only care about existing mappings
+ * that impact the cached/uncached sections.
+ *
+ * Note that touching these can be a bit of a minefield; the boot
+ * loader can establish multi-page mappings with the same caching
+ * attributes, so we need to ensure that we aren't modifying a
+ * mapping that we're presently executing from, or may execute
+ * from in the case of straddling page boundaries.
+ *
+ * In the future we will have to tidy up after the boot loader by
+ * jumping between the cached and uncached mappings and tearing
+ * down alternating mappings while executing from the other.
+ */
+ for (i = 0; i < NR_PMB_ENTRIES; i++) {
+ unsigned long addr, data;
+ unsigned long addr_val, data_val;
+ unsigned long ppn, vpn, flags;
+ unsigned long irqflags;
+ unsigned int size;
+ struct pmb_entry *pmbe;
+
+ addr = mk_pmb_addr(i);
+ data = mk_pmb_data(i);
- phys += pmb_sizes[i].size;
- vaddr += pmb_sizes[i].size;
- size -= pmb_sizes[i].size;
+ addr_val = __raw_readl(addr);
+ data_val = __raw_readl(data);
/*
- * Link adjacent entries that span multiple PMB entries
- * for easier tear-down.
+ * Skip over any bogus entries
*/
- if (likely(pmbp))
- pmbp->link = pmbe;
+ if (!(data_val & PMB_V) || !(addr_val & PMB_V))
+ continue;
- pmbp = pmbe;
- }
+ ppn = data_val & PMB_PFN_MASK;
+ vpn = addr_val & PMB_PFN_MASK;
+
+ /*
+ * Only preserve in-range mappings.
+ */
+ if (!pmb_ppn_in_range(ppn)) {
+ /*
+ * Invalidate anything out of bounds.
+ */
+ writel_uncached(addr_val & ~PMB_V, addr);
+ writel_uncached(data_val & ~PMB_V, data);
+ continue;
+ }
+
+ /*
+ * Update the caching attributes if necessary
+ */
+ if (data_val & PMB_C) {
+ data_val &= ~PMB_CACHE_MASK;
+ data_val |= pmb_cache_flags();
+
+ writel_uncached(data_val, data);
+ }
+
+ size = data_val & PMB_SZ_MASK;
+ flags = size | (data_val & PMB_CACHE_MASK);
- if (size >= 0x1000000)
- goto again;
+ pmbe = pmb_alloc(vpn, ppn, flags, i);
+ if (IS_ERR(pmbe)) {
+ WARN_ON_ONCE(1);
+ continue;
+ }
+
+ raw_spin_lock_irqsave(&pmbe->lock, irqflags);
+
+ for (j = 0; j < ARRAY_SIZE(pmb_sizes); j++)
+ if (pmb_sizes[j].flag == size)
+ pmbe->size = pmb_sizes[j].size;
+
+ if (pmbp) {
+ raw_spin_lock_nested(&pmbp->lock, SINGLE_DEPTH_NESTING);
+ /*
+ * Compare the previous entry against the current one to
+ * see if the entries span a contiguous mapping. If so,
+ * set up the entry links accordingly. Compound mappings
+ * are later coalesced.
+ */
+ if (pmb_can_merge(pmbp, pmbe))
+ pmbp->link = pmbe;
+ raw_spin_unlock(&pmbp->lock);
+ }
- return wanted - size;
+ pmbp = pmbe;
+
+ raw_spin_unlock_irqrestore(&pmbe->lock, irqflags);
+ }
}
-void pmb_unmap(unsigned long addr)
+static void __init pmb_merge(struct pmb_entry *head)
{
- struct pmb_entry **p, *pmbe;
+ unsigned long span, newsize;
+ struct pmb_entry *tail;
+ int i = 1, depth = 0;
+
+ span = newsize = head->size;
+
+ tail = head->link;
+ while (tail) {
+ span += tail->size;
- for (p = &pmb_list; (pmbe = *p); p = &pmbe->next)
- if (pmbe->vpn == addr)
+ if (pmb_size_valid(span)) {
+ newsize = span;
+ depth = i;
+ }
+
+ /* This is the end of the line.. */
+ if (!tail->link)
break;
- if (unlikely(!pmbe))
- return;
+ tail = tail->link;
+ i++;
+ }
- WARN_ON(!test_bit(pmbe->entry, &pmb_map));
+ /*
+ * The merged page size must be valid.
+ */
+ if (!depth || !pmb_size_valid(newsize))
+ return;
- do {
- struct pmb_entry *pmblink = pmbe;
+ head->flags &= ~PMB_SZ_MASK;
+ head->flags |= pmb_size_to_flags(newsize);
- clear_pmb_entry(pmbe);
- pmbe = pmblink->link;
+ head->size = newsize;
- pmb_free(pmblink);
- } while (pmbe);
+ __pmb_unmap_entry(head->link, depth);
+ __set_pmb_entry(head);
}
-static void pmb_cache_ctor(struct kmem_cache *cachep, void *pmb)
+static void __init pmb_coalesce(void)
{
- struct pmb_entry *pmbe = pmb;
+ unsigned long flags;
+ int i;
- memset(pmb, 0, sizeof(struct pmb_entry));
+ write_lock_irqsave(&pmb_rwlock, flags);
- pmbe->entry = PMB_NO_ENTRY;
-}
+ for (i = 0; i < ARRAY_SIZE(pmb_entry_list); i++) {
+ struct pmb_entry *pmbe;
-static int __uses_jump_to_uncached pmb_init(void)
-{
- unsigned int nr_entries = ARRAY_SIZE(pmb_init_map);
- unsigned int entry, i;
+ if (!test_bit(i, pmb_map))
+ continue;
- BUG_ON(unlikely(nr_entries >= NR_PMB_ENTRIES));
+ pmbe = &pmb_entry_list[i];
- pmb_cache = kmem_cache_create("pmb", sizeof(struct pmb_entry), 0,
- SLAB_PANIC, pmb_cache_ctor);
+ /*
+ * We're only interested in compound mappings
+ */
+ if (!pmbe->link)
+ continue;
- jump_to_uncached();
+ /*
+ * Nothing to do if it already uses the largest possible
+ * page size.
+ */
+ if (pmbe->size == SZ_512M)
+ continue;
+
+ pmb_merge(pmbe);
+ }
+
+ write_unlock_irqrestore(&pmb_rwlock, flags);
+}
+
+#ifdef CONFIG_UNCACHED_MAPPING
+static void __init pmb_resize(void)
+{
+ int i;
/*
- * Ordering is important, P2 must be mapped in the PMB before we
- * can set PMB.SE, and P1 must be mapped before we jump back to
- * P1 space.
+ * If the uncached mapping was constructed by the kernel, it will
+ * already be a reasonable size.
*/
- for (entry = 0; entry < nr_entries; entry++) {
- struct pmb_entry *pmbe = pmb_init_map + entry;
+ if (uncached_size == SZ_16M)
+ return;
- __set_pmb_entry(pmbe->vpn, pmbe->ppn, pmbe->flags, &entry);
- }
+ read_lock(&pmb_rwlock);
- ctrl_outl(0, PMB_IRMCR);
+ for (i = 0; i < ARRAY_SIZE(pmb_entry_list); i++) {
+ struct pmb_entry *pmbe;
+ unsigned long flags;
- /* PMB.SE and UB[7] */
- ctrl_outl((1 << 31) | (1 << 7), PMB_PASCR);
+ if (!test_bit(i, pmb_map))
+ continue;
- /* Flush out the TLB */
- i = ctrl_inl(MMUCR);
- i |= MMUCR_TI;
- ctrl_outl(i, MMUCR);
+ pmbe = &pmb_entry_list[i];
- back_to_cached();
+ if (pmbe->vpn != uncached_start)
+ continue;
+
+ /*
+ * Found it, now resize it.
+ */
+ raw_spin_lock_irqsave(&pmbe->lock, flags);
+
+ pmbe->size = SZ_16M;
+ pmbe->flags &= ~PMB_SZ_MASK;
+ pmbe->flags |= pmb_size_to_flags(pmbe->size);
+
+ uncached_resize(pmbe->size);
+
+ __set_pmb_entry(pmbe);
+
+ raw_spin_unlock_irqrestore(&pmbe->lock, flags);
+ }
+
+ read_unlock(&pmb_rwlock);
+}
+#endif
+
+static int __init early_pmb(char *p)
+{
+ if (!p)
+ return 0;
+
+ if (strstr(p, "iomap"))
+ pmb_iomapping_enabled = 1;
return 0;
}
-arch_initcall(pmb_init);
+early_param("pmb", early_pmb);
+
+void __init pmb_init(void)
+{
+ /* Synchronize software state */
+ pmb_synchronize();
+
+ /* Attempt to combine compound mappings */
+ pmb_coalesce();
+
+#ifdef CONFIG_UNCACHED_MAPPING
+ /* Resize initial mappings, if necessary */
+ pmb_resize();
+#endif
+
+ /* Log them */
+ pmb_notify();
+
+ writel_uncached(0, PMB_IRMCR);
+
+ /* Flush out the TLB */
+ local_flush_tlb_all();
+ ctrl_barrier();
+}
+
+bool __in_29bit_mode(void)
+{
+ return (__raw_readl(PMB_PASCR) & PASCR_SE) == 0;
+}
static int pmb_seq_show(struct seq_file *file, void *iter)
{
@@ -354,8 +825,8 @@ static int pmb_seq_show(struct seq_file *file, void *iter)
unsigned int size;
char *sz_str = NULL;
- addr = ctrl_inl(mk_pmb_addr(i));
- data = ctrl_inl(mk_pmb_data(i));
+ addr = __raw_readl(mk_pmb_addr(i));
+ data = __raw_readl(mk_pmb_data(i));
size = data & PMB_SZ_MASK;
sz_str = (size == PMB_SZ_16M) ? " 16MB":
@@ -385,7 +856,7 @@ static const struct file_operations pmb_debugfs_fops = {
.open = pmb_debugfs_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release,
+ .release = single_release,
};
static int __init pmb_debugfs_init(void)
@@ -393,10 +864,40 @@ static int __init pmb_debugfs_init(void)
struct dentry *dentry;
dentry = debugfs_create_file("pmb", S_IFREG | S_IRUGO,
- NULL, NULL, &pmb_debugfs_fops);
- if (IS_ERR(dentry))
- return PTR_ERR(dentry);
+ arch_debugfs_dir, NULL, &pmb_debugfs_fops);
+ if (!dentry)
+ return -ENOMEM;
return 0;
}
-postcore_initcall(pmb_debugfs_init);
+subsys_initcall(pmb_debugfs_init);
+
+#ifdef CONFIG_PM
+static void pmb_syscore_resume(void)
+{
+ struct pmb_entry *pmbe;
+ int i;
+
+ read_lock(&pmb_rwlock);
+
+ for (i = 0; i < ARRAY_SIZE(pmb_entry_list); i++) {
+ if (test_bit(i, pmb_map)) {
+ pmbe = &pmb_entry_list[i];
+ set_pmb_entry(pmbe);
+ }
+ }
+
+ read_unlock(&pmb_rwlock);
+}
+
+static struct syscore_ops pmb_syscore_ops = {
+ .resume = pmb_syscore_resume,
+};
+
+static int __init pmb_sysdev_init(void)
+{
+ register_syscore_ops(&pmb_syscore_ops);
+ return 0;
+}
+subsys_initcall(pmb_sysdev_init);
+#endif
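
To show how the reworked PMB interface above is intended to be consumed, here is a hedged sketch (not part of the patch): a hypothetical platform maps a large, 16MB-aligned window through pmb_remap_caller() and later tears it down with pmb_unmap(). The physical address and function names are invented, and the mapping only succeeds when the "pmb=iomap" boot parameter handled by early_pmb() has been given:

#include <linux/err.h>
#include <linux/io.h>
#include <asm/mmu.h>
#include <asm/pgtable.h>
#include <asm/sizes.h>

static void __iomem *flash_base;

static int example_map_flash_window(void)
{
	/* 64MB of (hypothetical) flash at a 16MB-aligned physical address */
	flash_base = pmb_remap_caller(0x04000000, SZ_64M, PAGE_KERNEL_NOCACHE,
				      __builtin_return_address(0));
	if (IS_ERR_OR_NULL(flash_base))
		return flash_base ? PTR_ERR((void __force *)flash_base) : -ENOMEM;

	return 0;
}

static void example_unmap_flash_window(void)
{
	pmb_unmap(flash_base);
}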
diff --git a/arch/sh/mm/sram.c b/arch/sh/mm/sram.c
new file mode 100644
index 00000000000..2d8fa718d55
--- /dev/null
+++ b/arch/sh/mm/sram.c
@@ -0,0 +1,35 @@
+/*
+ * SRAM pool for tiny memories not otherwise managed.
+ *
+ * Copyright (C) 2010 Paul Mundt
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <asm/sram.h>
+
+/*
+ * This provides a standard SRAM pool for tiny memories that can be
+ * added either by the CPU or the platform code. Typical SRAM sizes
+ * to be inserted in to the pool will generally be less than the page
+ * size, with anything more reasonably sized handled as a NUMA memory
+ * node.
+ */
+struct gen_pool *sram_pool;
+
+static int __init sram_pool_init(void)
+{
+ /*
+ * This is a global pool, we don't care about node locality.
+ */
+ sram_pool = gen_pool_create(1, -1);
+ if (unlikely(!sram_pool))
+ return -ENOMEM;
+
+ return 0;
+}
+core_initcall(sram_pool_init);
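
A short sketch (not part of the patch) of how CPU or platform code might feed a small on-chip RAM into the pool created above and carve an allocation out of it; the base address and sizes are invented:

#include <linux/genalloc.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <asm/sram.h>

static int __init example_add_sram(void)
{
	unsigned long chunk;
	int ret;

	/* Register 4KB of (hypothetical) on-chip RAM with the pool. */
	ret = gen_pool_add(sram_pool, 0xe5200000UL, 0x1000, -1);
	if (unlikely(ret < 0))
		return ret;

	/* Carve out a 256-byte piece; gen_pool_alloc() returns 0 on failure. */
	chunk = gen_pool_alloc(sram_pool, 256);
	if (!chunk)
		return -ENOMEM;

	gen_pool_free(sram_pool, chunk, 256);
	return 0;
}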
diff --git a/arch/sh/mm/tlb-debugfs.c b/arch/sh/mm/tlb-debugfs.c
new file mode 100644
index 00000000000..dea637a0924
--- /dev/null
+++ b/arch/sh/mm/tlb-debugfs.c
@@ -0,0 +1,172 @@
+/*
+ * arch/sh/mm/tlb-debugfs.c
+ *
+ * debugfs ops for SH-4 ITLB/UTLBs.
+ *
+ * Copyright (C) 2010 Matt Fleming
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <asm/processor.h>
+#include <asm/mmu_context.h>
+#include <asm/tlbflush.h>
+
+enum tlb_type {
+ TLB_TYPE_ITLB,
+ TLB_TYPE_UTLB,
+};
+
+static struct {
+ int bits;
+ const char *size;
+} tlb_sizes[] = {
+ { 0x0, " 1KB" },
+ { 0x1, " 4KB" },
+ { 0x2, " 8KB" },
+ { 0x4, " 64KB" },
+ { 0x5, "256KB" },
+ { 0x7, " 1MB" },
+ { 0x8, " 4MB" },
+ { 0xc, " 64MB" },
+};
+
+static int tlb_seq_show(struct seq_file *file, void *iter)
+{
+ unsigned int tlb_type = (unsigned int)file->private;
+ unsigned long addr1, addr2, data1, data2;
+ unsigned long flags;
+ unsigned long mmucr;
+ unsigned int nentries, entry;
+ unsigned int urb;
+
+ mmucr = __raw_readl(MMUCR);
+ if ((mmucr & 0x1) == 0) {
+ seq_printf(file, "address translation disabled\n");
+ return 0;
+ }
+
+ if (tlb_type == TLB_TYPE_ITLB) {
+ addr1 = MMU_ITLB_ADDRESS_ARRAY;
+ addr2 = MMU_ITLB_ADDRESS_ARRAY2;
+ data1 = MMU_ITLB_DATA_ARRAY;
+ data2 = MMU_ITLB_DATA_ARRAY2;
+ nentries = 4;
+ } else {
+ addr1 = MMU_UTLB_ADDRESS_ARRAY;
+ addr2 = MMU_UTLB_ADDRESS_ARRAY2;
+ data1 = MMU_UTLB_DATA_ARRAY;
+ data2 = MMU_UTLB_DATA_ARRAY2;
+ nentries = 64;
+ }
+
+ local_irq_save(flags);
+ jump_to_uncached();
+
+ urb = (mmucr & MMUCR_URB) >> MMUCR_URB_SHIFT;
+
+ /* Make the "entry >= urb" test fail. */
+ if (urb == 0)
+ urb = MMUCR_URB_NENTRIES + 1;
+
+ if (tlb_type == TLB_TYPE_ITLB) {
+ addr1 = MMU_ITLB_ADDRESS_ARRAY;
+ addr2 = MMU_ITLB_ADDRESS_ARRAY2;
+ data1 = MMU_ITLB_DATA_ARRAY;
+ data2 = MMU_ITLB_DATA_ARRAY2;
+ nentries = 4;
+ } else {
+ addr1 = MMU_UTLB_ADDRESS_ARRAY;
+ addr2 = MMU_UTLB_ADDRESS_ARRAY2;
+ data1 = MMU_UTLB_DATA_ARRAY;
+ data2 = MMU_UTLB_DATA_ARRAY2;
+ nentries = 64;
+ }
+
+ seq_printf(file, "entry: vpn ppn asid size valid wired\n");
+
+ for (entry = 0; entry < nentries; entry++) {
+ unsigned long vpn, ppn, asid, size;
+ unsigned long valid;
+ unsigned long val;
+ const char *sz = " ?";
+ int i;
+
+ val = __raw_readl(addr1 | (entry << MMU_TLB_ENTRY_SHIFT));
+ ctrl_barrier();
+ vpn = val & 0xfffffc00;
+ valid = val & 0x100;
+
+ val = __raw_readl(addr2 | (entry << MMU_TLB_ENTRY_SHIFT));
+ ctrl_barrier();
+ asid = val & MMU_CONTEXT_ASID_MASK;
+
+ val = __raw_readl(data1 | (entry << MMU_TLB_ENTRY_SHIFT));
+ ctrl_barrier();
+ ppn = (val & 0x0ffffc00) << 4;
+
+ val = __raw_readl(data2 | (entry << MMU_TLB_ENTRY_SHIFT));
+ ctrl_barrier();
+ size = (val & 0xf0) >> 4;
+
+ for (i = 0; i < ARRAY_SIZE(tlb_sizes); i++) {
+ if (tlb_sizes[i].bits == size)
+ break;
+ }
+
+ if (i != ARRAY_SIZE(tlb_sizes))
+ sz = tlb_sizes[i].size;
+
+ seq_printf(file, "%2d: 0x%08lx 0x%08lx %5lu %s %s %s\n",
+ entry, vpn, ppn, asid,
+ sz, valid ? "V" : "-",
+ (urb <= entry) ? "W" : "-");
+ }
+
+ back_to_cached();
+ local_irq_restore(flags);
+
+ return 0;
+}
+
+static int tlb_debugfs_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, tlb_seq_show, inode->i_private);
+}
+
+static const struct file_operations tlb_debugfs_fops = {
+ .owner = THIS_MODULE,
+ .open = tlb_debugfs_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static int __init tlb_debugfs_init(void)
+{
+ struct dentry *itlb, *utlb;
+
+ itlb = debugfs_create_file("itlb", S_IRUSR, arch_debugfs_dir,
+ (unsigned int *)TLB_TYPE_ITLB,
+ &tlb_debugfs_fops);
+ if (unlikely(!itlb))
+ return -ENOMEM;
+
+ utlb = debugfs_create_file("utlb", S_IRUSR, arch_debugfs_dir,
+ (unsigned int *)TLB_TYPE_UTLB,
+ &tlb_debugfs_fops);
+ if (unlikely(!utlb)) {
+ debugfs_remove(itlb);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+module_init(tlb_debugfs_init);
+
+MODULE_LICENSE("GPL v2");
diff --git a/arch/sh/mm/tlb-pteaex.c b/arch/sh/mm/tlb-pteaex.c
new file mode 100644
index 00000000000..4db21adfe5d
--- /dev/null
+++ b/arch/sh/mm/tlb-pteaex.c
@@ -0,0 +1,106 @@
+/*
+ * arch/sh/mm/tlb-pteaex.c
+ *
+ * TLB operations for SH-X3 CPUs featuring PTE ASID Extensions.
+ *
+ * Copyright (C) 2009 Paul Mundt
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/io.h>
+#include <asm/mmu_context.h>
+#include <asm/cacheflush.h>
+
+void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t pte)
+{
+ unsigned long flags, pteval, vpn;
+
+ /*
+ * Handle debugger faulting in for the debuggee.
+ */
+ if (vma && current->active_mm != vma->vm_mm)
+ return;
+
+ local_irq_save(flags);
+
+ /* Set PTEH register */
+ vpn = address & MMU_VPN_MASK;
+ __raw_writel(vpn, MMU_PTEH);
+
+ /* Set PTEAEX */
+ __raw_writel(get_asid(), MMU_PTEAEX);
+
+ pteval = pte.pte_low;
+
+ /* Set PTEA register */
+#ifdef CONFIG_X2TLB
+ /*
+ * For the extended mode TLB this is trivial, only the ESZ and
+ * EPR bits need to be written out to PTEA, with the remainder of
+ * the protection bits (with the exception of the compat-mode SZ
+ * and PR bits, which are cleared) being written out in PTEL.
+ */
+ __raw_writel(pte.pte_high, MMU_PTEA);
+#endif
+
+ /* Set PTEL register */
+ pteval &= _PAGE_FLAGS_HARDWARE_MASK; /* drop software flags */
+#ifdef CONFIG_CACHE_WRITETHROUGH
+ pteval |= _PAGE_WT;
+#endif
+ /* conveniently, we want all the software flags to be 0 anyway */
+ __raw_writel(pteval, MMU_PTEL);
+
+ /* Load the TLB */
+ asm volatile("ldtlb": /* no output */ : /* no input */ : "memory");
+ local_irq_restore(flags);
+}
+
+/*
+ * While SH-X2 extended TLB mode splits out the memory-mapped I/UTLB
+ * data arrays, SH-X3 cores with PTEAEX split out the memory-mapped
+ * address arrays. In compat mode the second array is inaccessible, while
+ * in extended mode, the legacy 8-bit ASID field in address array 1 has
+ * undefined behaviour.
+ */
+void local_flush_tlb_one(unsigned long asid, unsigned long page)
+{
+ jump_to_uncached();
+ __raw_writel(page, MMU_UTLB_ADDRESS_ARRAY | MMU_PAGE_ASSOC_BIT);
+ __raw_writel(asid, MMU_UTLB_ADDRESS_ARRAY2 | MMU_PAGE_ASSOC_BIT);
+ __raw_writel(page, MMU_ITLB_ADDRESS_ARRAY | MMU_PAGE_ASSOC_BIT);
+ __raw_writel(asid, MMU_ITLB_ADDRESS_ARRAY2 | MMU_PAGE_ASSOC_BIT);
+ back_to_cached();
+}
+
+void local_flush_tlb_all(void)
+{
+ unsigned long flags, status;
+ int i;
+
+ /*
+ * Flush all the TLB.
+ */
+ local_irq_save(flags);
+ jump_to_uncached();
+
+ status = __raw_readl(MMUCR);
+ status = ((status & MMUCR_URB) >> MMUCR_URB_SHIFT);
+
+ if (status == 0)
+ status = MMUCR_URB_NENTRIES;
+
+ for (i = 0; i < status; i++)
+ __raw_writel(0x0, MMU_UTLB_ADDRESS_ARRAY | (i << 8));
+
+ for (i = 0; i < 4; i++)
+ __raw_writel(0x0, MMU_ITLB_ADDRESS_ARRAY | (i << 8));
+
+ back_to_cached();
+ ctrl_barrier();
+ local_irq_restore(flags);
+}
diff --git a/arch/sh/mm/tlb-sh3.c b/arch/sh/mm/tlb-sh3.c
index 7fbfd5a11ff..6554fb439f0 100644
--- a/arch/sh/mm/tlb-sh3.c
+++ b/arch/sh/mm/tlb-sh3.c
@@ -18,54 +18,36 @@
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/smp.h>
-#include <linux/smp_lock.h>
#include <linux/interrupt.h>
-#include <asm/system.h>
#include <asm/io.h>
#include <asm/uaccess.h>
#include <asm/pgalloc.h>
#include <asm/mmu_context.h>
#include <asm/cacheflush.h>
-void update_mmu_cache(struct vm_area_struct * vma,
- unsigned long address, pte_t pte)
+void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t pte)
{
- unsigned long flags;
- unsigned long pteval;
- unsigned long vpn;
+ unsigned long flags, pteval, vpn;
- /* Ptrace may call this routine. */
+ /*
+ * Handle debugger faulting in for the debuggee.
+ */
if (vma && current->active_mm != vma->vm_mm)
return;
-#if defined(CONFIG_SH7705_CACHE_32KB)
- {
- struct page *page = pte_page(pte);
- unsigned long pfn = pte_pfn(pte);
-
- if (pfn_valid(pfn) && !test_bit(PG_mapped, &page->flags)) {
- unsigned long phys = pte_val(pte) & PTE_PHYS_MASK;
-
- __flush_wback_region((void *)P1SEGADDR(phys),
- PAGE_SIZE);
- __set_bit(PG_mapped, &page->flags);
- }
- }
-#endif
-
local_irq_save(flags);
/* Set PTEH register */
vpn = (address & MMU_VPN_MASK) | get_asid();
- ctrl_outl(vpn, MMU_PTEH);
+ __raw_writel(vpn, MMU_PTEH);
pteval = pte_val(pte);
/* Set PTEL register */
pteval &= _PAGE_FLAGS_HARDWARE_MASK; /* drop software flags */
/* conveniently, we want all the software flags to be 0 anyway */
- ctrl_outl(pteval, MMU_PTEL);
+ __raw_writel(pteval, MMU_PTEL);
/* Load the TLB */
asm volatile("ldtlb": /* no output */ : /* no input */ : "memory");
@@ -92,6 +74,24 @@ void local_flush_tlb_one(unsigned long asid, unsigned long page)
}
for (i = 0; i < ways; i++)
- ctrl_outl(data, addr + (i << 8));
+ __raw_writel(data, addr + (i << 8));
}
+void local_flush_tlb_all(void)
+{
+ unsigned long flags, status;
+
+ /*
+ * Flush all the TLB.
+ *
+ * Write to the MMU control register's bit:
+ * TF-bit for SH-3, TI-bit for SH-4.
+ * It's the same position, bit #2.
+ */
+ local_irq_save(flags);
+ status = __raw_readl(MMUCR);
+ status |= 0x04;
+ __raw_writel(status, MMUCR);
+ ctrl_barrier();
+ local_irq_restore(flags);
+}
diff --git a/arch/sh/mm/tlb-sh4.c b/arch/sh/mm/tlb-sh4.c
index f0c7b7397fa..d42dd7e443d 100644
--- a/arch/sh/mm/tlb-sh4.c
+++ b/arch/sh/mm/tlb-sh4.c
@@ -11,43 +11,24 @@
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/io.h>
-#include <asm/system.h>
#include <asm/mmu_context.h>
#include <asm/cacheflush.h>
-void update_mmu_cache(struct vm_area_struct * vma,
- unsigned long address, pte_t pte)
+void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t pte)
{
- unsigned long flags;
- unsigned long pteval;
- unsigned long vpn;
+ unsigned long flags, pteval, vpn;
- /* Ptrace may call this routine. */
+ /*
+ * Handle debugger faulting in for debuggee.
+ */
if (vma && current->active_mm != vma->vm_mm)
return;
-#ifndef CONFIG_CACHE_OFF
- {
- unsigned long pfn = pte_pfn(pte);
-
- if (pfn_valid(pfn)) {
- struct page *page = pfn_to_page(pfn);
-
- if (!test_bit(PG_mapped, &page->flags)) {
- unsigned long phys = pte_val(pte) & PTE_PHYS_MASK;
- __flush_wback_region((void *)P1SEGADDR(phys),
- PAGE_SIZE);
- __set_bit(PG_mapped, &page->flags);
- }
- }
- }
-#endif
-
local_irq_save(flags);
/* Set PTEH register */
vpn = (address & MMU_VPN_MASK) | get_asid();
- ctrl_outl(vpn, MMU_PTEH);
+ __raw_writel(vpn, MMU_PTEH);
pteval = pte.pte_low;
@@ -59,11 +40,14 @@ void update_mmu_cache(struct vm_area_struct * vma,
* the protection bits (with the exception of the compat-mode SZ
* and PR bits, which are cleared) being written out in PTEL.
*/
- ctrl_outl(pte.pte_high, MMU_PTEA);
+ __raw_writel(pte.pte_high, MMU_PTEA);
#else
- if (cpu_data->flags & CPU_HAS_PTEA)
- /* TODO: make this look less hacky */
- ctrl_outl(((pteval >> 28) & 0xe) | (pteval & 0x1), MMU_PTEA);
+ if (cpu_data->flags & CPU_HAS_PTEA) {
+ /* The last 3 bits and the first bit of pteval contain
+ * the PTEA timing control and space attribute bits.
+ */
+ __raw_writel(copy_ptea_attributes(pteval), MMU_PTEA);
+ }
#endif
/* Set PTEL register */
@@ -72,15 +56,14 @@ void update_mmu_cache(struct vm_area_struct * vma,
pteval |= _PAGE_WT;
#endif
/* conveniently, we want all the software flags to be 0 anyway */
- ctrl_outl(pteval, MMU_PTEL);
+ __raw_writel(pteval, MMU_PTEL);
/* Load the TLB */
asm volatile("ldtlb": /* no output */ : /* no input */ : "memory");
local_irq_restore(flags);
}
-void __uses_jump_to_uncached local_flush_tlb_one(unsigned long asid,
- unsigned long page)
+void local_flush_tlb_one(unsigned long asid, unsigned long page)
{
unsigned long addr, data;
@@ -93,6 +76,34 @@ void __uses_jump_to_uncached local_flush_tlb_one(unsigned long asid,
addr = MMU_UTLB_ADDRESS_ARRAY | MMU_PAGE_ASSOC_BIT;
data = page | asid; /* VALID bit is off */
jump_to_uncached();
- ctrl_outl(data, addr);
+ __raw_writel(data, addr);
back_to_cached();
}
+
+void local_flush_tlb_all(void)
+{
+ unsigned long flags, status;
+ int i;
+
+ /*
+ * Flush all the TLB.
+ */
+ local_irq_save(flags);
+ jump_to_uncached();
+
+ status = __raw_readl(MMUCR);
+ status = ((status & MMUCR_URB) >> MMUCR_URB_SHIFT);
+
+ if (status == 0)
+ status = MMUCR_URB_NENTRIES;
+
+ for (i = 0; i < status; i++)
+ __raw_writel(0x0, MMU_UTLB_ADDRESS_ARRAY | (i << 8));
+
+ for (i = 0; i < 4; i++)
+ __raw_writel(0x0, MMU_ITLB_ADDRESS_ARRAY | (i << 8));
+
+ back_to_cached();
+ ctrl_barrier();
+ local_irq_restore(flags);
+}
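The copy_ptea_attributes() helper introduced above is not shown in this diff; judging by the open-coded expression it replaces, ((pteval >> 28) & 0xe) | (pteval & 0x1), it presumably reduces to something like the sketch below (only the name comes from the patch, the body is an assumption):

	static inline unsigned long copy_ptea_attributes(unsigned long pteval)
	{
		/*
		 * Pack the PTEA space attribute and timing control bits:
		 * pteval bits 31:29 move down to bits 3:1 and bit 0 stays put,
		 * giving the same result as the removed open-coded expression.
		 */
		return ((pteval >> 28) & 0xe) | (pteval & 0x1);
	}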
diff --git a/arch/sh/mm/tlb-sh5.c b/arch/sh/mm/tlb-sh5.c
index f34274a1ded..e4bb2a8e0a6 100644
--- a/arch/sh/mm/tlb-sh5.c
+++ b/arch/sh/mm/tlb-sh5.c
@@ -15,11 +15,9 @@
#include <asm/mmu_context.h>
/**
- * sh64_tlb_init
- *
- * Perform initial setup for the DTLB and ITLB.
+ * sh64_tlb_init - Perform initial setup for the DTLB and ITLB.
*/
-int __init sh64_tlb_init(void)
+int sh64_tlb_init(void)
{
/* Assign some sane DTLB defaults */
cpu_data->dtlb.entries = 64;
@@ -46,9 +44,7 @@ int __init sh64_tlb_init(void)
}
/**
- * sh64_next_free_dtlb_entry
- *
- * Find the next available DTLB entry
+ * sh64_next_free_dtlb_entry - Find the next available DTLB entry
*/
unsigned long long sh64_next_free_dtlb_entry(void)
{
@@ -56,9 +52,7 @@ unsigned long long sh64_next_free_dtlb_entry(void)
}
/**
- * sh64_get_wired_dtlb_entry
- *
- * Allocate a wired (locked-in) entry in the DTLB
+ * sh64_get_wired_dtlb_entry - Allocate a wired (locked-in) entry in the DTLB
*/
unsigned long long sh64_get_wired_dtlb_entry(void)
{
@@ -71,12 +65,10 @@ unsigned long long sh64_get_wired_dtlb_entry(void)
}
/**
- * sh64_put_wired_dtlb_entry
+ * sh64_put_wired_dtlb_entry - Free a wired (locked-in) entry in the DTLB.
*
* @entry: Address of TLB slot.
*
- * Free a wired (locked-in) entry in the DTLB.
- *
* Works like a stack, last one to allocate must be first one to free.
*/
int sh64_put_wired_dtlb_entry(unsigned long long entry)
@@ -115,7 +107,7 @@ int sh64_put_wired_dtlb_entry(unsigned long long entry)
}
/**
- * sh64_setup_tlb_slot
+ * sh64_setup_tlb_slot - Load up a translation in a wired slot.
*
* @config_addr: Address of TLB slot.
* @eaddr: Virtual address.
@@ -125,26 +117,15 @@ int sh64_put_wired_dtlb_entry(unsigned long long entry)
* Load up a virtual<->physical translation for @eaddr<->@paddr in the
* pre-allocated TLB slot @config_addr (see sh64_get_wired_dtlb_entry).
*/
-inline void sh64_setup_tlb_slot(unsigned long long config_addr,
- unsigned long eaddr,
- unsigned long asid,
- unsigned long paddr)
+void sh64_setup_tlb_slot(unsigned long long config_addr, unsigned long eaddr,
+ unsigned long asid, unsigned long paddr)
{
unsigned long long pteh, ptel;
- /* Sign extension */
-#if (NEFF == 32)
- pteh = (unsigned long long)(signed long long)(signed long) eaddr;
-#else
-#error "Can't sign extend more than 32 bits yet"
-#endif
+ pteh = neff_sign_extend(eaddr);
pteh &= PAGE_MASK;
pteh |= (asid << PTEH_ASID_SHIFT) | PTEH_VALID;
-#if (NEFF == 32)
- ptel = (unsigned long long)(signed long long)(signed long) paddr;
-#else
-#error "Can't sign extend more than 32 bits yet"
-#endif
+ ptel = neff_sign_extend(paddr);
ptel &= PAGE_MASK;
ptel |= (_PAGE_CACHABLE | _PAGE_READ | _PAGE_WRITE);
@@ -154,11 +135,90 @@ inline void sh64_setup_tlb_slot(unsigned long long config_addr,
}
/**
- * sh64_teardown_tlb_slot
+ * sh64_teardown_tlb_slot - Teardown a translation.
*
* @config_addr: Address of TLB slot.
*
* Teardown any existing mapping in the TLB slot @config_addr.
*/
-inline void sh64_teardown_tlb_slot(unsigned long long config_addr)
+void sh64_teardown_tlb_slot(unsigned long long config_addr)
__attribute__ ((alias("__flush_tlb_slot")));
+
+static int dtlb_entry;
+static unsigned long long dtlb_entries[64];
+
+void tlb_wire_entry(struct vm_area_struct *vma, unsigned long addr, pte_t pte)
+{
+ unsigned long long entry;
+ unsigned long paddr, flags;
+
+ BUG_ON(dtlb_entry == ARRAY_SIZE(dtlb_entries));
+
+ local_irq_save(flags);
+
+ entry = sh64_get_wired_dtlb_entry();
+ dtlb_entries[dtlb_entry++] = entry;
+
+ paddr = pte_val(pte) & _PAGE_FLAGS_HARDWARE_MASK;
+ paddr &= ~PAGE_MASK;
+
+ sh64_setup_tlb_slot(entry, addr, get_asid(), paddr);
+
+ local_irq_restore(flags);
+}
+
+void tlb_unwire_entry(void)
+{
+ unsigned long long entry;
+ unsigned long flags;
+
+ BUG_ON(!dtlb_entry);
+
+ local_irq_save(flags);
+ entry = dtlb_entries[dtlb_entry--];
+
+ sh64_teardown_tlb_slot(entry);
+ sh64_put_wired_dtlb_entry(entry);
+
+ local_irq_restore(flags);
+}
+
+void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t pte)
+{
+ unsigned long long ptel;
+ unsigned long long pteh=0;
+ struct tlb_info *tlbp;
+ unsigned long long next;
+ unsigned int fault_code = get_thread_fault_code();
+
+ /* Get PTEL first */
+ ptel = pte.pte_low;
+
+ /*
+ * Set PTEH register
+ */
+ pteh = neff_sign_extend(address & MMU_VPN_MASK);
+
+ /* Set the ASID. */
+ pteh |= get_asid() << PTEH_ASID_SHIFT;
+ pteh |= PTEH_VALID;
+
+ /* Set PTEL register, set_pte has performed the sign extension */
+ ptel &= _PAGE_FLAGS_HARDWARE_MASK; /* drop software flags */
+
+ if (fault_code & FAULT_CODE_ITLB)
+ tlbp = &cpu_data->itlb;
+ else
+ tlbp = &cpu_data->dtlb;
+
+ next = tlbp->next;
+ __flush_tlb_slot(next);
+ asm volatile ("putcfg %0,1,%2\n\n\t"
+ "putcfg %0,0,%1\n"
+ : : "r" (next), "r" (pteh), "r" (ptel) );
+
+ next += TLB_STEP;
+ if (next > tlbp->last)
+ next = tlbp->first;
+ tlbp->next = next;
+}
diff --git a/arch/sh/mm/tlb-urb.c b/arch/sh/mm/tlb-urb.c
new file mode 100644
index 00000000000..c92ce20db39
--- /dev/null
+++ b/arch/sh/mm/tlb-urb.c
@@ -0,0 +1,93 @@
+/*
+ * arch/sh/mm/tlb-urb.c
+ *
+ * TLB entry wiring helpers for URB-equipped parts.
+ *
+ * Copyright (C) 2010 Matt Fleming
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/mm.h>
+#include <linux/io.h>
+#include <asm/tlb.h>
+#include <asm/mmu_context.h>
+
+/*
+ * Load the entry for 'addr' into the TLB and wire the entry.
+ */
+void tlb_wire_entry(struct vm_area_struct *vma, unsigned long addr, pte_t pte)
+{
+ unsigned long status, flags;
+ int urb;
+
+ local_irq_save(flags);
+
+ status = __raw_readl(MMUCR);
+ urb = (status & MMUCR_URB) >> MMUCR_URB_SHIFT;
+ status &= ~MMUCR_URC;
+
+ /*
+ * Make sure we're not trying to wire the last TLB entry slot.
+ */
+ BUG_ON(!--urb);
+
+ urb = urb % MMUCR_URB_NENTRIES;
+
+ /*
+ * Insert this entry into the highest non-wired TLB slot (via
+ * the URC field).
+ */
+ status |= (urb << MMUCR_URC_SHIFT);
+ __raw_writel(status, MMUCR);
+ ctrl_barrier();
+
+ /* Load the entry into the TLB */
+ __update_tlb(vma, addr, pte);
+
+ /* ... and wire it up. */
+ status = __raw_readl(MMUCR);
+
+ status &= ~MMUCR_URB;
+ status |= (urb << MMUCR_URB_SHIFT);
+
+ __raw_writel(status, MMUCR);
+ ctrl_barrier();
+
+ local_irq_restore(flags);
+}
+
+/*
+ * Unwire the last wired TLB entry.
+ *
+ * It should also be noted that it is not possible to wire and unwire
+ * TLB entries in an arbitrary order. If you wire TLB entry N, followed
+ * by entry N+1, you must unwire entry N+1 first, then entry N. In this
+ * respect, it works like a stack or LIFO queue.
+ */
+void tlb_unwire_entry(void)
+{
+ unsigned long status, flags;
+ int urb;
+
+ local_irq_save(flags);
+
+ status = __raw_readl(MMUCR);
+ urb = (status & MMUCR_URB) >> MMUCR_URB_SHIFT;
+ status &= ~MMUCR_URB;
+
+ /*
+ * Make sure we're not trying to unwire a TLB entry when none
+ * have been wired.
+ */
+ BUG_ON(urb++ == MMUCR_URB_NENTRIES);
+
+ urb = urb % MMUCR_URB_NENTRIES;
+
+ status |= (urb << MMUCR_URB_SHIFT);
+ __raw_writel(status, MMUCR);
+ ctrl_barrier();
+
+ local_irq_restore(flags);
+}
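Because wiring is strictly LIFO, callers must release entries in the reverse order they wired them. A minimal usage sketch, assuming two hypothetical mappings (vma_a/addr_a/pte_a and vma_b/addr_b/pte_b are placeholders, not names from the patch):

	tlb_wire_entry(vma_a, addr_a, pte_a);	/* wired first */
	tlb_wire_entry(vma_b, addr_b, pte_b);	/* wired second */

	tlb_unwire_entry();	/* must drop the addr_b entry first */
	tlb_unwire_entry();	/* then the addr_a entry */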
diff --git a/arch/sh/mm/tlbex_32.c b/arch/sh/mm/tlbex_32.c
new file mode 100644
index 00000000000..382262dc0c4
--- /dev/null
+++ b/arch/sh/mm/tlbex_32.c
@@ -0,0 +1,78 @@
+/*
+ * TLB miss handler for SH with an MMU.
+ *
+ * Copyright (C) 1999 Niibe Yutaka
+ * Copyright (C) 2003 - 2012 Paul Mundt
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/kprobes.h>
+#include <linux/kdebug.h>
+#include <asm/mmu_context.h>
+#include <asm/thread_info.h>
+
+/*
+ * Called with interrupts disabled.
+ */
+asmlinkage int __kprobes
+handle_tlbmiss(struct pt_regs *regs, unsigned long error_code,
+ unsigned long address)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+ pte_t entry;
+
+ /*
+ * We don't take page faults for P1, P2, and parts of P4; these
+ * are always mapped, whether due to legacy behaviour in
+ * 29-bit mode or due to PMB configuration in 32-bit mode.
+ */
+ if (address >= P3SEG && address < P3_ADDR_MAX) {
+ pgd = pgd_offset_k(address);
+ } else {
+ if (unlikely(address >= TASK_SIZE || !current->mm))
+ return 1;
+
+ pgd = pgd_offset(current->mm, address);
+ }
+
+ pud = pud_offset(pgd, address);
+ if (pud_none_or_clear_bad(pud))
+ return 1;
+ pmd = pmd_offset(pud, address);
+ if (pmd_none_or_clear_bad(pmd))
+ return 1;
+ pte = pte_offset_kernel(pmd, address);
+ entry = *pte;
+ if (unlikely(pte_none(entry) || pte_not_present(entry)))
+ return 1;
+ if (unlikely(error_code && !pte_write(entry)))
+ return 1;
+
+ if (error_code)
+ entry = pte_mkdirty(entry);
+ entry = pte_mkyoung(entry);
+
+ set_pte(pte, entry);
+
+#if defined(CONFIG_CPU_SH4) && !defined(CONFIG_SMP)
+ /*
+ * SH-4 does not set MMUCR.RC to the corresponding TLB entry in
+ * the case of an initial page write exception, so we need to
+ * flush it in order to avoid potential TLB entry duplication.
+ */
+ if (error_code == FAULT_CODE_INITIAL)
+ local_flush_tlb_one(get_asid(), address & PAGE_MASK);
+#endif
+
+ set_thread_fault_code(error_code);
+ update_mmu_cache(NULL, address, pte);
+
+ return 0;
+}
diff --git a/arch/sh/mm/tlbex_64.c b/arch/sh/mm/tlbex_64.c
new file mode 100644
index 00000000000..8557548fc53
--- /dev/null
+++ b/arch/sh/mm/tlbex_64.c
@@ -0,0 +1,166 @@
+/*
+ * The SH64 TLB miss.
+ *
+ * Original code from fault.c
+ * Copyright (C) 2000, 2001 Paolo Alberelli
+ *
+ * Fast PTE->TLB refill path
+ * Copyright (C) 2003 Richard.Curnow@superh.com
+ *
+ * IMPORTANT NOTES :
+ * The do_fast_page_fault function is called from a context in entry.S
+ * where very few registers have been saved. In particular, the code in
+ * this file must be compiled not to use ANY caller-save registers that
+ * are not part of the restricted save set. Also, it means that code in
+ * this file must not make calls to functions elsewhere in the kernel, or
+ * else the excepting context will see corruption in its caller-save
+ * registers. Plus, the entry.S save area is non-reentrant, so this code
+ * has to run with SR.BL==1, i.e. no interrupts taken inside it and panic
+ * on any exception.
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/kprobes.h>
+#include <asm/tlb.h>
+#include <asm/io.h>
+#include <asm/uaccess.h>
+#include <asm/pgalloc.h>
+#include <asm/mmu_context.h>
+
+static int handle_tlbmiss(unsigned long long protection_flags,
+ unsigned long address)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+ pte_t entry;
+
+ if (is_vmalloc_addr((void *)address)) {
+ pgd = pgd_offset_k(address);
+ } else {
+ if (unlikely(address >= TASK_SIZE || !current->mm))
+ return 1;
+
+ pgd = pgd_offset(current->mm, address);
+ }
+
+ pud = pud_offset(pgd, address);
+ if (pud_none(*pud) || !pud_present(*pud))
+ return 1;
+
+ pmd = pmd_offset(pud, address);
+ if (pmd_none(*pmd) || !pmd_present(*pmd))
+ return 1;
+
+ pte = pte_offset_kernel(pmd, address);
+ entry = *pte;
+ if (pte_none(entry) || !pte_present(entry))
+ return 1;
+
+ /*
+ * If the page doesn't have sufficient protection bits set to
+ * service the kind of fault being handled, there's not much
+ * point doing the TLB refill. Punt the fault to the general
+ * handler.
+ */
+ if ((pte_val(entry) & protection_flags) != protection_flags)
+ return 1;
+
+ update_mmu_cache(NULL, address, pte);
+
+ return 0;
+}
+
+/*
+ * Put all this information into one structure so that everything is just
+ * arithmetic relative to a single base address. This reduces the number
+ * of movi/shori pairs needed just to load addresses of static data.
+ */
+struct expevt_lookup {
+ unsigned short protection_flags[8];
+ unsigned char is_text_access[8];
+ unsigned char is_write_access[8];
+};
+
+#define PRU (1<<9)
+#define PRW (1<<8)
+#define PRX (1<<7)
+#define PRR (1<<6)
+
+/* Sized as 8 rather than 4 to allow checking the PTE's PRU bit against whether
+ the fault happened in user mode or privileged mode. */
+static struct expevt_lookup expevt_lookup_table = {
+ .protection_flags = {PRX, PRX, 0, 0, PRR, PRR, PRW, PRW},
+ .is_text_access = {1, 1, 0, 0, 0, 0, 0, 0}
+};
+
+static inline unsigned int
+expevt_to_fault_code(unsigned long expevt)
+{
+ if (expevt == 0xa40)
+ return FAULT_CODE_ITLB;
+ else if (expevt == 0x060)
+ return FAULT_CODE_WRITE;
+
+ return 0;
+}
+
+/*
+ * This routine handles page faults that can be serviced just by refilling a
+ * TLB entry from an existing page table entry. (This case represents a very
+ * large majority of page faults.) Return 0 if the fault was handled by the
+ * TLB refill, and 1 if it could not be handled here. (A non-zero return leads
+ * into the general fault handling in fault.c, which deals with mapping
+ * file-backed pages, stack growth, segmentation faults, swapping etc etc.)
+ */
+asmlinkage int __kprobes
+do_fast_page_fault(unsigned long long ssr_md, unsigned long long expevt,
+ unsigned long address)
+{
+ unsigned long long protection_flags;
+ unsigned long long index;
+ unsigned long long expevt4;
+ unsigned int fault_code;
+
+ /* The next few lines implement a way of hashing EXPEVT into a
+ * small array index which can be used to look up parameters
+ * specific to the type of TLBMISS being handled.
+ *
+ * Note:
+ * ITLBMISS has EXPEVT==0xa40
+ * RTLBMISS has EXPEVT==0x040
+ * WTLBMISS has EXPEVT==0x060
+ */
+ expevt4 = (expevt >> 4);
+ /* TODO : xor ssr_md into this expression too. Then we can check
+ * that PRU is set when it needs to be. */
+ index = expevt4 ^ (expevt4 >> 5);
+ index &= 7;
+
+ fault_code = expevt_to_fault_code(expevt);
+
+ protection_flags = expevt_lookup_table.protection_flags[index];
+
+ if (expevt_lookup_table.is_text_access[index])
+ fault_code |= FAULT_CODE_ITLB;
+ if (!ssr_md)
+ fault_code |= FAULT_CODE_USER;
+
+ set_thread_fault_code(fault_code);
+
+ return handle_tlbmiss(protection_flags, address);
+}
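The EXPEVT hash above can be checked by hand: the three miss types land on distinct slots whose protection flags match the lookup table. A small stand-alone sketch of that arithmetic (plain C, with the values copied from the code above):

	#include <assert.h>

	/* Same hash as do_fast_page_fault(): e4 = expevt >> 4, index = (e4 ^ (e4 >> 5)) & 7 */
	static unsigned int expevt_to_index(unsigned long expevt)
	{
		unsigned long e4 = expevt >> 4;

		return (e4 ^ (e4 >> 5)) & 7;
	}

	int main(void)
	{
		assert(expevt_to_index(0xa40) == 1);	/* ITLBMISS -> protection_flags[1] == PRX */
		assert(expevt_to_index(0x040) == 4);	/* RTLBMISS -> protection_flags[4] == PRR */
		assert(expevt_to_index(0x060) == 6);	/* WTLBMISS -> protection_flags[6] == PRW */
		return 0;
	}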
diff --git a/arch/sh/mm/tlbflush_32.c b/arch/sh/mm/tlbflush_32.c
index 6f45c1f8a7f..a6a20d6de4c 100644
--- a/arch/sh/mm/tlbflush_32.c
+++ b/arch/sh/mm/tlbflush_32.c
@@ -120,21 +120,18 @@ void local_flush_tlb_mm(struct mm_struct *mm)
}
}
-void local_flush_tlb_all(void)
+void __flush_tlb_global(void)
{
- unsigned long flags, status;
+ unsigned long flags;
+
+ local_irq_save(flags);
/*
- * Flush all the TLB.
- *
- * Write to the MMU control register's bit:
- * TF-bit for SH-3, TI-bit for SH-4.
- * It's same position, bit #2.
+ * This is the most destructive of the TLB flushing options,
+ * and will tear down all of the UTLB/ITLB mappings, including
+ * wired entries.
*/
- local_irq_save(flags);
- status = ctrl_inl(MMUCR);
- status |= 0x04;
- ctrl_outl(status, MMUCR);
- ctrl_barrier();
+ __raw_writel(__raw_readl(MMUCR) | MMUCR_TI, MMUCR);
+
local_irq_restore(flags);
}
diff --git a/arch/sh/mm/tlbflush_64.c b/arch/sh/mm/tlbflush_64.c
index 7876997ba19..f33fdd2558e 100644
--- a/arch/sh/mm/tlbflush_64.c
+++ b/arch/sh/mm/tlbflush_64.c
@@ -3,7 +3,7 @@
*
* Copyright (C) 2000, 2001 Paolo Alberelli
* Copyright (C) 2003 Richard Curnow (/proc/tlb, bug fixes)
- * Copyright (C) 2003 Paul Mundt
+ * Copyright (C) 2003 - 2012 Paul Mundt
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
@@ -20,322 +20,14 @@
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/smp.h>
+#include <linux/perf_event.h>
#include <linux/interrupt.h>
-#include <asm/system.h>
#include <asm/io.h>
#include <asm/tlb.h>
#include <asm/uaccess.h>
#include <asm/pgalloc.h>
#include <asm/mmu_context.h>
-extern void die(const char *,struct pt_regs *,long);
-
-#define PFLAG(val,flag) (( (val) & (flag) ) ? #flag : "" )
-#define PPROT(flag) PFLAG(pgprot_val(prot),flag)
-
-static inline void print_prots(pgprot_t prot)
-{
- printk("prot is 0x%08lx\n",pgprot_val(prot));
-
- printk("%s %s %s %s %s\n",PPROT(_PAGE_SHARED),PPROT(_PAGE_READ),
- PPROT(_PAGE_EXECUTE),PPROT(_PAGE_WRITE),PPROT(_PAGE_USER));
-}
-
-static inline void print_vma(struct vm_area_struct *vma)
-{
- printk("vma start 0x%08lx\n", vma->vm_start);
- printk("vma end 0x%08lx\n", vma->vm_end);
-
- print_prots(vma->vm_page_prot);
- printk("vm_flags 0x%08lx\n", vma->vm_flags);
-}
-
-static inline void print_task(struct task_struct *tsk)
-{
- printk("Task pid %d\n", task_pid_nr(tsk));
-}
-
-static pte_t *lookup_pte(struct mm_struct *mm, unsigned long address)
-{
- pgd_t *dir;
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
- pte_t entry;
-
- dir = pgd_offset(mm, address);
- if (pgd_none(*dir))
- return NULL;
-
- pud = pud_offset(dir, address);
- if (pud_none(*pud))
- return NULL;
-
- pmd = pmd_offset(pud, address);
- if (pmd_none(*pmd))
- return NULL;
-
- pte = pte_offset_kernel(pmd, address);
- entry = *pte;
- if (pte_none(entry) || !pte_present(entry))
- return NULL;
-
- return pte;
-}
-
-/*
- * This routine handles page faults. It determines the address,
- * and the problem, and then passes it off to one of the appropriate
- * routines.
- */
-asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long writeaccess,
- unsigned long textaccess, unsigned long address)
-{
- struct task_struct *tsk;
- struct mm_struct *mm;
- struct vm_area_struct * vma;
- const struct exception_table_entry *fixup;
- pte_t *pte;
- int fault;
-
- /* SIM
- * Note this is now called with interrupts still disabled
- * This is to cope with being called for a missing IO port
- * address with interrupts disabled. This should be fixed as
- * soon as we have a better 'fast path' miss handler.
- *
- * Plus take care how you try and debug this stuff.
- * For example, writing debug data to a port which you
- * have just faulted on is not going to work.
- */
-
- tsk = current;
- mm = tsk->mm;
-
- /* Not an IO address, so reenable interrupts */
- local_irq_enable();
-
- /*
- * If we're in an interrupt or have no user
- * context, we must not take the fault..
- */
- if (in_atomic() || !mm)
- goto no_context;
-
- /* TLB misses upon some cache flushes get done under cli() */
- down_read(&mm->mmap_sem);
-
- vma = find_vma(mm, address);
-
- if (!vma) {
-#ifdef DEBUG_FAULT
- print_task(tsk);
- printk("%s:%d fault, address is 0x%08x PC %016Lx textaccess %d writeaccess %d\n",
- __func__, __LINE__,
- address,regs->pc,textaccess,writeaccess);
- show_regs(regs);
-#endif
- goto bad_area;
- }
- if (vma->vm_start <= address) {
- goto good_area;
- }
-
- if (!(vma->vm_flags & VM_GROWSDOWN)) {
-#ifdef DEBUG_FAULT
- print_task(tsk);
- printk("%s:%d fault, address is 0x%08x PC %016Lx textaccess %d writeaccess %d\n",
- __func__, __LINE__,
- address,regs->pc,textaccess,writeaccess);
- show_regs(regs);
-
- print_vma(vma);
-#endif
- goto bad_area;
- }
- if (expand_stack(vma, address)) {
-#ifdef DEBUG_FAULT
- print_task(tsk);
- printk("%s:%d fault, address is 0x%08x PC %016Lx textaccess %d writeaccess %d\n",
- __func__, __LINE__,
- address,regs->pc,textaccess,writeaccess);
- show_regs(regs);
-#endif
- goto bad_area;
- }
-/*
- * Ok, we have a good vm_area for this memory access, so
- * we can handle it..
- */
-good_area:
- if (textaccess) {
- if (!(vma->vm_flags & VM_EXEC))
- goto bad_area;
- } else {
- if (writeaccess) {
- if (!(vma->vm_flags & VM_WRITE))
- goto bad_area;
- } else {
- if (!(vma->vm_flags & VM_READ))
- goto bad_area;
- }
- }
-
- /*
- * If for any reason at all we couldn't handle the fault,
- * make sure we exit gracefully rather than endlessly redo
- * the fault.
- */
-survive:
- fault = handle_mm_fault(mm, vma, address, writeaccess);
- if (unlikely(fault & VM_FAULT_ERROR)) {
- if (fault & VM_FAULT_OOM)
- goto out_of_memory;
- else if (fault & VM_FAULT_SIGBUS)
- goto do_sigbus;
- BUG();
- }
- if (fault & VM_FAULT_MAJOR)
- tsk->maj_flt++;
- else
- tsk->min_flt++;
-
- /* If we get here, the page fault has been handled. Do the TLB refill
- now from the newly-setup PTE, to avoid having to fault again right
- away on the same instruction. */
- pte = lookup_pte (mm, address);
- if (!pte) {
- /* From empirical evidence, we can get here, due to
- !pte_present(pte). (e.g. if a swap-in occurs, and the page
- is swapped back out again before the process that wanted it
- gets rescheduled?) */
- goto no_pte;
- }
-
- __do_tlb_refill(address, textaccess, pte);
-
-no_pte:
-
- up_read(&mm->mmap_sem);
- return;
-
-/*
- * Something tried to access memory that isn't in our memory map..
- * Fix it, but check if it's kernel or user first..
- */
-bad_area:
-#ifdef DEBUG_FAULT
- printk("fault:bad area\n");
-#endif
- up_read(&mm->mmap_sem);
-
- if (user_mode(regs)) {
- static int count=0;
- siginfo_t info;
- if (count < 4) {
- /* This is really to help debug faults when starting
- * usermode, so only need a few */
- count++;
- printk("user mode bad_area address=%08lx pid=%d (%s) pc=%08lx\n",
- address, task_pid_nr(current), current->comm,
- (unsigned long) regs->pc);
-#if 0
- show_regs(regs);
-#endif
- }
- if (is_global_init(tsk)) {
- panic("INIT had user mode bad_area\n");
- }
- tsk->thread.address = address;
- tsk->thread.error_code = writeaccess;
- info.si_signo = SIGSEGV;
- info.si_errno = 0;
- info.si_addr = (void *) address;
- force_sig_info(SIGSEGV, &info, tsk);
- return;
- }
-
-no_context:
-#ifdef DEBUG_FAULT
- printk("fault:No context\n");
-#endif
- /* Are we prepared to handle this kernel fault? */
- fixup = search_exception_tables(regs->pc);
- if (fixup) {
- regs->pc = fixup->fixup;
- return;
- }
-
-/*
- * Oops. The kernel tried to access some bad page. We'll have to
- * terminate things with extreme prejudice.
- *
- */
- if (address < PAGE_SIZE)
- printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");
- else
- printk(KERN_ALERT "Unable to handle kernel paging request");
- printk(" at virtual address %08lx\n", address);
- printk(KERN_ALERT "pc = %08Lx%08Lx\n", regs->pc >> 32, regs->pc & 0xffffffff);
- die("Oops", regs, writeaccess);
- do_exit(SIGKILL);
-
-/*
- * We ran out of memory, or some other thing happened to us that made
- * us unable to handle the page fault gracefully.
- */
-out_of_memory:
- if (is_global_init(current)) {
- panic("INIT out of memory\n");
- yield();
- goto survive;
- }
- printk("fault:Out of memory\n");
- up_read(&mm->mmap_sem);
- if (is_global_init(current)) {
- yield();
- down_read(&mm->mmap_sem);
- goto survive;
- }
- printk("VM: killing process %s\n", tsk->comm);
- if (user_mode(regs))
- do_group_exit(SIGKILL);
- goto no_context;
-
-do_sigbus:
- printk("fault:Do sigbus\n");
- up_read(&mm->mmap_sem);
-
- /*
- * Send a sigbus, regardless of whether we were in kernel
- * or user mode.
- */
- tsk->thread.address = address;
- tsk->thread.error_code = writeaccess;
- tsk->thread.trap_no = 14;
- force_sig(SIGBUS, tsk);
-
- /* Kernel mode? Handle exceptions or die */
- if (!user_mode(regs))
- goto no_context;
-}
-
-void update_mmu_cache(struct vm_area_struct * vma,
- unsigned long address, pte_t pte)
-{
- /*
- * This appears to get called once for every pte entry that gets
- * established => I don't think it's efficient to try refilling the
- * TLBs with the pages - some may not get accessed even. Also, for
- * executable pages, it is impossible to determine reliably here which
- * TLB they should be mapped into (or both even).
- *
- * So, just do nothing here and handle faults on demand. In the
- * TLBMISS handling case, the refill is now done anyway after the pte
- * has been fixed up, so that deals with most useful cases.
- */
-}
-
void local_flush_tlb_one(unsigned long asid, unsigned long page)
{
unsigned long long match, pteh=0, lpage;
@@ -344,7 +36,7 @@ void local_flush_tlb_one(unsigned long asid, unsigned long page)
/*
* Sign-extend based on neff.
*/
- lpage = (page & NEFF_SIGN) ? (page | NEFF_MASK) : page;
+ lpage = neff_sign_extend(page);
match = (asid << PTEH_ASID_SHIFT) | PTEH_VALID;
match |= lpage;
@@ -473,3 +165,8 @@ void local_flush_tlb_kernel_range(unsigned long start, unsigned long end)
/* FIXME: Optimize this later.. */
flush_tlb_all();
}
+
+void __flush_tlb_global(void)
+{
+ flush_tlb_all();
+}
diff --git a/arch/sh/mm/uncached.c b/arch/sh/mm/uncached.c
new file mode 100644
index 00000000000..a7767da815e
--- /dev/null
+++ b/arch/sh/mm/uncached.c
@@ -0,0 +1,43 @@
+#include <linux/init.h>
+#include <linux/module.h>
+#include <asm/sizes.h>
+#include <asm/page.h>
+#include <asm/addrspace.h>
+
+/*
+ * This is the offset of the uncached section from its cached alias.
+ *
+ * Legacy platforms handle trivial transitions between cached and
+ * uncached segments by making use of the 1:1 mapping relationship in
+ * 512MB lowmem, others via a special uncached mapping.
+ *
+ * The default value is only valid in 29-bit mode; in 32-bit mode it will
+ * be updated by the early PMB initialization code.
+ */
+unsigned long cached_to_uncached = SZ_512M;
+unsigned long uncached_size = SZ_512M;
+unsigned long uncached_start, uncached_end;
+EXPORT_SYMBOL(uncached_start);
+EXPORT_SYMBOL(uncached_end);
+
+int virt_addr_uncached(unsigned long kaddr)
+{
+ return (kaddr >= uncached_start) && (kaddr < uncached_end);
+}
+EXPORT_SYMBOL(virt_addr_uncached);
+
+void __init uncached_init(void)
+{
+#if defined(CONFIG_29BIT) || !defined(CONFIG_MMU)
+ uncached_start = P2SEG;
+#else
+ uncached_start = memory_end;
+#endif
+ uncached_end = uncached_start + uncached_size;
+}
+
+void __init uncached_resize(unsigned long size)
+{
+ uncached_size = size;
+ uncached_end = uncached_start + uncached_size;
+}
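Given that cached_to_uncached is defined as the offset of the uncached section from its cached alias, translating a cached lowmem address is a single addition, and virt_addr_uncached() can then confirm the result falls inside [uncached_start, uncached_end). A minimal sketch, with the helper name chosen here for illustration only:

	/* Illustrative: compute the uncached alias of a cached lowmem address. */
	static inline unsigned long to_uncached_alias(unsigned long cached_kaddr)
	{
		return cached_kaddr + cached_to_uncached;
	}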