Diffstat (limited to 'arch/arm/common')
27 files changed, 4477 insertions, 2617 deletions
diff --git a/arch/arm/common/Kconfig b/arch/arm/common/Kconfig index 4efbb9df044..c3a4e9ceba3 100644 --- a/arch/arm/common/Kconfig +++ b/arch/arm/common/Kconfig @@ -1,21 +1,4 @@ -config ARM_GIC - bool - -config ARM_VIC - bool - -config ARM_VIC_NR - int - default 2 - depends on ARM_VIC - help - The maximum number of VICs available in the system, for - power management. - -config ICST525 - bool - -config ICST307 +config ICST bool config SA1111 @@ -26,9 +9,6 @@ config DMABOUNCE bool select ZONE_DMA -config TIMER_ACORN - bool - config SHARP_LOCOMO bool @@ -38,5 +18,5 @@ config SHARP_PARAM config SHARP_SCOOP bool -config COMMON_CLKDEV +config TI_PRIV_EDMA bool diff --git a/arch/arm/common/Makefile b/arch/arm/common/Makefile index 76be7ff2a7c..70b1eff477b 100644 --- a/arch/arm/common/Makefile +++ b/arch/arm/common/Makefile @@ -2,18 +2,20 @@ # Makefile for the linux kernel. # -obj-$(CONFIG_ARM_GIC) += gic.o -obj-$(CONFIG_ARM_VIC) += vic.o -obj-$(CONFIG_ICST525) += icst525.o -obj-$(CONFIG_ICST307) += icst307.o +obj-y += firmware.o + +obj-$(CONFIG_ICST) += icst.o obj-$(CONFIG_SA1111) += sa1111.o -obj-$(CONFIG_PCI_HOST_VIA82C505) += via82c505.o obj-$(CONFIG_DMABOUNCE) += dmabounce.o -obj-$(CONFIG_TIMER_ACORN) += time-acorn.o obj-$(CONFIG_SHARP_LOCOMO) += locomo.o obj-$(CONFIG_SHARP_PARAM) += sharpsl_param.o obj-$(CONFIG_SHARP_SCOOP) += scoop.o -obj-$(CONFIG_ARCH_IXP2000) += uengine.o -obj-$(CONFIG_ARCH_IXP23XX) += uengine.o obj-$(CONFIG_PCI_HOST_ITE8152) += it8152.o -obj-$(CONFIG_COMMON_CLKDEV) += clkdev.o +obj-$(CONFIG_ARM_TIMER_SP804) += timer-sp.o +obj-$(CONFIG_MCPM) += mcpm_head.o mcpm_entry.o mcpm_platsmp.o vlock.o +CFLAGS_REMOVE_mcpm_entry.o = -pg +AFLAGS_mcpm_head.o := -march=armv7-a +AFLAGS_vlock.o := -march=armv7-a +obj-$(CONFIG_TI_PRIV_EDMA) += edma.o +obj-$(CONFIG_BL_SWITCHER) += bL_switcher.o +obj-$(CONFIG_BL_SWITCHER_DUMMY_IF) += bL_switcher_dummy_if.o diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c new file mode 100644 index 00000000000..490f3dced74 --- /dev/null +++ b/arch/arm/common/bL_switcher.c @@ -0,0 +1,824 @@ +/* + * arch/arm/common/bL_switcher.c -- big.LITTLE cluster switcher core driver + * + * Created by: Nicolas Pitre, March 2012 + * Copyright: (C) 2012-2013 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/atomic.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/interrupt.h> +#include <linux/cpu_pm.h> +#include <linux/cpu.h> +#include <linux/cpumask.h> +#include <linux/kthread.h> +#include <linux/wait.h> +#include <linux/time.h> +#include <linux/clockchips.h> +#include <linux/hrtimer.h> +#include <linux/tick.h> +#include <linux/notifier.h> +#include <linux/mm.h> +#include <linux/mutex.h> +#include <linux/smp.h> +#include <linux/spinlock.h> +#include <linux/string.h> +#include <linux/sysfs.h> +#include <linux/irqchip/arm-gic.h> +#include <linux/moduleparam.h> + +#include <asm/smp_plat.h> +#include <asm/cputype.h> +#include <asm/suspend.h> +#include <asm/mcpm.h> +#include <asm/bL_switcher.h> + +#define CREATE_TRACE_POINTS +#include <trace/events/power_cpu_migrate.h> + + +/* + * Use our own MPIDR accessors as the generic ones in asm/cputype.h have + * __attribute_const__ and we don't want the compiler to assume any + * constness here as the value _does_ change along some code paths. 
+ */ + +static int read_mpidr(void) +{ + unsigned int id; + asm volatile ("mrc p15, 0, %0, c0, c0, 5" : "=r" (id)); + return id & MPIDR_HWID_BITMASK; +} + +/* + * Get a global nanosecond time stamp for tracing. + */ +static s64 get_ns(void) +{ + struct timespec ts; + getnstimeofday(&ts); + return timespec_to_ns(&ts); +} + +/* + * bL switcher core code. + */ + +static void bL_do_switch(void *_arg) +{ + unsigned ib_mpidr, ib_cpu, ib_cluster; + long volatile handshake, **handshake_ptr = _arg; + + pr_debug("%s\n", __func__); + + ib_mpidr = cpu_logical_map(smp_processor_id()); + ib_cpu = MPIDR_AFFINITY_LEVEL(ib_mpidr, 0); + ib_cluster = MPIDR_AFFINITY_LEVEL(ib_mpidr, 1); + + /* Advertise our handshake location */ + if (handshake_ptr) { + handshake = 0; + *handshake_ptr = &handshake; + } else + handshake = -1; + + /* + * Our state has been saved at this point. Let's release our + * inbound CPU. + */ + mcpm_set_entry_vector(ib_cpu, ib_cluster, cpu_resume); + sev(); + + /* + * From this point, we must assume that our counterpart CPU might + * have taken over in its parallel world already, as if execution + * just returned from cpu_suspend(). It is therefore important to + * be very careful not to make any change the other guy is not + * expecting. This is why we need stack isolation. + * + * Fancy under cover tasks could be performed here. For now + * we have none. + */ + + /* + * Let's wait until our inbound is alive. + */ + while (!handshake) { + wfe(); + smp_mb(); + } + + /* Let's put ourself down. */ + mcpm_cpu_power_down(); + + /* should never get here */ + BUG(); +} + +/* + * Stack isolation. To ensure 'current' remains valid, we just use another + * piece of our thread's stack space which should be fairly lightly used. + * The selected area starts just above the thread_info structure located + * at the very bottom of the stack, aligned to a cache line, and indexed + * with the cluster number. + */ +#define STACK_SIZE 512 +extern void call_with_stack(void (*fn)(void *), void *arg, void *sp); +static int bL_switchpoint(unsigned long _arg) +{ + unsigned int mpidr = read_mpidr(); + unsigned int clusterid = MPIDR_AFFINITY_LEVEL(mpidr, 1); + void *stack = current_thread_info() + 1; + stack = PTR_ALIGN(stack, L1_CACHE_BYTES); + stack += clusterid * STACK_SIZE + STACK_SIZE; + call_with_stack(bL_do_switch, (void *)_arg, stack); + BUG(); +} + +/* + * Generic switcher interface + */ + +static unsigned int bL_gic_id[MAX_CPUS_PER_CLUSTER][MAX_NR_CLUSTERS]; +static int bL_switcher_cpu_pairing[NR_CPUS]; + +/* + * bL_switch_to - Switch to a specific cluster for the current CPU + * @new_cluster_id: the ID of the cluster to switch to. + * + * This function must be called on the CPU to be switched. + * Returns 0 on success, else a negative status code. 
+ */ +static int bL_switch_to(unsigned int new_cluster_id) +{ + unsigned int mpidr, this_cpu, that_cpu; + unsigned int ob_mpidr, ob_cpu, ob_cluster, ib_mpidr, ib_cpu, ib_cluster; + struct completion inbound_alive; + struct tick_device *tdev; + enum clock_event_mode tdev_mode; + long volatile *handshake_ptr; + int ipi_nr, ret; + + this_cpu = smp_processor_id(); + ob_mpidr = read_mpidr(); + ob_cpu = MPIDR_AFFINITY_LEVEL(ob_mpidr, 0); + ob_cluster = MPIDR_AFFINITY_LEVEL(ob_mpidr, 1); + BUG_ON(cpu_logical_map(this_cpu) != ob_mpidr); + + if (new_cluster_id == ob_cluster) + return 0; + + that_cpu = bL_switcher_cpu_pairing[this_cpu]; + ib_mpidr = cpu_logical_map(that_cpu); + ib_cpu = MPIDR_AFFINITY_LEVEL(ib_mpidr, 0); + ib_cluster = MPIDR_AFFINITY_LEVEL(ib_mpidr, 1); + + pr_debug("before switch: CPU %d MPIDR %#x -> %#x\n", + this_cpu, ob_mpidr, ib_mpidr); + + this_cpu = smp_processor_id(); + + /* Close the gate for our entry vectors */ + mcpm_set_entry_vector(ob_cpu, ob_cluster, NULL); + mcpm_set_entry_vector(ib_cpu, ib_cluster, NULL); + + /* Install our "inbound alive" notifier. */ + init_completion(&inbound_alive); + ipi_nr = register_ipi_completion(&inbound_alive, this_cpu); + ipi_nr |= ((1 << 16) << bL_gic_id[ob_cpu][ob_cluster]); + mcpm_set_early_poke(ib_cpu, ib_cluster, gic_get_sgir_physaddr(), ipi_nr); + + /* + * Let's wake up the inbound CPU now in case it requires some delay + * to come online, but leave it gated in our entry vector code. + */ + ret = mcpm_cpu_power_up(ib_cpu, ib_cluster); + if (ret) { + pr_err("%s: mcpm_cpu_power_up() returned %d\n", __func__, ret); + return ret; + } + + /* + * Raise a SGI on the inbound CPU to make sure it doesn't stall + * in a possible WFI, such as in bL_power_down(). + */ + gic_send_sgi(bL_gic_id[ib_cpu][ib_cluster], 0); + + /* + * Wait for the inbound to come up. This allows for other + * tasks to be scheduled in the mean time. + */ + wait_for_completion(&inbound_alive); + mcpm_set_early_poke(ib_cpu, ib_cluster, 0, 0); + + /* + * From this point we are entering the switch critical zone + * and can't take any interrupts anymore. + */ + local_irq_disable(); + local_fiq_disable(); + trace_cpu_migrate_begin(get_ns(), ob_mpidr); + + /* redirect GIC's SGIs to our counterpart */ + gic_migrate_target(bL_gic_id[ib_cpu][ib_cluster]); + + tdev = tick_get_device(this_cpu); + if (tdev && !cpumask_equal(tdev->evtdev->cpumask, cpumask_of(this_cpu))) + tdev = NULL; + if (tdev) { + tdev_mode = tdev->evtdev->mode; + clockevents_set_mode(tdev->evtdev, CLOCK_EVT_MODE_SHUTDOWN); + } + + ret = cpu_pm_enter(); + + /* we can not tolerate errors at this point */ + if (ret) + panic("%s: cpu_pm_enter() returned %d\n", __func__, ret); + + /* Swap the physical CPUs in the logical map for this logical CPU. */ + cpu_logical_map(this_cpu) = ib_mpidr; + cpu_logical_map(that_cpu) = ob_mpidr; + + /* Let's do the actual CPU switch. 
*/ + ret = cpu_suspend((unsigned long)&handshake_ptr, bL_switchpoint); + if (ret > 0) + panic("%s: cpu_suspend() returned %d\n", __func__, ret); + + /* We are executing on the inbound CPU at this point */ + mpidr = read_mpidr(); + pr_debug("after switch: CPU %d MPIDR %#x\n", this_cpu, mpidr); + BUG_ON(mpidr != ib_mpidr); + + mcpm_cpu_powered_up(); + + ret = cpu_pm_exit(); + + if (tdev) { + clockevents_set_mode(tdev->evtdev, tdev_mode); + clockevents_program_event(tdev->evtdev, + tdev->evtdev->next_event, 1); + } + + trace_cpu_migrate_finish(get_ns(), ib_mpidr); + local_fiq_enable(); + local_irq_enable(); + + *handshake_ptr = 1; + dsb_sev(); + + if (ret) + pr_err("%s exiting with error %d\n", __func__, ret); + return ret; +} + +struct bL_thread { + spinlock_t lock; + struct task_struct *task; + wait_queue_head_t wq; + int wanted_cluster; + struct completion started; + bL_switch_completion_handler completer; + void *completer_cookie; +}; + +static struct bL_thread bL_threads[NR_CPUS]; + +static int bL_switcher_thread(void *arg) +{ + struct bL_thread *t = arg; + struct sched_param param = { .sched_priority = 1 }; + int cluster; + bL_switch_completion_handler completer; + void *completer_cookie; + + sched_setscheduler_nocheck(current, SCHED_FIFO, ¶m); + complete(&t->started); + + do { + if (signal_pending(current)) + flush_signals(current); + wait_event_interruptible(t->wq, + t->wanted_cluster != -1 || + kthread_should_stop()); + + spin_lock(&t->lock); + cluster = t->wanted_cluster; + completer = t->completer; + completer_cookie = t->completer_cookie; + t->wanted_cluster = -1; + t->completer = NULL; + spin_unlock(&t->lock); + + if (cluster != -1) { + bL_switch_to(cluster); + + if (completer) + completer(completer_cookie); + } + } while (!kthread_should_stop()); + + return 0; +} + +static struct task_struct *bL_switcher_thread_create(int cpu, void *arg) +{ + struct task_struct *task; + + task = kthread_create_on_node(bL_switcher_thread, arg, + cpu_to_node(cpu), "kswitcher_%d", cpu); + if (!IS_ERR(task)) { + kthread_bind(task, cpu); + wake_up_process(task); + } else + pr_err("%s failed for CPU %d\n", __func__, cpu); + return task; +} + +/* + * bL_switch_request_cb - Switch to a specific cluster for the given CPU, + * with completion notification via a callback + * + * @cpu: the CPU to switch + * @new_cluster_id: the ID of the cluster to switch to. + * @completer: switch completion callback. if non-NULL, + * @completer(@completer_cookie) will be called on completion of + * the switch, in non-atomic context. + * @completer_cookie: opaque context argument for @completer. + * + * This function causes a cluster switch on the given CPU by waking up + * the appropriate switcher thread. This function may or may not return + * before the switch has occurred. + * + * If a @completer callback function is supplied, it will be called when + * the switch is complete. This can be used to determine asynchronously + * when the switch is complete, regardless of when bL_switch_request() + * returns. When @completer is supplied, no new switch request is permitted + * for the affected CPU until after the switch is complete, and @completer + * has returned. 
+ */ +int bL_switch_request_cb(unsigned int cpu, unsigned int new_cluster_id, + bL_switch_completion_handler completer, + void *completer_cookie) +{ + struct bL_thread *t; + + if (cpu >= ARRAY_SIZE(bL_threads)) { + pr_err("%s: cpu %d out of bounds\n", __func__, cpu); + return -EINVAL; + } + + t = &bL_threads[cpu]; + + if (IS_ERR(t->task)) + return PTR_ERR(t->task); + if (!t->task) + return -ESRCH; + + spin_lock(&t->lock); + if (t->completer) { + spin_unlock(&t->lock); + return -EBUSY; + } + t->completer = completer; + t->completer_cookie = completer_cookie; + t->wanted_cluster = new_cluster_id; + spin_unlock(&t->lock); + wake_up(&t->wq); + return 0; +} +EXPORT_SYMBOL_GPL(bL_switch_request_cb); + +/* + * Activation and configuration code. + */ + +static DEFINE_MUTEX(bL_switcher_activation_lock); +static BLOCKING_NOTIFIER_HEAD(bL_activation_notifier); +static unsigned int bL_switcher_active; +static unsigned int bL_switcher_cpu_original_cluster[NR_CPUS]; +static cpumask_t bL_switcher_removed_logical_cpus; + +int bL_switcher_register_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&bL_activation_notifier, nb); +} +EXPORT_SYMBOL_GPL(bL_switcher_register_notifier); + +int bL_switcher_unregister_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_unregister(&bL_activation_notifier, nb); +} +EXPORT_SYMBOL_GPL(bL_switcher_unregister_notifier); + +static int bL_activation_notify(unsigned long val) +{ + int ret; + + ret = blocking_notifier_call_chain(&bL_activation_notifier, val, NULL); + if (ret & NOTIFY_STOP_MASK) + pr_err("%s: notifier chain failed with status 0x%x\n", + __func__, ret); + return notifier_to_errno(ret); +} + +static void bL_switcher_restore_cpus(void) +{ + int i; + + for_each_cpu(i, &bL_switcher_removed_logical_cpus) { + struct device *cpu_dev = get_cpu_device(i); + int ret = device_online(cpu_dev); + if (ret) + dev_err(cpu_dev, "switcher: unable to restore CPU\n"); + } +} + +static int bL_switcher_halve_cpus(void) +{ + int i, j, cluster_0, gic_id, ret; + unsigned int cpu, cluster, mask; + cpumask_t available_cpus; + + /* First pass to validate what we have */ + mask = 0; + for_each_online_cpu(i) { + cpu = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 0); + cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 1); + if (cluster >= 2) { + pr_err("%s: only dual cluster systems are supported\n", __func__); + return -EINVAL; + } + if (WARN_ON(cpu >= MAX_CPUS_PER_CLUSTER)) + return -EINVAL; + mask |= (1 << cluster); + } + if (mask != 3) { + pr_err("%s: no CPU pairing possible\n", __func__); + return -EINVAL; + } + + /* + * Now let's do the pairing. We match each CPU with another CPU + * from a different cluster. To get a uniform scheduling behavior + * without fiddling with CPU topology and compute capacity data, + * we'll use logical CPUs initially belonging to the same cluster. + */ + memset(bL_switcher_cpu_pairing, -1, sizeof(bL_switcher_cpu_pairing)); + cpumask_copy(&available_cpus, cpu_online_mask); + cluster_0 = -1; + for_each_cpu(i, &available_cpus) { + int match = -1; + cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 1); + if (cluster_0 == -1) + cluster_0 = cluster; + if (cluster != cluster_0) + continue; + cpumask_clear_cpu(i, &available_cpus); + for_each_cpu(j, &available_cpus) { + cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(j), 1); + /* + * Let's remember the last match to create "odd" + * pairings on purpose in order for other code not + * to assume any relation between physical and + * logical CPU numbers. 
+ */ + if (cluster != cluster_0) + match = j; + } + if (match != -1) { + bL_switcher_cpu_pairing[i] = match; + cpumask_clear_cpu(match, &available_cpus); + pr_info("CPU%d paired with CPU%d\n", i, match); + } + } + + /* + * Now we disable the unwanted CPUs i.e. everything that has no + * pairing information (that includes the pairing counterparts). + */ + cpumask_clear(&bL_switcher_removed_logical_cpus); + for_each_online_cpu(i) { + cpu = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 0); + cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 1); + + /* Let's take note of the GIC ID for this CPU */ + gic_id = gic_get_cpu_id(i); + if (gic_id < 0) { + pr_err("%s: bad GIC ID for CPU %d\n", __func__, i); + bL_switcher_restore_cpus(); + return -EINVAL; + } + bL_gic_id[cpu][cluster] = gic_id; + pr_info("GIC ID for CPU %u cluster %u is %u\n", + cpu, cluster, gic_id); + + if (bL_switcher_cpu_pairing[i] != -1) { + bL_switcher_cpu_original_cluster[i] = cluster; + continue; + } + + ret = device_offline(get_cpu_device(i)); + if (ret) { + bL_switcher_restore_cpus(); + return ret; + } + cpumask_set_cpu(i, &bL_switcher_removed_logical_cpus); + } + + return 0; +} + +/* Determine the logical CPU a given physical CPU is grouped on. */ +int bL_switcher_get_logical_index(u32 mpidr) +{ + int cpu; + + if (!bL_switcher_active) + return -EUNATCH; + + mpidr &= MPIDR_HWID_BITMASK; + for_each_online_cpu(cpu) { + int pairing = bL_switcher_cpu_pairing[cpu]; + if (pairing == -1) + continue; + if ((mpidr == cpu_logical_map(cpu)) || + (mpidr == cpu_logical_map(pairing))) + return cpu; + } + return -EINVAL; +} + +static void bL_switcher_trace_trigger_cpu(void *__always_unused info) +{ + trace_cpu_migrate_current(get_ns(), read_mpidr()); +} + +int bL_switcher_trace_trigger(void) +{ + int ret; + + preempt_disable(); + + bL_switcher_trace_trigger_cpu(NULL); + ret = smp_call_function(bL_switcher_trace_trigger_cpu, NULL, true); + + preempt_enable(); + + return ret; +} +EXPORT_SYMBOL_GPL(bL_switcher_trace_trigger); + +static int bL_switcher_enable(void) +{ + int cpu, ret; + + mutex_lock(&bL_switcher_activation_lock); + lock_device_hotplug(); + if (bL_switcher_active) { + unlock_device_hotplug(); + mutex_unlock(&bL_switcher_activation_lock); + return 0; + } + + pr_info("big.LITTLE switcher initializing\n"); + + ret = bL_activation_notify(BL_NOTIFY_PRE_ENABLE); + if (ret) + goto error; + + ret = bL_switcher_halve_cpus(); + if (ret) + goto error; + + bL_switcher_trace_trigger(); + + for_each_online_cpu(cpu) { + struct bL_thread *t = &bL_threads[cpu]; + spin_lock_init(&t->lock); + init_waitqueue_head(&t->wq); + init_completion(&t->started); + t->wanted_cluster = -1; + t->task = bL_switcher_thread_create(cpu, t); + } + + bL_switcher_active = 1; + bL_activation_notify(BL_NOTIFY_POST_ENABLE); + pr_info("big.LITTLE switcher initialized\n"); + goto out; + +error: + pr_warn("big.LITTLE switcher initialization failed\n"); + bL_activation_notify(BL_NOTIFY_POST_DISABLE); + +out: + unlock_device_hotplug(); + mutex_unlock(&bL_switcher_activation_lock); + return ret; +} + +#ifdef CONFIG_SYSFS + +static void bL_switcher_disable(void) +{ + unsigned int cpu, cluster; + struct bL_thread *t; + struct task_struct *task; + + mutex_lock(&bL_switcher_activation_lock); + lock_device_hotplug(); + + if (!bL_switcher_active) + goto out; + + if (bL_activation_notify(BL_NOTIFY_PRE_DISABLE) != 0) { + bL_activation_notify(BL_NOTIFY_POST_ENABLE); + goto out; + } + + bL_switcher_active = 0; + + /* + * To deactivate the switcher, we must shut down the switcher + * threads 
to prevent any other requests from being accepted. + * Then, if the final cluster for given logical CPU is not the + * same as the original one, we'll recreate a switcher thread + * just for the purpose of switching the CPU back without any + * possibility for interference from external requests. + */ + for_each_online_cpu(cpu) { + t = &bL_threads[cpu]; + task = t->task; + t->task = NULL; + if (!task || IS_ERR(task)) + continue; + kthread_stop(task); + /* no more switch may happen on this CPU at this point */ + cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(cpu), 1); + if (cluster == bL_switcher_cpu_original_cluster[cpu]) + continue; + init_completion(&t->started); + t->wanted_cluster = bL_switcher_cpu_original_cluster[cpu]; + task = bL_switcher_thread_create(cpu, t); + if (!IS_ERR(task)) { + wait_for_completion(&t->started); + kthread_stop(task); + cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(cpu), 1); + if (cluster == bL_switcher_cpu_original_cluster[cpu]) + continue; + } + /* If execution gets here, we're in trouble. */ + pr_crit("%s: unable to restore original cluster for CPU %d\n", + __func__, cpu); + pr_crit("%s: CPU %d can't be restored\n", + __func__, bL_switcher_cpu_pairing[cpu]); + cpumask_clear_cpu(bL_switcher_cpu_pairing[cpu], + &bL_switcher_removed_logical_cpus); + } + + bL_switcher_restore_cpus(); + bL_switcher_trace_trigger(); + + bL_activation_notify(BL_NOTIFY_POST_DISABLE); + +out: + unlock_device_hotplug(); + mutex_unlock(&bL_switcher_activation_lock); +} + +static ssize_t bL_switcher_active_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "%u\n", bL_switcher_active); +} + +static ssize_t bL_switcher_active_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + int ret; + + switch (buf[0]) { + case '0': + bL_switcher_disable(); + ret = 0; + break; + case '1': + ret = bL_switcher_enable(); + break; + default: + ret = -EINVAL; + } + + return (ret >= 0) ? count : ret; +} + +static ssize_t bL_switcher_trace_trigger_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + int ret = bL_switcher_trace_trigger(); + + return ret ? ret : count; +} + +static struct kobj_attribute bL_switcher_active_attr = + __ATTR(active, 0644, bL_switcher_active_show, bL_switcher_active_store); + +static struct kobj_attribute bL_switcher_trace_trigger_attr = + __ATTR(trace_trigger, 0200, NULL, bL_switcher_trace_trigger_store); + +static struct attribute *bL_switcher_attrs[] = { + &bL_switcher_active_attr.attr, + &bL_switcher_trace_trigger_attr.attr, + NULL, +}; + +static struct attribute_group bL_switcher_attr_group = { + .attrs = bL_switcher_attrs, +}; + +static struct kobject *bL_switcher_kobj; + +static int __init bL_switcher_sysfs_init(void) +{ + int ret; + + bL_switcher_kobj = kobject_create_and_add("bL_switcher", kernel_kobj); + if (!bL_switcher_kobj) + return -ENOMEM; + ret = sysfs_create_group(bL_switcher_kobj, &bL_switcher_attr_group); + if (ret) + kobject_put(bL_switcher_kobj); + return ret; +} + +#endif /* CONFIG_SYSFS */ + +bool bL_switcher_get_enabled(void) +{ + mutex_lock(&bL_switcher_activation_lock); + + return bL_switcher_active; +} +EXPORT_SYMBOL_GPL(bL_switcher_get_enabled); + +void bL_switcher_put_enabled(void) +{ + mutex_unlock(&bL_switcher_activation_lock); +} +EXPORT_SYMBOL_GPL(bL_switcher_put_enabled); + +/* + * Veto any CPU hotplug operation on those CPUs we've removed + * while the switcher is active. 
+ * We're just not ready to deal with that given the trickery involved. + */ +static int bL_switcher_hotplug_callback(struct notifier_block *nfb, + unsigned long action, void *hcpu) +{ + if (bL_switcher_active) { + int pairing = bL_switcher_cpu_pairing[(unsigned long)hcpu]; + switch (action & 0xf) { + case CPU_UP_PREPARE: + case CPU_DOWN_PREPARE: + if (pairing == -1) + return NOTIFY_BAD; + } + } + return NOTIFY_DONE; +} + +static bool no_bL_switcher; +core_param(no_bL_switcher, no_bL_switcher, bool, 0644); + +static int __init bL_switcher_init(void) +{ + int ret; + + if (!mcpm_is_available()) + return -ENODEV; + + cpu_notifier(bL_switcher_hotplug_callback, 0); + + if (!no_bL_switcher) { + ret = bL_switcher_enable(); + if (ret) + return ret; + } + +#ifdef CONFIG_SYSFS + ret = bL_switcher_sysfs_init(); + if (ret) + pr_err("%s: unable to create sysfs entry\n", __func__); +#endif + + return 0; +} + +late_initcall(bL_switcher_init); diff --git a/arch/arm/common/bL_switcher_dummy_if.c b/arch/arm/common/bL_switcher_dummy_if.c new file mode 100644 index 00000000000..3f47f1203c6 --- /dev/null +++ b/arch/arm/common/bL_switcher_dummy_if.c @@ -0,0 +1,71 @@ +/* + * arch/arm/common/bL_switcher_dummy_if.c -- b.L switcher dummy interface + * + * Created by: Nicolas Pitre, November 2012 + * Copyright: (C) 2012-2013 Linaro Limited + * + * Dummy interface to user space for debugging purpose only. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/fs.h> +#include <linux/miscdevice.h> +#include <asm/uaccess.h> +#include <asm/bL_switcher.h> + +static ssize_t bL_switcher_write(struct file *file, const char __user *buf, + size_t len, loff_t *pos) +{ + unsigned char val[3]; + unsigned int cpu, cluster; + int ret; + + pr_debug("%s\n", __func__); + + if (len < 3) + return -EINVAL; + + if (copy_from_user(val, buf, 3)) + return -EFAULT; + + /* format: <cpu#>,<cluster#> */ + if (val[0] < '0' || val[0] > '9' || + val[1] != ',' || + val[2] < '0' || val[2] > '1') + return -EINVAL; + + cpu = val[0] - '0'; + cluster = val[2] - '0'; + ret = bL_switch_request(cpu, cluster); + + return ret ? : len; +} + +static const struct file_operations bL_switcher_fops = { + .write = bL_switcher_write, + .owner = THIS_MODULE, +}; + +static struct miscdevice bL_switcher_device = { + MISC_DYNAMIC_MINOR, + "b.L_switcher", + &bL_switcher_fops +}; + +static int __init bL_switcher_dummy_if_init(void) +{ + return misc_register(&bL_switcher_device); +} + +static void __exit bL_switcher_dummy_if_exit(void) +{ + misc_deregister(&bL_switcher_device); +} + +module_init(bL_switcher_dummy_if_init); +module_exit(bL_switcher_dummy_if_exit); diff --git a/arch/arm/common/clkdev.c b/arch/arm/common/clkdev.c deleted file mode 100644 index aae5bc01acc..00000000000 --- a/arch/arm/common/clkdev.c +++ /dev/null @@ -1,167 +0,0 @@ -/* - * arch/arm/common/clkdev.c - * - * Copyright (C) 2008 Russell King. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Helper for the clk API to assist looking up a struct clk. 
- */ -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/device.h> -#include <linux/list.h> -#include <linux/errno.h> -#include <linux/err.h> -#include <linux/string.h> -#include <linux/mutex.h> -#include <linux/clk.h> - -#include <asm/clkdev.h> -#include <mach/clkdev.h> - -static LIST_HEAD(clocks); -static DEFINE_MUTEX(clocks_mutex); - -/* - * Find the correct struct clk for the device and connection ID. - * We do slightly fuzzy matching here: - * An entry with a NULL ID is assumed to be a wildcard. - * If an entry has a device ID, it must match - * If an entry has a connection ID, it must match - * Then we take the most specific entry - with the following - * order of precidence: dev+con > dev only > con only. - */ -static struct clk *clk_find(const char *dev_id, const char *con_id) -{ - struct clk_lookup *p; - struct clk *clk = NULL; - int match, best = 0; - - list_for_each_entry(p, &clocks, node) { - match = 0; - if (p->dev_id) { - if (!dev_id || strcmp(p->dev_id, dev_id)) - continue; - match += 2; - } - if (p->con_id) { - if (!con_id || strcmp(p->con_id, con_id)) - continue; - match += 1; - } - if (match == 0) - continue; - - if (match > best) { - clk = p->clk; - best = match; - } - } - return clk; -} - -struct clk *clk_get_sys(const char *dev_id, const char *con_id) -{ - struct clk *clk; - - mutex_lock(&clocks_mutex); - clk = clk_find(dev_id, con_id); - if (clk && !__clk_get(clk)) - clk = NULL; - mutex_unlock(&clocks_mutex); - - return clk ? clk : ERR_PTR(-ENOENT); -} -EXPORT_SYMBOL(clk_get_sys); - -struct clk *clk_get(struct device *dev, const char *con_id) -{ - const char *dev_id = dev ? dev_name(dev) : NULL; - - return clk_get_sys(dev_id, con_id); -} -EXPORT_SYMBOL(clk_get); - -void clk_put(struct clk *clk) -{ - __clk_put(clk); -} -EXPORT_SYMBOL(clk_put); - -void clkdev_add(struct clk_lookup *cl) -{ - mutex_lock(&clocks_mutex); - list_add_tail(&cl->node, &clocks); - mutex_unlock(&clocks_mutex); -} -EXPORT_SYMBOL(clkdev_add); - -#define MAX_DEV_ID 20 -#define MAX_CON_ID 16 - -struct clk_lookup_alloc { - struct clk_lookup cl; - char dev_id[MAX_DEV_ID]; - char con_id[MAX_CON_ID]; -}; - -struct clk_lookup *clkdev_alloc(struct clk *clk, const char *con_id, - const char *dev_fmt, ...) 
-{ - struct clk_lookup_alloc *cla; - - cla = kzalloc(sizeof(*cla), GFP_KERNEL); - if (!cla) - return NULL; - - cla->cl.clk = clk; - if (con_id) { - strlcpy(cla->con_id, con_id, sizeof(cla->con_id)); - cla->cl.con_id = cla->con_id; - } - - if (dev_fmt) { - va_list ap; - - va_start(ap, dev_fmt); - vscnprintf(cla->dev_id, sizeof(cla->dev_id), dev_fmt, ap); - cla->cl.dev_id = cla->dev_id; - va_end(ap); - } - - return &cla->cl; -} -EXPORT_SYMBOL(clkdev_alloc); - -int clk_add_alias(const char *alias, const char *alias_dev_name, char *id, - struct device *dev) -{ - struct clk *r = clk_get(dev, id); - struct clk_lookup *l; - - if (IS_ERR(r)) - return PTR_ERR(r); - - l = clkdev_alloc(r, alias, alias_dev_name); - clk_put(r); - if (!l) - return -ENODEV; - clkdev_add(l); - return 0; -} -EXPORT_SYMBOL(clk_add_alias); - -/* - * clkdev_drop - remove a clock dynamically allocated - */ -void clkdev_drop(struct clk_lookup *cl) -{ - mutex_lock(&clocks_mutex); - list_del(&cl->node); - mutex_unlock(&clocks_mutex); - kfree(cl); -} -EXPORT_SYMBOL(clkdev_drop); diff --git a/arch/arm/common/dmabounce.c b/arch/arm/common/dmabounce.c index cc32c1e54a5..1143c4d5c56 100644 --- a/arch/arm/common/dmabounce.c +++ b/arch/arm/common/dmabounce.c @@ -79,6 +79,8 @@ struct dmabounce_device_info { struct dmabounce_pool large; rwlock_t lock; + + int (*needs_bounce)(struct device *, dma_addr_t, size_t); }; #ifdef STATS @@ -171,7 +173,8 @@ find_safe_buffer(struct dmabounce_device_info *device_info, dma_addr_t safe_dma_ read_lock_irqsave(&device_info->lock, flags); list_for_each_entry(b, &device_info->safe_buffers, node) - if (b->safe_dma_addr == safe_dma_addr) { + if (b->safe_dma_addr <= safe_dma_addr && + b->safe_dma_addr + b->size > safe_dma_addr) { rb = b; break; } @@ -210,112 +213,91 @@ static struct safe_buffer *find_safe_buffer_dev(struct device *dev, if (!dev || !dev->archdata.dmabounce) return NULL; if (dma_mapping_error(dev, dma_addr)) { - if (dev) - dev_err(dev, "Trying to %s invalid mapping\n", where); - else - pr_err("unknown device: Trying to %s invalid mapping\n", where); + dev_err(dev, "Trying to %s invalid mapping\n", where); return NULL; } return find_safe_buffer(dev->archdata.dmabounce, dma_addr); } -static inline dma_addr_t map_single(struct device *dev, void *ptr, size_t size, - enum dma_data_direction dir) +static int needs_bounce(struct device *dev, dma_addr_t dma_addr, size_t size) { - struct dmabounce_device_info *device_info = dev->archdata.dmabounce; - dma_addr_t dma_addr; - int needs_bounce = 0; - - if (device_info) - DO_STATS ( device_info->map_op_count++ ); - - dma_addr = virt_to_dma(dev, ptr); + if (!dev || !dev->archdata.dmabounce) + return 0; if (dev->dma_mask) { - unsigned long mask = *dev->dma_mask; - unsigned long limit; + unsigned long limit, mask = *dev->dma_mask; limit = (mask + 1) & ~mask; if (limit && size > limit) { dev_err(dev, "DMA mapping too big (requested %#x " "mask %#Lx)\n", size, *dev->dma_mask); - return ~0; + return -E2BIG; } - /* - * Figure out if we need to bounce from the DMA mask. - */ - needs_bounce = (dma_addr | (dma_addr + size - 1)) & ~mask; + /* Figure out if we need to bounce from the DMA mask. 
*/ + if ((dma_addr | (dma_addr + size - 1)) & ~mask) + return 1; } - if (device_info && (needs_bounce || dma_needs_bounce(dev, dma_addr, size))) { - struct safe_buffer *buf; + return !!dev->archdata.dmabounce->needs_bounce(dev, dma_addr, size); +} - buf = alloc_safe_buffer(device_info, ptr, size, dir); - if (buf == 0) { - dev_err(dev, "%s: unable to map unsafe buffer %p!\n", - __func__, ptr); - return 0; - } +static inline dma_addr_t map_single(struct device *dev, void *ptr, size_t size, + enum dma_data_direction dir) +{ + struct dmabounce_device_info *device_info = dev->archdata.dmabounce; + struct safe_buffer *buf; - dev_dbg(dev, - "%s: unsafe buffer %p (dma=%#x) mapped to %p (dma=%#x)\n", - __func__, buf->ptr, virt_to_dma(dev, buf->ptr), - buf->safe, buf->safe_dma_addr); + if (device_info) + DO_STATS ( device_info->map_op_count++ ); - if ((dir == DMA_TO_DEVICE) || - (dir == DMA_BIDIRECTIONAL)) { - dev_dbg(dev, "%s: copy unsafe %p to safe %p, size %d\n", - __func__, ptr, buf->safe, size); - memcpy(buf->safe, ptr, size); - } - ptr = buf->safe; + buf = alloc_safe_buffer(device_info, ptr, size, dir); + if (buf == NULL) { + dev_err(dev, "%s: unable to map unsafe buffer %p!\n", + __func__, ptr); + return DMA_ERROR_CODE; + } - dma_addr = buf->safe_dma_addr; - } else { - /* - * We don't need to sync the DMA buffer since - * it was allocated via the coherent allocators. - */ - dma_cache_maint(ptr, size, dir); + dev_dbg(dev, "%s: unsafe buffer %p (dma=%#x) mapped to %p (dma=%#x)\n", + __func__, buf->ptr, virt_to_dma(dev, buf->ptr), + buf->safe, buf->safe_dma_addr); + + if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) { + dev_dbg(dev, "%s: copy unsafe %p to safe %p, size %d\n", + __func__, ptr, buf->safe, size); + memcpy(buf->safe, ptr, size); } - return dma_addr; + return buf->safe_dma_addr; } -static inline void unmap_single(struct device *dev, dma_addr_t dma_addr, +static inline void unmap_single(struct device *dev, struct safe_buffer *buf, size_t size, enum dma_data_direction dir) { - struct safe_buffer *buf = find_safe_buffer_dev(dev, dma_addr, "unmap"); - - if (buf) { - BUG_ON(buf->size != size); - BUG_ON(buf->direction != dir); + BUG_ON(buf->size != size); + BUG_ON(buf->direction != dir); - dev_dbg(dev, - "%s: unsafe buffer %p (dma=%#x) mapped to %p (dma=%#x)\n", - __func__, buf->ptr, virt_to_dma(dev, buf->ptr), - buf->safe, buf->safe_dma_addr); + dev_dbg(dev, "%s: unsafe buffer %p (dma=%#x) mapped to %p (dma=%#x)\n", + __func__, buf->ptr, virt_to_dma(dev, buf->ptr), + buf->safe, buf->safe_dma_addr); - DO_STATS(dev->archdata.dmabounce->bounce_count++); + DO_STATS(dev->archdata.dmabounce->bounce_count++); - if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) { - void *ptr = buf->ptr; + if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) { + void *ptr = buf->ptr; - dev_dbg(dev, - "%s: copy back safe %p to unsafe %p size %d\n", - __func__, buf->safe, ptr, size); - memcpy(ptr, buf->safe, size); + dev_dbg(dev, "%s: copy back safe %p to unsafe %p size %d\n", + __func__, buf->safe, ptr, size); + memcpy(ptr, buf->safe, size); - /* - * Since we may have written to a page cache page, - * we need to ensure that the data will be coherent - * with user mappings. - */ - __cpuc_flush_dcache_area(ptr, size); - } - free_safe_buffer(dev->archdata.dmabounce, buf); + /* + * Since we may have written to a page cache page, + * we need to ensure that the data will be coherent + * with user mappings. 
+ */ + __cpuc_flush_dcache_area(ptr, size); } + free_safe_buffer(dev->archdata.dmabounce, buf); } /* ************************************************** */ @@ -326,51 +308,34 @@ static inline void unmap_single(struct device *dev, dma_addr_t dma_addr, * substitute the safe buffer for the unsafe one. * (basically move the buffer from an unsafe area to a safe one) */ -dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size, - enum dma_data_direction dir) +static dma_addr_t dmabounce_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) { - dev_dbg(dev, "%s(ptr=%p,size=%d,dir=%x)\n", - __func__, ptr, size, dir); - - BUG_ON(!valid_dma_direction(dir)); - - return map_single(dev, ptr, size, dir); -} -EXPORT_SYMBOL(dma_map_single); - -/* - * see if a mapped address was really a "safe" buffer and if so, copy - * the data from the safe buffer back to the unsafe buffer and free up - * the safe buffer. (basically return things back to the way they - * should be) - */ -void dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size, - enum dma_data_direction dir) -{ - dev_dbg(dev, "%s(ptr=%p,size=%d,dir=%x)\n", - __func__, (void *) dma_addr, size, dir); - - unmap_single(dev, dma_addr, size, dir); -} -EXPORT_SYMBOL(dma_unmap_single); + dma_addr_t dma_addr; + int ret; -dma_addr_t dma_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, enum dma_data_direction dir) -{ dev_dbg(dev, "%s(page=%p,off=%#lx,size=%zx,dir=%x)\n", __func__, page, offset, size, dir); - BUG_ON(!valid_dma_direction(dir)); + dma_addr = pfn_to_dma(dev, page_to_pfn(page)) + offset; + + ret = needs_bounce(dev, dma_addr, size); + if (ret < 0) + return DMA_ERROR_CODE; + + if (ret == 0) { + arm_dma_ops.sync_single_for_device(dev, dma_addr, size, dir); + return dma_addr; + } if (PageHighMem(page)) { - dev_err(dev, "DMA buffer bouncing of HIGHMEM pages " - "is not supported\n"); - return ~0; + dev_err(dev, "DMA buffer bouncing of HIGHMEM pages is not supported\n"); + return DMA_ERROR_CODE; } return map_single(dev, page_address(page) + offset, size, dir); } -EXPORT_SYMBOL(dma_map_page); /* * see if a mapped address was really a "safe" buffer and if so, copy @@ -378,32 +343,42 @@ EXPORT_SYMBOL(dma_map_page); * the safe buffer. 
(basically return things back to the way they * should be) */ -void dma_unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, - enum dma_data_direction dir) +static void dmabounce_unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, + enum dma_data_direction dir, struct dma_attrs *attrs) { - dev_dbg(dev, "%s(ptr=%p,size=%d,dir=%x)\n", - __func__, (void *) dma_addr, size, dir); + struct safe_buffer *buf; + + dev_dbg(dev, "%s(dma=%#x,size=%d,dir=%x)\n", + __func__, dma_addr, size, dir); + + buf = find_safe_buffer_dev(dev, dma_addr, __func__); + if (!buf) { + arm_dma_ops.sync_single_for_cpu(dev, dma_addr, size, dir); + return; + } - unmap_single(dev, dma_addr, size, dir); + unmap_single(dev, buf, size, dir); } -EXPORT_SYMBOL(dma_unmap_page); -int dmabounce_sync_for_cpu(struct device *dev, dma_addr_t addr, - unsigned long off, size_t sz, enum dma_data_direction dir) +static int __dmabounce_sync_for_cpu(struct device *dev, dma_addr_t addr, + size_t sz, enum dma_data_direction dir) { struct safe_buffer *buf; + unsigned long off; - dev_dbg(dev, "%s(dma=%#x,off=%#lx,sz=%zx,dir=%x)\n", - __func__, addr, off, sz, dir); + dev_dbg(dev, "%s(dma=%#x,sz=%zx,dir=%x)\n", + __func__, addr, sz, dir); buf = find_safe_buffer_dev(dev, addr, __func__); if (!buf) return 1; + off = addr - buf->safe_dma_addr; + BUG_ON(buf->direction != dir); - dev_dbg(dev, "%s: unsafe buffer %p (dma=%#x) mapped to %p (dma=%#x)\n", - __func__, buf->ptr, virt_to_dma(dev, buf->ptr), + dev_dbg(dev, "%s: unsafe buffer %p (dma=%#x off=%#lx) mapped to %p (dma=%#x)\n", + __func__, buf->ptr, virt_to_dma(dev, buf->ptr), off, buf->safe, buf->safe_dma_addr); DO_STATS(dev->archdata.dmabounce->bounce_count++); @@ -415,24 +390,35 @@ int dmabounce_sync_for_cpu(struct device *dev, dma_addr_t addr, } return 0; } -EXPORT_SYMBOL(dmabounce_sync_for_cpu); -int dmabounce_sync_for_device(struct device *dev, dma_addr_t addr, - unsigned long off, size_t sz, enum dma_data_direction dir) +static void dmabounce_sync_for_cpu(struct device *dev, + dma_addr_t handle, size_t size, enum dma_data_direction dir) +{ + if (!__dmabounce_sync_for_cpu(dev, handle, size, dir)) + return; + + arm_dma_ops.sync_single_for_cpu(dev, handle, size, dir); +} + +static int __dmabounce_sync_for_device(struct device *dev, dma_addr_t addr, + size_t sz, enum dma_data_direction dir) { struct safe_buffer *buf; + unsigned long off; - dev_dbg(dev, "%s(dma=%#x,off=%#lx,sz=%zx,dir=%x)\n", - __func__, addr, off, sz, dir); + dev_dbg(dev, "%s(dma=%#x,sz=%zx,dir=%x)\n", + __func__, addr, sz, dir); buf = find_safe_buffer_dev(dev, addr, __func__); if (!buf) return 1; + off = addr - buf->safe_dma_addr; + BUG_ON(buf->direction != dir); - dev_dbg(dev, "%s: unsafe buffer %p (dma=%#x) mapped to %p (dma=%#x)\n", - __func__, buf->ptr, virt_to_dma(dev, buf->ptr), + dev_dbg(dev, "%s: unsafe buffer %p (dma=%#x off=%#lx) mapped to %p (dma=%#x)\n", + __func__, buf->ptr, virt_to_dma(dev, buf->ptr), off, buf->safe, buf->safe_dma_addr); DO_STATS(dev->archdata.dmabounce->bounce_count++); @@ -444,7 +430,39 @@ int dmabounce_sync_for_device(struct device *dev, dma_addr_t addr, } return 0; } -EXPORT_SYMBOL(dmabounce_sync_for_device); + +static void dmabounce_sync_for_device(struct device *dev, + dma_addr_t handle, size_t size, enum dma_data_direction dir) +{ + if (!__dmabounce_sync_for_device(dev, handle, size, dir)) + return; + + arm_dma_ops.sync_single_for_device(dev, handle, size, dir); +} + +static int dmabounce_set_mask(struct device *dev, u64 dma_mask) +{ + if (dev->archdata.dmabounce) + 
return 0; + + return arm_dma_ops.set_dma_mask(dev, dma_mask); +} + +static struct dma_map_ops dmabounce_ops = { + .alloc = arm_dma_alloc, + .free = arm_dma_free, + .mmap = arm_dma_mmap, + .get_sgtable = arm_dma_get_sgtable, + .map_page = dmabounce_map_page, + .unmap_page = dmabounce_unmap_page, + .sync_single_for_cpu = dmabounce_sync_for_cpu, + .sync_single_for_device = dmabounce_sync_for_device, + .map_sg = arm_dma_map_sg, + .unmap_sg = arm_dma_unmap_sg, + .sync_sg_for_cpu = arm_dma_sync_sg_for_cpu, + .sync_sg_for_device = arm_dma_sync_sg_for_device, + .set_dma_mask = dmabounce_set_mask, +}; static int dmabounce_init_pool(struct dmabounce_pool *pool, struct device *dev, const char *name, unsigned long size) @@ -459,7 +477,8 @@ static int dmabounce_init_pool(struct dmabounce_pool *pool, struct device *dev, } int dmabounce_register_dev(struct device *dev, unsigned long small_buffer_size, - unsigned long large_buffer_size) + unsigned long large_buffer_size, + int (*needs_bounce_fn)(struct device *, dma_addr_t, size_t)) { struct dmabounce_device_info *device_info; int ret; @@ -495,6 +514,7 @@ int dmabounce_register_dev(struct device *dev, unsigned long small_buffer_size, device_info->dev = dev; INIT_LIST_HEAD(&device_info->safe_buffers); rwlock_init(&device_info->lock); + device_info->needs_bounce = needs_bounce_fn; #ifdef STATS device_info->total_allocs = 0; @@ -504,6 +524,7 @@ int dmabounce_register_dev(struct device *dev, unsigned long small_buffer_size, #endif dev->archdata.dmabounce = device_info; + set_dma_ops(dev, &dmabounce_ops); dev_info(dev, "dmabounce: registered device\n"); @@ -522,6 +543,7 @@ void dmabounce_unregister_dev(struct device *dev) struct dmabounce_device_info *device_info = dev->archdata.dmabounce; dev->archdata.dmabounce = NULL; + set_dma_ops(dev, NULL); if (!device_info) { dev_warn(dev, diff --git a/arch/arm/common/edma.c b/arch/arm/common/edma.c new file mode 100644 index 00000000000..485be42519b --- /dev/null +++ b/arch/arm/common/edma.c @@ -0,0 +1,1784 @@ +/* + * EDMA3 support for DaVinci + * + * Copyright (C) 2006-2009 Texas Instruments. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ +#include <linux/err.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/interrupt.h> +#include <linux/platform_device.h> +#include <linux/io.h> +#include <linux/slab.h> +#include <linux/edma.h> +#include <linux/of_address.h> +#include <linux/of_device.h> +#include <linux/of_dma.h> +#include <linux/of_irq.h> +#include <linux/pm_runtime.h> + +#include <linux/platform_data/edma.h> + +/* Offsets matching "struct edmacc_param" */ +#define PARM_OPT 0x00 +#define PARM_SRC 0x04 +#define PARM_A_B_CNT 0x08 +#define PARM_DST 0x0c +#define PARM_SRC_DST_BIDX 0x10 +#define PARM_LINK_BCNTRLD 0x14 +#define PARM_SRC_DST_CIDX 0x18 +#define PARM_CCNT 0x1c + +#define PARM_SIZE 0x20 + +/* Offsets for EDMA CC global channel registers and their shadows */ +#define SH_ER 0x00 /* 64 bits */ +#define SH_ECR 0x08 /* 64 bits */ +#define SH_ESR 0x10 /* 64 bits */ +#define SH_CER 0x18 /* 64 bits */ +#define SH_EER 0x20 /* 64 bits */ +#define SH_EECR 0x28 /* 64 bits */ +#define SH_EESR 0x30 /* 64 bits */ +#define SH_SER 0x38 /* 64 bits */ +#define SH_SECR 0x40 /* 64 bits */ +#define SH_IER 0x50 /* 64 bits */ +#define SH_IECR 0x58 /* 64 bits */ +#define SH_IESR 0x60 /* 64 bits */ +#define SH_IPR 0x68 /* 64 bits */ +#define SH_ICR 0x70 /* 64 bits */ +#define SH_IEVAL 0x78 +#define SH_QER 0x80 +#define SH_QEER 0x84 +#define SH_QEECR 0x88 +#define SH_QEESR 0x8c +#define SH_QSER 0x90 +#define SH_QSECR 0x94 +#define SH_SIZE 0x200 + +/* Offsets for EDMA CC global registers */ +#define EDMA_REV 0x0000 +#define EDMA_CCCFG 0x0004 +#define EDMA_QCHMAP 0x0200 /* 8 registers */ +#define EDMA_DMAQNUM 0x0240 /* 8 registers (4 on OMAP-L1xx) */ +#define EDMA_QDMAQNUM 0x0260 +#define EDMA_QUETCMAP 0x0280 +#define EDMA_QUEPRI 0x0284 +#define EDMA_EMR 0x0300 /* 64 bits */ +#define EDMA_EMCR 0x0308 /* 64 bits */ +#define EDMA_QEMR 0x0310 +#define EDMA_QEMCR 0x0314 +#define EDMA_CCERR 0x0318 +#define EDMA_CCERRCLR 0x031c +#define EDMA_EEVAL 0x0320 +#define EDMA_DRAE 0x0340 /* 4 x 64 bits*/ +#define EDMA_QRAE 0x0380 /* 4 registers */ +#define EDMA_QUEEVTENTRY 0x0400 /* 2 x 16 registers */ +#define EDMA_QSTAT 0x0600 /* 2 registers */ +#define EDMA_QWMTHRA 0x0620 +#define EDMA_QWMTHRB 0x0624 +#define EDMA_CCSTAT 0x0640 + +#define EDMA_M 0x1000 /* global channel registers */ +#define EDMA_ECR 0x1008 +#define EDMA_ECRH 0x100C +#define EDMA_SHADOW0 0x2000 /* 4 regions shadowing global channels */ +#define EDMA_PARM 0x4000 /* 128 param entries */ + +#define PARM_OFFSET(param_no) (EDMA_PARM + ((param_no) << 5)) + +#define EDMA_DCHMAP 0x0100 /* 64 registers */ + +/* CCCFG register */ +#define GET_NUM_DMACH(x) (x & 0x7) /* bits 0-2 */ +#define GET_NUM_PAENTRY(x) ((x & 0x7000) >> 12) /* bits 12-14 */ +#define GET_NUM_EVQUE(x) ((x & 0x70000) >> 16) /* bits 16-18 */ +#define GET_NUM_REGN(x) ((x & 0x300000) >> 20) /* bits 20-21 */ +#define CHMAP_EXIST BIT(24) + +#define EDMA_MAX_DMACH 64 +#define EDMA_MAX_PARAMENTRY 512 + +/*****************************************************************************/ + +static void __iomem *edmacc_regs_base[EDMA_MAX_CC]; + +static inline unsigned int edma_read(unsigned ctlr, int offset) +{ + return (unsigned int)__raw_readl(edmacc_regs_base[ctlr] + offset); +} + +static inline void edma_write(unsigned ctlr, int offset, int val) +{ + __raw_writel(val, edmacc_regs_base[ctlr] + offset); +} +static inline void edma_modify(unsigned ctlr, int offset, unsigned and, + unsigned or) +{ + unsigned val = edma_read(ctlr, offset); + val &= and; + val |= or; + edma_write(ctlr, 
offset, val); +} +static inline void edma_and(unsigned ctlr, int offset, unsigned and) +{ + unsigned val = edma_read(ctlr, offset); + val &= and; + edma_write(ctlr, offset, val); +} +static inline void edma_or(unsigned ctlr, int offset, unsigned or) +{ + unsigned val = edma_read(ctlr, offset); + val |= or; + edma_write(ctlr, offset, val); +} +static inline unsigned int edma_read_array(unsigned ctlr, int offset, int i) +{ + return edma_read(ctlr, offset + (i << 2)); +} +static inline void edma_write_array(unsigned ctlr, int offset, int i, + unsigned val) +{ + edma_write(ctlr, offset + (i << 2), val); +} +static inline void edma_modify_array(unsigned ctlr, int offset, int i, + unsigned and, unsigned or) +{ + edma_modify(ctlr, offset + (i << 2), and, or); +} +static inline void edma_or_array(unsigned ctlr, int offset, int i, unsigned or) +{ + edma_or(ctlr, offset + (i << 2), or); +} +static inline void edma_or_array2(unsigned ctlr, int offset, int i, int j, + unsigned or) +{ + edma_or(ctlr, offset + ((i*2 + j) << 2), or); +} +static inline void edma_write_array2(unsigned ctlr, int offset, int i, int j, + unsigned val) +{ + edma_write(ctlr, offset + ((i*2 + j) << 2), val); +} +static inline unsigned int edma_shadow0_read(unsigned ctlr, int offset) +{ + return edma_read(ctlr, EDMA_SHADOW0 + offset); +} +static inline unsigned int edma_shadow0_read_array(unsigned ctlr, int offset, + int i) +{ + return edma_read(ctlr, EDMA_SHADOW0 + offset + (i << 2)); +} +static inline void edma_shadow0_write(unsigned ctlr, int offset, unsigned val) +{ + edma_write(ctlr, EDMA_SHADOW0 + offset, val); +} +static inline void edma_shadow0_write_array(unsigned ctlr, int offset, int i, + unsigned val) +{ + edma_write(ctlr, EDMA_SHADOW0 + offset + (i << 2), val); +} +static inline unsigned int edma_parm_read(unsigned ctlr, int offset, + int param_no) +{ + return edma_read(ctlr, EDMA_PARM + offset + (param_no << 5)); +} +static inline void edma_parm_write(unsigned ctlr, int offset, int param_no, + unsigned val) +{ + edma_write(ctlr, EDMA_PARM + offset + (param_no << 5), val); +} +static inline void edma_parm_modify(unsigned ctlr, int offset, int param_no, + unsigned and, unsigned or) +{ + edma_modify(ctlr, EDMA_PARM + offset + (param_no << 5), and, or); +} +static inline void edma_parm_and(unsigned ctlr, int offset, int param_no, + unsigned and) +{ + edma_and(ctlr, EDMA_PARM + offset + (param_no << 5), and); +} +static inline void edma_parm_or(unsigned ctlr, int offset, int param_no, + unsigned or) +{ + edma_or(ctlr, EDMA_PARM + offset + (param_no << 5), or); +} + +static inline void set_bits(int offset, int len, unsigned long *p) +{ + for (; len > 0; len--) + set_bit(offset + (len - 1), p); +} + +static inline void clear_bits(int offset, int len, unsigned long *p) +{ + for (; len > 0; len--) + clear_bit(offset + (len - 1), p); +} + +/*****************************************************************************/ + +/* actual number of DMA channels and slots on this silicon */ +struct edma { + /* how many dma resources of each type */ + unsigned num_channels; + unsigned num_region; + unsigned num_slots; + unsigned num_tc; + enum dma_event_q default_queue; + + /* list of channels with no even trigger; terminated by "-1" */ + const s8 *noevent; + + /* The edma_inuse bit for each PaRAM slot is clear unless the + * channel is in use ... by ARM or DSP, for QDMA, or whatever. 
+ */ + DECLARE_BITMAP(edma_inuse, EDMA_MAX_PARAMENTRY); + + /* The edma_unused bit for each channel is clear unless + * it is not being used on this platform. It uses a bit + * of SOC-specific initialization code. + */ + DECLARE_BITMAP(edma_unused, EDMA_MAX_DMACH); + + unsigned irq_res_start; + unsigned irq_res_end; + + struct dma_interrupt_data { + void (*callback)(unsigned channel, unsigned short ch_status, + void *data); + void *data; + } intr_data[EDMA_MAX_DMACH]; +}; + +static struct edma *edma_cc[EDMA_MAX_CC]; +static int arch_num_cc; + +/* dummy param set used to (re)initialize parameter RAM slots */ +static const struct edmacc_param dummy_paramset = { + .link_bcntrld = 0xffff, + .ccnt = 1, +}; + +static const struct of_device_id edma_of_ids[] = { + { .compatible = "ti,edma3", }, + {} +}; + +/*****************************************************************************/ + +static void map_dmach_queue(unsigned ctlr, unsigned ch_no, + enum dma_event_q queue_no) +{ + int bit = (ch_no & 0x7) * 4; + + /* default to low priority queue */ + if (queue_no == EVENTQ_DEFAULT) + queue_no = edma_cc[ctlr]->default_queue; + + queue_no &= 7; + edma_modify_array(ctlr, EDMA_DMAQNUM, (ch_no >> 3), + ~(0x7 << bit), queue_no << bit); +} + +static void __init assign_priority_to_queue(unsigned ctlr, int queue_no, + int priority) +{ + int bit = queue_no * 4; + edma_modify(ctlr, EDMA_QUEPRI, ~(0x7 << bit), + ((priority & 0x7) << bit)); +} + +/** + * map_dmach_param - Maps channel number to param entry number + * + * This maps the dma channel number to param entry numberter. In + * other words using the DMA channel mapping registers a param entry + * can be mapped to any channel + * + * Callers are responsible for ensuring the channel mapping logic is + * included in that particular EDMA variant (Eg : dm646x) + * + */ +static void __init map_dmach_param(unsigned ctlr) +{ + int i; + for (i = 0; i < EDMA_MAX_DMACH; i++) + edma_write_array(ctlr, EDMA_DCHMAP , i , (i << 5)); +} + +static inline void +setup_dma_interrupt(unsigned lch, + void (*callback)(unsigned channel, u16 ch_status, void *data), + void *data) +{ + unsigned ctlr; + + ctlr = EDMA_CTLR(lch); + lch = EDMA_CHAN_SLOT(lch); + + if (!callback) + edma_shadow0_write_array(ctlr, SH_IECR, lch >> 5, + BIT(lch & 0x1f)); + + edma_cc[ctlr]->intr_data[lch].callback = callback; + edma_cc[ctlr]->intr_data[lch].data = data; + + if (callback) { + edma_shadow0_write_array(ctlr, SH_ICR, lch >> 5, + BIT(lch & 0x1f)); + edma_shadow0_write_array(ctlr, SH_IESR, lch >> 5, + BIT(lch & 0x1f)); + } +} + +static int irq2ctlr(int irq) +{ + if (irq >= edma_cc[0]->irq_res_start && irq <= edma_cc[0]->irq_res_end) + return 0; + else if (irq >= edma_cc[1]->irq_res_start && + irq <= edma_cc[1]->irq_res_end) + return 1; + + return -1; +} + +/****************************************************************************** + * + * DMA interrupt handler + * + *****************************************************************************/ +static irqreturn_t dma_irq_handler(int irq, void *data) +{ + int ctlr; + u32 sh_ier; + u32 sh_ipr; + u32 bank; + + ctlr = irq2ctlr(irq); + if (ctlr < 0) + return IRQ_NONE; + + dev_dbg(data, "dma_irq_handler\n"); + + sh_ipr = edma_shadow0_read_array(ctlr, SH_IPR, 0); + if (!sh_ipr) { + sh_ipr = edma_shadow0_read_array(ctlr, SH_IPR, 1); + if (!sh_ipr) + return IRQ_NONE; + sh_ier = edma_shadow0_read_array(ctlr, SH_IER, 1); + bank = 1; + } else { + sh_ier = edma_shadow0_read_array(ctlr, SH_IER, 0); + bank = 0; + } + + do { + u32 slot; + u32 channel; + + 
dev_dbg(data, "IPR%d %08x\n", bank, sh_ipr); + + slot = __ffs(sh_ipr); + sh_ipr &= ~(BIT(slot)); + + if (sh_ier & BIT(slot)) { + channel = (bank << 5) | slot; + /* Clear the corresponding IPR bits */ + edma_shadow0_write_array(ctlr, SH_ICR, bank, + BIT(slot)); + if (edma_cc[ctlr]->intr_data[channel].callback) + edma_cc[ctlr]->intr_data[channel].callback( + channel, EDMA_DMA_COMPLETE, + edma_cc[ctlr]->intr_data[channel].data); + } + } while (sh_ipr); + + edma_shadow0_write(ctlr, SH_IEVAL, 1); + return IRQ_HANDLED; +} + +/****************************************************************************** + * + * DMA error interrupt handler + * + *****************************************************************************/ +static irqreturn_t dma_ccerr_handler(int irq, void *data) +{ + int i; + int ctlr; + unsigned int cnt = 0; + + ctlr = irq2ctlr(irq); + if (ctlr < 0) + return IRQ_NONE; + + dev_dbg(data, "dma_ccerr_handler\n"); + + if ((edma_read_array(ctlr, EDMA_EMR, 0) == 0) && + (edma_read_array(ctlr, EDMA_EMR, 1) == 0) && + (edma_read(ctlr, EDMA_QEMR) == 0) && + (edma_read(ctlr, EDMA_CCERR) == 0)) + return IRQ_NONE; + + while (1) { + int j = -1; + if (edma_read_array(ctlr, EDMA_EMR, 0)) + j = 0; + else if (edma_read_array(ctlr, EDMA_EMR, 1)) + j = 1; + if (j >= 0) { + dev_dbg(data, "EMR%d %08x\n", j, + edma_read_array(ctlr, EDMA_EMR, j)); + for (i = 0; i < 32; i++) { + int k = (j << 5) + i; + if (edma_read_array(ctlr, EDMA_EMR, j) & + BIT(i)) { + /* Clear the corresponding EMR bits */ + edma_write_array(ctlr, EDMA_EMCR, j, + BIT(i)); + /* Clear any SER */ + edma_shadow0_write_array(ctlr, SH_SECR, + j, BIT(i)); + if (edma_cc[ctlr]->intr_data[k]. + callback) { + edma_cc[ctlr]->intr_data[k]. + callback(k, + EDMA_DMA_CC_ERROR, + edma_cc[ctlr]->intr_data + [k].data); + } + } + } + } else if (edma_read(ctlr, EDMA_QEMR)) { + dev_dbg(data, "QEMR %02x\n", + edma_read(ctlr, EDMA_QEMR)); + for (i = 0; i < 8; i++) { + if (edma_read(ctlr, EDMA_QEMR) & BIT(i)) { + /* Clear the corresponding IPR bits */ + edma_write(ctlr, EDMA_QEMCR, BIT(i)); + edma_shadow0_write(ctlr, SH_QSECR, + BIT(i)); + + /* NOTE: not reported!! */ + } + } + } else if (edma_read(ctlr, EDMA_CCERR)) { + dev_dbg(data, "CCERR %08x\n", + edma_read(ctlr, EDMA_CCERR)); + /* FIXME: CCERR.BIT(16) ignored! much better + * to just write CCERRCLR with CCERR value... + */ + for (i = 0; i < 8; i++) { + if (edma_read(ctlr, EDMA_CCERR) & BIT(i)) { + /* Clear the corresponding IPR bits */ + edma_write(ctlr, EDMA_CCERRCLR, BIT(i)); + + /* NOTE: not reported!! 
*/ + } + } + } + if ((edma_read_array(ctlr, EDMA_EMR, 0) == 0) && + (edma_read_array(ctlr, EDMA_EMR, 1) == 0) && + (edma_read(ctlr, EDMA_QEMR) == 0) && + (edma_read(ctlr, EDMA_CCERR) == 0)) + break; + cnt++; + if (cnt > 10) + break; + } + edma_write(ctlr, EDMA_EEVAL, 1); + return IRQ_HANDLED; +} + +static int reserve_contiguous_slots(int ctlr, unsigned int id, + unsigned int num_slots, + unsigned int start_slot) +{ + int i, j; + unsigned int count = num_slots; + int stop_slot = start_slot; + DECLARE_BITMAP(tmp_inuse, EDMA_MAX_PARAMENTRY); + + for (i = start_slot; i < edma_cc[ctlr]->num_slots; ++i) { + j = EDMA_CHAN_SLOT(i); + if (!test_and_set_bit(j, edma_cc[ctlr]->edma_inuse)) { + /* Record our current beginning slot */ + if (count == num_slots) + stop_slot = i; + + count--; + set_bit(j, tmp_inuse); + + if (count == 0) + break; + } else { + clear_bit(j, tmp_inuse); + + if (id == EDMA_CONT_PARAMS_FIXED_EXACT) { + stop_slot = i; + break; + } else { + count = num_slots; + } + } + } + + /* + * We have to clear any bits that we set + * if we run out parameter RAM slots, i.e we do find a set + * of contiguous parameter RAM slots but do not find the exact number + * requested as we may reach the total number of parameter RAM slots + */ + if (i == edma_cc[ctlr]->num_slots) + stop_slot = i; + + j = start_slot; + for_each_set_bit_from(j, tmp_inuse, stop_slot) + clear_bit(j, edma_cc[ctlr]->edma_inuse); + + if (count) + return -EBUSY; + + for (j = i - num_slots + 1; j <= i; ++j) + memcpy_toio(edmacc_regs_base[ctlr] + PARM_OFFSET(j), + &dummy_paramset, PARM_SIZE); + + return EDMA_CTLR_CHAN(ctlr, i - num_slots + 1); +} + +static int prepare_unused_channel_list(struct device *dev, void *data) +{ + struct platform_device *pdev = to_platform_device(dev); + int i, count, ctlr; + struct of_phandle_args dma_spec; + + if (dev->of_node) { + count = of_property_count_strings(dev->of_node, "dma-names"); + if (count < 0) + return 0; + for (i = 0; i < count; i++) { + if (of_parse_phandle_with_args(dev->of_node, "dmas", + "#dma-cells", i, + &dma_spec)) + continue; + + if (!of_match_node(edma_of_ids, dma_spec.np)) { + of_node_put(dma_spec.np); + continue; + } + + clear_bit(EDMA_CHAN_SLOT(dma_spec.args[0]), + edma_cc[0]->edma_unused); + of_node_put(dma_spec.np); + } + return 0; + } + + /* For non-OF case */ + for (i = 0; i < pdev->num_resources; i++) { + if ((pdev->resource[i].flags & IORESOURCE_DMA) && + (int)pdev->resource[i].start >= 0) { + ctlr = EDMA_CTLR(pdev->resource[i].start); + clear_bit(EDMA_CHAN_SLOT(pdev->resource[i].start), + edma_cc[ctlr]->edma_unused); + } + } + + return 0; +} + +/*-----------------------------------------------------------------------*/ + +static bool unused_chan_list_done; + +/* Resource alloc/free: dma channels, parameter RAM slots */ + +/** + * edma_alloc_channel - allocate DMA channel and paired parameter RAM + * @channel: specific channel to allocate; negative for "any unmapped channel" + * @callback: optional; to be issued on DMA completion or errors + * @data: passed to callback + * @eventq_no: an EVENTQ_* constant, used to choose which Transfer + * Controller (TC) executes requests using this channel. Use + * EVENTQ_DEFAULT unless you really need a high priority queue. + * + * This allocates a DMA channel and its associated parameter RAM slot. + * The parameter RAM is initialized to hold a dummy transfer. + * + * Normal use is to pass a specific channel number as @channel, to make + * use of hardware events mapped to that channel. 
When the channel will + * be used only for software triggering or event chaining, channels not + * mapped to hardware events (or mapped to unused events) are preferable. + * + * DMA transfers start from a channel using edma_start(), or by + * chaining. When the transfer described in that channel's parameter RAM + * slot completes, that slot's data may be reloaded through a link. + * + * DMA errors are only reported to the @callback associated with the + * channel driving that transfer, but transfer completion callbacks can + * be sent to another channel under control of the TCC field in + * the option word of the transfer's parameter RAM set. Drivers must not + * use DMA transfer completion callbacks for channels they did not allocate. + * (The same applies to TCC codes used in transfer chaining.) + * + * Returns the number of the channel, else negative errno. + */ +int edma_alloc_channel(int channel, + void (*callback)(unsigned channel, u16 ch_status, void *data), + void *data, + enum dma_event_q eventq_no) +{ + unsigned i, done = 0, ctlr = 0; + int ret = 0; + + if (!unused_chan_list_done) { + /* + * Scan all the platform devices to find out the EDMA channels + * used and clear them in the unused list, making the rest + * available for ARM usage. + */ + ret = bus_for_each_dev(&platform_bus_type, NULL, NULL, + prepare_unused_channel_list); + if (ret < 0) + return ret; + + unused_chan_list_done = true; + } + + if (channel >= 0) { + ctlr = EDMA_CTLR(channel); + channel = EDMA_CHAN_SLOT(channel); + } + + if (channel < 0) { + for (i = 0; i < arch_num_cc; i++) { + channel = 0; + for (;;) { + channel = find_next_bit(edma_cc[i]->edma_unused, + edma_cc[i]->num_channels, + channel); + if (channel == edma_cc[i]->num_channels) + break; + if (!test_and_set_bit(channel, + edma_cc[i]->edma_inuse)) { + done = 1; + ctlr = i; + break; + } + channel++; + } + if (done) + break; + } + if (!done) + return -ENOMEM; + } else if (channel >= edma_cc[ctlr]->num_channels) { + return -EINVAL; + } else if (test_and_set_bit(channel, edma_cc[ctlr]->edma_inuse)) { + return -EBUSY; + } + + /* ensure access through shadow region 0 */ + edma_or_array2(ctlr, EDMA_DRAE, 0, channel >> 5, BIT(channel & 0x1f)); + + /* ensure no events are pending */ + edma_stop(EDMA_CTLR_CHAN(ctlr, channel)); + memcpy_toio(edmacc_regs_base[ctlr] + PARM_OFFSET(channel), + &dummy_paramset, PARM_SIZE); + + if (callback) + setup_dma_interrupt(EDMA_CTLR_CHAN(ctlr, channel), + callback, data); + + map_dmach_queue(ctlr, channel, eventq_no); + + return EDMA_CTLR_CHAN(ctlr, channel); +} +EXPORT_SYMBOL(edma_alloc_channel); + + +/** + * edma_free_channel - deallocate DMA channel + * @channel: dma channel returned from edma_alloc_channel() + * + * This deallocates the DMA channel and associated parameter RAM slot + * allocated by edma_alloc_channel(). + * + * Callers are responsible for ensuring the channel is inactive, and + * will not be reactivated by linking, chaining, or software calls to + * edma_start(). 
+ */ +void edma_free_channel(unsigned channel) +{ + unsigned ctlr; + + ctlr = EDMA_CTLR(channel); + channel = EDMA_CHAN_SLOT(channel); + + if (channel >= edma_cc[ctlr]->num_channels) + return; + + setup_dma_interrupt(channel, NULL, NULL); + /* REVISIT should probably take out of shadow region 0 */ + + memcpy_toio(edmacc_regs_base[ctlr] + PARM_OFFSET(channel), + &dummy_paramset, PARM_SIZE); + clear_bit(channel, edma_cc[ctlr]->edma_inuse); +} +EXPORT_SYMBOL(edma_free_channel); + +/** + * edma_alloc_slot - allocate DMA parameter RAM + * @slot: specific slot to allocate; negative for "any unused slot" + * + * This allocates a parameter RAM slot, initializing it to hold a + * dummy transfer. Slots allocated using this routine have not been + * mapped to a hardware DMA channel, and will normally be used by + * linking to them from a slot associated with a DMA channel. + * + * Normal use is to pass EDMA_SLOT_ANY as the @slot, but specific + * slots may be allocated on behalf of DSP firmware. + * + * Returns the number of the slot, else negative errno. + */ +int edma_alloc_slot(unsigned ctlr, int slot) +{ + if (!edma_cc[ctlr]) + return -EINVAL; + + if (slot >= 0) + slot = EDMA_CHAN_SLOT(slot); + + if (slot < 0) { + slot = edma_cc[ctlr]->num_channels; + for (;;) { + slot = find_next_zero_bit(edma_cc[ctlr]->edma_inuse, + edma_cc[ctlr]->num_slots, slot); + if (slot == edma_cc[ctlr]->num_slots) + return -ENOMEM; + if (!test_and_set_bit(slot, edma_cc[ctlr]->edma_inuse)) + break; + } + } else if (slot < edma_cc[ctlr]->num_channels || + slot >= edma_cc[ctlr]->num_slots) { + return -EINVAL; + } else if (test_and_set_bit(slot, edma_cc[ctlr]->edma_inuse)) { + return -EBUSY; + } + + memcpy_toio(edmacc_regs_base[ctlr] + PARM_OFFSET(slot), + &dummy_paramset, PARM_SIZE); + + return EDMA_CTLR_CHAN(ctlr, slot); +} +EXPORT_SYMBOL(edma_alloc_slot); + +/** + * edma_free_slot - deallocate DMA parameter RAM + * @slot: parameter RAM slot returned from edma_alloc_slot() + * + * This deallocates the parameter RAM slot allocated by edma_alloc_slot(). + * Callers are responsible for ensuring the slot is inactive, and will + * not be activated. + */ +void edma_free_slot(unsigned slot) +{ + unsigned ctlr; + + ctlr = EDMA_CTLR(slot); + slot = EDMA_CHAN_SLOT(slot); + + if (slot < edma_cc[ctlr]->num_channels || + slot >= edma_cc[ctlr]->num_slots) + return; + + memcpy_toio(edmacc_regs_base[ctlr] + PARM_OFFSET(slot), + &dummy_paramset, PARM_SIZE); + clear_bit(slot, edma_cc[ctlr]->edma_inuse); +} +EXPORT_SYMBOL(edma_free_slot); + + +/** + * edma_alloc_cont_slots- alloc contiguous parameter RAM slots + * The API will return the starting point of a set of + * contiguous parameter RAM slots that have been requested + * + * @id: can only be EDMA_CONT_PARAMS_ANY or EDMA_CONT_PARAMS_FIXED_EXACT + * or EDMA_CONT_PARAMS_FIXED_NOT_EXACT + * @count: number of contiguous Paramter RAM slots + * @slot - the start value of Parameter RAM slot that should be passed if id + * is EDMA_CONT_PARAMS_FIXED_EXACT or EDMA_CONT_PARAMS_FIXED_NOT_EXACT + * + * If id is EDMA_CONT_PARAMS_ANY then the API starts looking for a set of + * contiguous Parameter RAM slots from parameter RAM 64 in the case of + * DaVinci SOCs and 32 in the case of DA8xx SOCs. + * + * If id is EDMA_CONT_PARAMS_FIXED_EXACT then the API starts looking for a + * set of contiguous parameter RAM slots from the "slot" that is passed as an + * argument to the API. 
+ * + * If id is EDMA_CONT_PARAMS_FIXED_NOT_EXACT then the API initially tries + * starts looking for a set of contiguous parameter RAMs from the "slot" + * that is passed as an argument to the API. On failure the API will try to + * find a set of contiguous Parameter RAM slots from the remaining Parameter + * RAM slots + */ +int edma_alloc_cont_slots(unsigned ctlr, unsigned int id, int slot, int count) +{ + /* + * The start slot requested should be greater than + * the number of channels and lesser than the total number + * of slots + */ + if ((id != EDMA_CONT_PARAMS_ANY) && + (slot < edma_cc[ctlr]->num_channels || + slot >= edma_cc[ctlr]->num_slots)) + return -EINVAL; + + /* + * The number of parameter RAM slots requested cannot be less than 1 + * and cannot be more than the number of slots minus the number of + * channels + */ + if (count < 1 || count > + (edma_cc[ctlr]->num_slots - edma_cc[ctlr]->num_channels)) + return -EINVAL; + + switch (id) { + case EDMA_CONT_PARAMS_ANY: + return reserve_contiguous_slots(ctlr, id, count, + edma_cc[ctlr]->num_channels); + case EDMA_CONT_PARAMS_FIXED_EXACT: + case EDMA_CONT_PARAMS_FIXED_NOT_EXACT: + return reserve_contiguous_slots(ctlr, id, count, slot); + default: + return -EINVAL; + } + +} +EXPORT_SYMBOL(edma_alloc_cont_slots); + +/** + * edma_free_cont_slots - deallocate DMA parameter RAM slots + * @slot: first parameter RAM of a set of parameter RAM slots to be freed + * @count: the number of contiguous parameter RAM slots to be freed + * + * This deallocates the parameter RAM slots allocated by + * edma_alloc_cont_slots. + * Callers/applications need to keep track of sets of contiguous + * parameter RAM slots that have been allocated using the edma_alloc_cont_slots + * API. + * Callers are responsible for ensuring the slots are inactive, and will + * not be activated. + */ +int edma_free_cont_slots(unsigned slot, int count) +{ + unsigned ctlr, slot_to_free; + int i; + + ctlr = EDMA_CTLR(slot); + slot = EDMA_CHAN_SLOT(slot); + + if (slot < edma_cc[ctlr]->num_channels || + slot >= edma_cc[ctlr]->num_slots || + count < 1) + return -EINVAL; + + for (i = slot; i < slot + count; ++i) { + ctlr = EDMA_CTLR(i); + slot_to_free = EDMA_CHAN_SLOT(i); + + memcpy_toio(edmacc_regs_base[ctlr] + PARM_OFFSET(slot_to_free), + &dummy_paramset, PARM_SIZE); + clear_bit(slot_to_free, edma_cc[ctlr]->edma_inuse); + } + + return 0; +} +EXPORT_SYMBOL(edma_free_cont_slots); + +/*-----------------------------------------------------------------------*/ + +/* Parameter RAM operations (i) -- read/write partial slots */ + +/** + * edma_set_src - set initial DMA source address in parameter RAM slot + * @slot: parameter RAM slot being configured + * @src_port: physical address of source (memory, controller FIFO, etc) + * @addressMode: INCR, except in very rare cases + * @fifoWidth: ignored unless @addressMode is FIFO, else specifies the + * width to use when addressing the fifo (e.g. W8BIT, W32BIT) + * + * Note that the source address is modified during the DMA transfer + * according to edma_set_src_index(). 
+ */ +void edma_set_src(unsigned slot, dma_addr_t src_port, + enum address_mode mode, enum fifo_width width) +{ + unsigned ctlr; + + ctlr = EDMA_CTLR(slot); + slot = EDMA_CHAN_SLOT(slot); + + if (slot < edma_cc[ctlr]->num_slots) { + unsigned int i = edma_parm_read(ctlr, PARM_OPT, slot); + + if (mode) { + /* set SAM and program FWID */ + i = (i & ~(EDMA_FWID)) | (SAM | ((width & 0x7) << 8)); + } else { + /* clear SAM */ + i &= ~SAM; + } + edma_parm_write(ctlr, PARM_OPT, slot, i); + + /* set the source port address + in source register of param structure */ + edma_parm_write(ctlr, PARM_SRC, slot, src_port); + } +} +EXPORT_SYMBOL(edma_set_src); + +/** + * edma_set_dest - set initial DMA destination address in parameter RAM slot + * @slot: parameter RAM slot being configured + * @dest_port: physical address of destination (memory, controller FIFO, etc) + * @addressMode: INCR, except in very rare cases + * @fifoWidth: ignored unless @addressMode is FIFO, else specifies the + * width to use when addressing the fifo (e.g. W8BIT, W32BIT) + * + * Note that the destination address is modified during the DMA transfer + * according to edma_set_dest_index(). + */ +void edma_set_dest(unsigned slot, dma_addr_t dest_port, + enum address_mode mode, enum fifo_width width) +{ + unsigned ctlr; + + ctlr = EDMA_CTLR(slot); + slot = EDMA_CHAN_SLOT(slot); + + if (slot < edma_cc[ctlr]->num_slots) { + unsigned int i = edma_parm_read(ctlr, PARM_OPT, slot); + + if (mode) { + /* set DAM and program FWID */ + i = (i & ~(EDMA_FWID)) | (DAM | ((width & 0x7) << 8)); + } else { + /* clear DAM */ + i &= ~DAM; + } + edma_parm_write(ctlr, PARM_OPT, slot, i); + /* set the destination port address + in dest register of param structure */ + edma_parm_write(ctlr, PARM_DST, slot, dest_port); + } +} +EXPORT_SYMBOL(edma_set_dest); + +/** + * edma_get_position - returns the current transfer point + * @slot: parameter RAM slot being examined + * @dst: true selects the dest position, false the source + * + * Returns the position of the current active slot + */ +dma_addr_t edma_get_position(unsigned slot, bool dst) +{ + u32 offs, ctlr = EDMA_CTLR(slot); + + slot = EDMA_CHAN_SLOT(slot); + + offs = PARM_OFFSET(slot); + offs += dst ? PARM_DST : PARM_SRC; + + return edma_read(ctlr, offs); +} + +/** + * edma_set_src_index - configure DMA source address indexing + * @slot: parameter RAM slot being configured + * @src_bidx: byte offset between source arrays in a frame + * @src_cidx: byte offset between source frames in a block + * + * Offsets are specified to support either contiguous or discontiguous + * memory transfers, or repeated access to a hardware register, as needed. + * When accessing hardware registers, both offsets are normally zero. + */ +void edma_set_src_index(unsigned slot, s16 src_bidx, s16 src_cidx) +{ + unsigned ctlr; + + ctlr = EDMA_CTLR(slot); + slot = EDMA_CHAN_SLOT(slot); + + if (slot < edma_cc[ctlr]->num_slots) { + edma_parm_modify(ctlr, PARM_SRC_DST_BIDX, slot, + 0xffff0000, src_bidx); + edma_parm_modify(ctlr, PARM_SRC_DST_CIDX, slot, + 0xffff0000, src_cidx); + } +} +EXPORT_SYMBOL(edma_set_src_index); + +/** + * edma_set_dest_index - configure DMA destination address indexing + * @slot: parameter RAM slot being configured + * @dest_bidx: byte offset between destination arrays in a frame + * @dest_cidx: byte offset between destination frames in a block + * + * Offsets are specified to support either contiguous or discontiguous + * memory transfers, or repeated access to a hardware register, as needed. 
+ * When accessing hardware registers, both offsets are normally zero. + */ +void edma_set_dest_index(unsigned slot, s16 dest_bidx, s16 dest_cidx) +{ + unsigned ctlr; + + ctlr = EDMA_CTLR(slot); + slot = EDMA_CHAN_SLOT(slot); + + if (slot < edma_cc[ctlr]->num_slots) { + edma_parm_modify(ctlr, PARM_SRC_DST_BIDX, slot, + 0x0000ffff, dest_bidx << 16); + edma_parm_modify(ctlr, PARM_SRC_DST_CIDX, slot, + 0x0000ffff, dest_cidx << 16); + } +} +EXPORT_SYMBOL(edma_set_dest_index); + +/** + * edma_set_transfer_params - configure DMA transfer parameters + * @slot: parameter RAM slot being configured + * @acnt: how many bytes per array (at least one) + * @bcnt: how many arrays per frame (at least one) + * @ccnt: how many frames per block (at least one) + * @bcnt_rld: used only for A-Synchronized transfers; this specifies + * the value to reload into bcnt when it decrements to zero + * @sync_mode: ASYNC or ABSYNC + * + * See the EDMA3 documentation to understand how to configure and link + * transfers using the fields in PaRAM slots. If you are not doing it + * all at once with edma_write_slot(), you will use this routine + * plus two calls each for source and destination, setting the initial + * address and saying how to index that address. + * + * An example of an A-Synchronized transfer is a serial link using a + * single word shift register. In that case, @acnt would be equal to + * that word size; the serial controller issues a DMA synchronization + * event to transfer each word, and memory access by the DMA transfer + * controller will be word-at-a-time. + * + * An example of an AB-Synchronized transfer is a device using a FIFO. + * In that case, @acnt equals the FIFO width and @bcnt equals its depth. + * The controller with the FIFO issues DMA synchronization events when + * the FIFO threshold is reached, and the DMA transfer controller will + * transfer one frame to (or from) the FIFO. It will probably use + * efficient burst modes to access memory. + */ +void edma_set_transfer_params(unsigned slot, + u16 acnt, u16 bcnt, u16 ccnt, + u16 bcnt_rld, enum sync_dimension sync_mode) +{ + unsigned ctlr; + + ctlr = EDMA_CTLR(slot); + slot = EDMA_CHAN_SLOT(slot); + + if (slot < edma_cc[ctlr]->num_slots) { + edma_parm_modify(ctlr, PARM_LINK_BCNTRLD, slot, + 0x0000ffff, bcnt_rld << 16); + if (sync_mode == ASYNC) + edma_parm_and(ctlr, PARM_OPT, slot, ~SYNCDIM); + else + edma_parm_or(ctlr, PARM_OPT, slot, SYNCDIM); + /* Set the acount, bcount, ccount registers */ + edma_parm_write(ctlr, PARM_A_B_CNT, slot, (bcnt << 16) | acnt); + edma_parm_write(ctlr, PARM_CCNT, slot, ccnt); + } +} +EXPORT_SYMBOL(edma_set_transfer_params); + +/** + * edma_link - link one parameter RAM slot to another + * @from: parameter RAM slot originating the link + * @to: parameter RAM slot which is the link target + * + * The originating slot should not be part of any active DMA transfer. + */ +void edma_link(unsigned from, unsigned to) +{ + unsigned ctlr_from, ctlr_to; + + ctlr_from = EDMA_CTLR(from); + from = EDMA_CHAN_SLOT(from); + ctlr_to = EDMA_CTLR(to); + to = EDMA_CHAN_SLOT(to); + + if (from >= edma_cc[ctlr_from]->num_slots) + return; + if (to >= edma_cc[ctlr_to]->num_slots) + return; + edma_parm_modify(ctlr_from, PARM_LINK_BCNTRLD, from, 0xffff0000, + PARM_OFFSET(to)); +} +EXPORT_SYMBOL(edma_link); + +/** + * edma_unlink - cut link from one parameter RAM slot + * @from: parameter RAM slot originating the link + * + * The originating slot should not be part of any active DMA transfer. + * Its link is set to 0xffff. 
+ */ +void edma_unlink(unsigned from) +{ + unsigned ctlr; + + ctlr = EDMA_CTLR(from); + from = EDMA_CHAN_SLOT(from); + + if (from >= edma_cc[ctlr]->num_slots) + return; + edma_parm_or(ctlr, PARM_LINK_BCNTRLD, from, 0xffff); +} +EXPORT_SYMBOL(edma_unlink); + +/*-----------------------------------------------------------------------*/ + +/* Parameter RAM operations (ii) -- read/write whole parameter sets */ + +/** + * edma_write_slot - write parameter RAM data for slot + * @slot: number of parameter RAM slot being modified + * @param: data to be written into parameter RAM slot + * + * Use this to assign all parameters of a transfer at once. This + * allows more efficient setup of transfers than issuing multiple + * calls to set up those parameters in small pieces, and provides + * complete control over all transfer options. + */ +void edma_write_slot(unsigned slot, const struct edmacc_param *param) +{ + unsigned ctlr; + + ctlr = EDMA_CTLR(slot); + slot = EDMA_CHAN_SLOT(slot); + + if (slot >= edma_cc[ctlr]->num_slots) + return; + memcpy_toio(edmacc_regs_base[ctlr] + PARM_OFFSET(slot), param, + PARM_SIZE); +} +EXPORT_SYMBOL(edma_write_slot); + +/** + * edma_read_slot - read parameter RAM data from slot + * @slot: number of parameter RAM slot being copied + * @param: where to store copy of parameter RAM data + * + * Use this to read data from a parameter RAM slot, perhaps to + * save them as a template for later reuse. + */ +void edma_read_slot(unsigned slot, struct edmacc_param *param) +{ + unsigned ctlr; + + ctlr = EDMA_CTLR(slot); + slot = EDMA_CHAN_SLOT(slot); + + if (slot >= edma_cc[ctlr]->num_slots) + return; + memcpy_fromio(param, edmacc_regs_base[ctlr] + PARM_OFFSET(slot), + PARM_SIZE); +} +EXPORT_SYMBOL(edma_read_slot); + +/*-----------------------------------------------------------------------*/ + +/* Various EDMA channel control operations */ + +/** + * edma_pause - pause dma on a channel + * @channel: on which edma_start() has been called + * + * This temporarily disables EDMA hardware events on the specified channel, + * preventing them from triggering new transfers on its behalf + */ +void edma_pause(unsigned channel) +{ + unsigned ctlr; + + ctlr = EDMA_CTLR(channel); + channel = EDMA_CHAN_SLOT(channel); + + if (channel < edma_cc[ctlr]->num_channels) { + unsigned int mask = BIT(channel & 0x1f); + + edma_shadow0_write_array(ctlr, SH_EECR, channel >> 5, mask); + } +} +EXPORT_SYMBOL(edma_pause); + +/** + * edma_resume - resumes dma on a paused channel + * @channel: on which edma_pause() has been called + * + * This re-enables EDMA hardware events on the specified channel. 
+ */ +void edma_resume(unsigned channel) +{ + unsigned ctlr; + + ctlr = EDMA_CTLR(channel); + channel = EDMA_CHAN_SLOT(channel); + + if (channel < edma_cc[ctlr]->num_channels) { + unsigned int mask = BIT(channel & 0x1f); + + edma_shadow0_write_array(ctlr, SH_EESR, channel >> 5, mask); + } +} +EXPORT_SYMBOL(edma_resume); + +int edma_trigger_channel(unsigned channel) +{ + unsigned ctlr; + unsigned int mask; + + ctlr = EDMA_CTLR(channel); + channel = EDMA_CHAN_SLOT(channel); + mask = BIT(channel & 0x1f); + + edma_shadow0_write_array(ctlr, SH_ESR, (channel >> 5), mask); + + pr_debug("EDMA: ESR%d %08x\n", (channel >> 5), + edma_shadow0_read_array(ctlr, SH_ESR, (channel >> 5))); + return 0; +} +EXPORT_SYMBOL(edma_trigger_channel); + +/** + * edma_start - start dma on a channel + * @channel: channel being activated + * + * Channels with event associations will be triggered by their hardware + * events, and channels without such associations will be triggered by + * software. (At this writing there is no interface for using software + * triggers except with channels that don't support hardware triggers.) + * + * Returns zero on success, else negative errno. + */ +int edma_start(unsigned channel) +{ + unsigned ctlr; + + ctlr = EDMA_CTLR(channel); + channel = EDMA_CHAN_SLOT(channel); + + if (channel < edma_cc[ctlr]->num_channels) { + int j = channel >> 5; + unsigned int mask = BIT(channel & 0x1f); + + /* EDMA channels without event association */ + if (test_bit(channel, edma_cc[ctlr]->edma_unused)) { + pr_debug("EDMA: ESR%d %08x\n", j, + edma_shadow0_read_array(ctlr, SH_ESR, j)); + edma_shadow0_write_array(ctlr, SH_ESR, j, mask); + return 0; + } + + /* EDMA channel with event association */ + pr_debug("EDMA: ER%d %08x\n", j, + edma_shadow0_read_array(ctlr, SH_ER, j)); + /* Clear any pending event or error */ + edma_write_array(ctlr, EDMA_ECR, j, mask); + edma_write_array(ctlr, EDMA_EMCR, j, mask); + /* Clear any SER */ + edma_shadow0_write_array(ctlr, SH_SECR, j, mask); + edma_shadow0_write_array(ctlr, SH_EESR, j, mask); + pr_debug("EDMA: EER%d %08x\n", j, + edma_shadow0_read_array(ctlr, SH_EER, j)); + return 0; + } + + return -EINVAL; +} +EXPORT_SYMBOL(edma_start); + +/** + * edma_stop - stops dma on the channel passed + * @channel: channel being deactivated + * + * When @lch is a channel, any active transfer is paused and + * all pending hardware events are cleared. The current transfer + * may not be resumed, and the channel's Parameter RAM should be + * reinitialized before being reused. + */ +void edma_stop(unsigned channel) +{ + unsigned ctlr; + + ctlr = EDMA_CTLR(channel); + channel = EDMA_CHAN_SLOT(channel); + + if (channel < edma_cc[ctlr]->num_channels) { + int j = channel >> 5; + unsigned int mask = BIT(channel & 0x1f); + + edma_shadow0_write_array(ctlr, SH_EECR, j, mask); + edma_shadow0_write_array(ctlr, SH_ECR, j, mask); + edma_shadow0_write_array(ctlr, SH_SECR, j, mask); + edma_write_array(ctlr, EDMA_EMCR, j, mask); + + pr_debug("EDMA: EER%d %08x\n", j, + edma_shadow0_read_array(ctlr, SH_EER, j)); + + /* REVISIT: consider guarding against inappropriate event + * chaining by overwriting with dummy_paramset. + */ + } +} +EXPORT_SYMBOL(edma_stop); + +/****************************************************************************** + * + * It cleans ParamEntry qand bring back EDMA to initial state if media has + * been removed before EDMA has finished.It is usedful for removable media. 
+ * Arguments: + * ch_no - channel no + * + * Return: zero on success, or corresponding error no on failure + * + * FIXME this should not be needed ... edma_stop() should suffice. + * + *****************************************************************************/ + +void edma_clean_channel(unsigned channel) +{ + unsigned ctlr; + + ctlr = EDMA_CTLR(channel); + channel = EDMA_CHAN_SLOT(channel); + + if (channel < edma_cc[ctlr]->num_channels) { + int j = (channel >> 5); + unsigned int mask = BIT(channel & 0x1f); + + pr_debug("EDMA: EMR%d %08x\n", j, + edma_read_array(ctlr, EDMA_EMR, j)); + edma_shadow0_write_array(ctlr, SH_ECR, j, mask); + /* Clear the corresponding EMR bits */ + edma_write_array(ctlr, EDMA_EMCR, j, mask); + /* Clear any SER */ + edma_shadow0_write_array(ctlr, SH_SECR, j, mask); + edma_write(ctlr, EDMA_CCERRCLR, BIT(16) | BIT(1) | BIT(0)); + } +} +EXPORT_SYMBOL(edma_clean_channel); + +/* + * edma_clear_event - clear an outstanding event on the DMA channel + * Arguments: + * channel - channel number + */ +void edma_clear_event(unsigned channel) +{ + unsigned ctlr; + + ctlr = EDMA_CTLR(channel); + channel = EDMA_CHAN_SLOT(channel); + + if (channel >= edma_cc[ctlr]->num_channels) + return; + if (channel < 32) + edma_write(ctlr, EDMA_ECR, BIT(channel)); + else + edma_write(ctlr, EDMA_ECRH, BIT(channel - 32)); +} +EXPORT_SYMBOL(edma_clear_event); + +static int edma_setup_from_hw(struct device *dev, struct edma_soc_info *pdata, + struct edma *edma_cc) +{ + int i; + u32 value, cccfg; + s8 (*queue_priority_map)[2]; + + /* Decode the eDMA3 configuration from CCCFG register */ + cccfg = edma_read(0, EDMA_CCCFG); + + value = GET_NUM_REGN(cccfg); + edma_cc->num_region = BIT(value); + + value = GET_NUM_DMACH(cccfg); + edma_cc->num_channels = BIT(value + 1); + + value = GET_NUM_PAENTRY(cccfg); + edma_cc->num_slots = BIT(value + 4); + + value = GET_NUM_EVQUE(cccfg); + edma_cc->num_tc = value + 1; + + dev_dbg(dev, "eDMA3 HW configuration (cccfg: 0x%08x):\n", cccfg); + dev_dbg(dev, "num_region: %u\n", edma_cc->num_region); + dev_dbg(dev, "num_channel: %u\n", edma_cc->num_channels); + dev_dbg(dev, "num_slot: %u\n", edma_cc->num_slots); + dev_dbg(dev, "num_tc: %u\n", edma_cc->num_tc); + + /* Nothing need to be done if queue priority is provided */ + if (pdata->queue_priority_mapping) + return 0; + + /* + * Configure TC/queue priority as follows: + * Q0 - priority 0 + * Q1 - priority 1 + * Q2 - priority 2 + * ... + * The meaning of priority numbers: 0 highest priority, 7 lowest + * priority. So Q0 is the highest priority queue and the last queue has + * the lowest priority. 
+ */ + queue_priority_map = devm_kzalloc(dev, + (edma_cc->num_tc + 1) * sizeof(s8), + GFP_KERNEL); + if (!queue_priority_map) + return -ENOMEM; + + for (i = 0; i < edma_cc->num_tc; i++) { + queue_priority_map[i][0] = i; + queue_priority_map[i][1] = i; + } + queue_priority_map[i][0] = -1; + queue_priority_map[i][1] = -1; + + pdata->queue_priority_mapping = queue_priority_map; + pdata->default_queue = 0; + + return 0; +} + +#if IS_ENABLED(CONFIG_OF) && IS_ENABLED(CONFIG_DMADEVICES) + +static int edma_xbar_event_map(struct device *dev, struct device_node *node, + struct edma_soc_info *pdata, size_t sz) +{ + const char pname[] = "ti,edma-xbar-event-map"; + struct resource res; + void __iomem *xbar; + s16 (*xbar_chans)[2]; + size_t nelm = sz / sizeof(s16); + u32 shift, offset, mux; + int ret, i; + + xbar_chans = devm_kzalloc(dev, (nelm + 2) * sizeof(s16), GFP_KERNEL); + if (!xbar_chans) + return -ENOMEM; + + ret = of_address_to_resource(node, 1, &res); + if (ret) + return -ENOMEM; + + xbar = devm_ioremap(dev, res.start, resource_size(&res)); + if (!xbar) + return -ENOMEM; + + ret = of_property_read_u16_array(node, pname, (u16 *)xbar_chans, nelm); + if (ret) + return -EIO; + + /* Invalidate last entry for the other user of this mess */ + nelm >>= 1; + xbar_chans[nelm][0] = xbar_chans[nelm][1] = -1; + + for (i = 0; i < nelm; i++) { + shift = (xbar_chans[i][1] & 0x03) << 3; + offset = xbar_chans[i][1] & 0xfffffffc; + mux = readl(xbar + offset); + mux &= ~(0xff << shift); + mux |= xbar_chans[i][0] << shift; + writel(mux, (xbar + offset)); + } + + pdata->xbar_chans = (const s16 (*)[2]) xbar_chans; + return 0; +} + +static int edma_of_parse_dt(struct device *dev, + struct device_node *node, + struct edma_soc_info *pdata) +{ + int ret = 0; + struct property *prop; + size_t sz; + struct edma_rsv_info *rsv_info; + + rsv_info = devm_kzalloc(dev, sizeof(struct edma_rsv_info), GFP_KERNEL); + if (!rsv_info) + return -ENOMEM; + pdata->rsv = rsv_info; + + prop = of_find_property(node, "ti,edma-xbar-event-map", &sz); + if (prop) + ret = edma_xbar_event_map(dev, node, pdata, sz); + + return ret; +} + +static struct of_dma_filter_info edma_filter_info = { + .filter_fn = edma_filter_fn, +}; + +static struct edma_soc_info *edma_setup_info_from_dt(struct device *dev, + struct device_node *node) +{ + struct edma_soc_info *info; + int ret; + + info = devm_kzalloc(dev, sizeof(struct edma_soc_info), GFP_KERNEL); + if (!info) + return ERR_PTR(-ENOMEM); + + ret = edma_of_parse_dt(dev, node, info); + if (ret) + return ERR_PTR(ret); + + dma_cap_set(DMA_SLAVE, edma_filter_info.dma_cap); + dma_cap_set(DMA_CYCLIC, edma_filter_info.dma_cap); + of_dma_controller_register(dev->of_node, of_dma_simple_xlate, + &edma_filter_info); + + return info; +} +#else +static struct edma_soc_info *edma_setup_info_from_dt(struct device *dev, + struct device_node *node) +{ + return ERR_PTR(-ENOSYS); +} +#endif + +static int edma_probe(struct platform_device *pdev) +{ + struct edma_soc_info **info = pdev->dev.platform_data; + struct edma_soc_info *ninfo[EDMA_MAX_CC] = {NULL}; + s8 (*queue_priority_mapping)[2]; + int i, j, off, ln, found = 0; + int status = -1; + const s16 (*rsv_chans)[2]; + const s16 (*rsv_slots)[2]; + const s16 (*xbar_chans)[2]; + int irq[EDMA_MAX_CC] = {0, 0}; + int err_irq[EDMA_MAX_CC] = {0, 0}; + struct resource *r[EDMA_MAX_CC] = {NULL}; + struct resource res[EDMA_MAX_CC]; + char res_name[10]; + struct device_node *node = pdev->dev.of_node; + struct device *dev = &pdev->dev; + int ret; + + if (node) { + /* Check if this is a 
second instance registered */ + if (arch_num_cc) { + dev_err(dev, "only one EDMA instance is supported via DT\n"); + return -ENODEV; + } + + ninfo[0] = edma_setup_info_from_dt(dev, node); + if (IS_ERR(ninfo[0])) { + dev_err(dev, "failed to get DT data\n"); + return PTR_ERR(ninfo[0]); + } + + info = ninfo; + } + + if (!info) + return -ENODEV; + + pm_runtime_enable(dev); + ret = pm_runtime_get_sync(dev); + if (ret < 0) { + dev_err(dev, "pm_runtime_get_sync() failed\n"); + return ret; + } + + for (j = 0; j < EDMA_MAX_CC; j++) { + if (!info[j]) { + if (!found) + return -ENODEV; + break; + } + if (node) { + ret = of_address_to_resource(node, j, &res[j]); + if (!ret) + r[j] = &res[j]; + } else { + sprintf(res_name, "edma_cc%d", j); + r[j] = platform_get_resource_byname(pdev, + IORESOURCE_MEM, + res_name); + } + if (!r[j]) { + if (found) + break; + else + return -ENODEV; + } else { + found = 1; + } + + edmacc_regs_base[j] = devm_ioremap_resource(&pdev->dev, r[j]); + if (IS_ERR(edmacc_regs_base[j])) + return PTR_ERR(edmacc_regs_base[j]); + + edma_cc[j] = devm_kzalloc(&pdev->dev, sizeof(struct edma), + GFP_KERNEL); + if (!edma_cc[j]) + return -ENOMEM; + + /* Get eDMA3 configuration from IP */ + ret = edma_setup_from_hw(dev, info[j], edma_cc[j]); + if (ret) + return ret; + + edma_cc[j]->default_queue = info[j]->default_queue; + + dev_dbg(&pdev->dev, "DMA REG BASE ADDR=%p\n", + edmacc_regs_base[j]); + + for (i = 0; i < edma_cc[j]->num_slots; i++) + memcpy_toio(edmacc_regs_base[j] + PARM_OFFSET(i), + &dummy_paramset, PARM_SIZE); + + /* Mark all channels as unused */ + memset(edma_cc[j]->edma_unused, 0xff, + sizeof(edma_cc[j]->edma_unused)); + + if (info[j]->rsv) { + + /* Clear the reserved channels in unused list */ + rsv_chans = info[j]->rsv->rsv_chans; + if (rsv_chans) { + for (i = 0; rsv_chans[i][0] != -1; i++) { + off = rsv_chans[i][0]; + ln = rsv_chans[i][1]; + clear_bits(off, ln, + edma_cc[j]->edma_unused); + } + } + + /* Set the reserved slots in inuse list */ + rsv_slots = info[j]->rsv->rsv_slots; + if (rsv_slots) { + for (i = 0; rsv_slots[i][0] != -1; i++) { + off = rsv_slots[i][0]; + ln = rsv_slots[i][1]; + set_bits(off, ln, + edma_cc[j]->edma_inuse); + } + } + } + + /* Clear the xbar mapped channels in unused list */ + xbar_chans = info[j]->xbar_chans; + if (xbar_chans) { + for (i = 0; xbar_chans[i][1] != -1; i++) { + off = xbar_chans[i][1]; + clear_bits(off, 1, + edma_cc[j]->edma_unused); + } + } + + if (node) { + irq[j] = irq_of_parse_and_map(node, 0); + err_irq[j] = irq_of_parse_and_map(node, 2); + } else { + char irq_name[10]; + + sprintf(irq_name, "edma%d", j); + irq[j] = platform_get_irq_byname(pdev, irq_name); + + sprintf(irq_name, "edma%d_err", j); + err_irq[j] = platform_get_irq_byname(pdev, irq_name); + } + edma_cc[j]->irq_res_start = irq[j]; + edma_cc[j]->irq_res_end = err_irq[j]; + + status = devm_request_irq(dev, irq[j], dma_irq_handler, 0, + "edma", dev); + if (status < 0) { + dev_dbg(&pdev->dev, + "devm_request_irq %d failed --> %d\n", + irq[j], status); + return status; + } + + status = devm_request_irq(dev, err_irq[j], dma_ccerr_handler, 0, + "edma_error", dev); + if (status < 0) { + dev_dbg(&pdev->dev, + "devm_request_irq %d failed --> %d\n", + err_irq[j], status); + return status; + } + + for (i = 0; i < edma_cc[j]->num_channels; i++) + map_dmach_queue(j, i, info[j]->default_queue); + + queue_priority_mapping = info[j]->queue_priority_mapping; + + /* Event queue priority mapping */ + for (i = 0; queue_priority_mapping[i][0] != -1; i++) + assign_priority_to_queue(j, + 
queue_priority_mapping[i][0], + queue_priority_mapping[i][1]); + + /* Map the channel to param entry if channel mapping logic + * exist + */ + if (edma_read(j, EDMA_CCCFG) & CHMAP_EXIST) + map_dmach_param(j); + + for (i = 0; i < edma_cc[j]->num_region; i++) { + edma_write_array2(j, EDMA_DRAE, i, 0, 0x0); + edma_write_array2(j, EDMA_DRAE, i, 1, 0x0); + edma_write_array(j, EDMA_QRAE, i, 0x0); + } + arch_num_cc++; + } + + return 0; +} + +static struct platform_driver edma_driver = { + .driver = { + .name = "edma", + .of_match_table = edma_of_ids, + }, + .probe = edma_probe, +}; + +static int __init edma_init(void) +{ + return platform_driver_probe(&edma_driver, edma_probe); +} +arch_initcall(edma_init); + diff --git a/arch/arm/common/firmware.c b/arch/arm/common/firmware.c new file mode 100644 index 00000000000..27ddccb1131 --- /dev/null +++ b/arch/arm/common/firmware.c @@ -0,0 +1,18 @@ +/* + * Copyright (C) 2012 Samsung Electronics. + * Kyungmin Park <kyungmin.park@samsung.com> + * Tomasz Figa <t.figa@samsung.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/suspend.h> + +#include <asm/firmware.h> + +static const struct firmware_ops default_firmware_ops; + +const struct firmware_ops *firmware_ops = &default_firmware_ops; diff --git a/arch/arm/common/gic.c b/arch/arm/common/gic.c deleted file mode 100644 index 337741f734a..00000000000 --- a/arch/arm/common/gic.c +++ /dev/null @@ -1,265 +0,0 @@ -/* - * linux/arch/arm/common/gic.c - * - * Copyright (C) 2002 ARM Limited, All Rights Reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Interrupt architecture for the GIC: - * - * o There is one Interrupt Distributor, which receives interrupts - * from system devices and sends them to the Interrupt Controllers. - * - * o There is one CPU Interface per CPU, which sends interrupts sent - * by the Distributor, and interrupts generated locally, to the - * associated CPU. The base address of the CPU interface is usually - * aliased so that the same address points to different chips depending - * on the CPU it is accessed from. - * - * Note that IRQs 0-31 are special - they are local to each CPU. - * As such, the enable set/clear, pending set/clear and active bit - * registers are banked per-cpu for these sources. 
- */ -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/list.h> -#include <linux/smp.h> -#include <linux/cpumask.h> -#include <linux/io.h> - -#include <asm/irq.h> -#include <asm/mach/irq.h> -#include <asm/hardware/gic.h> - -static DEFINE_SPINLOCK(irq_controller_lock); - -struct gic_chip_data { - unsigned int irq_offset; - void __iomem *dist_base; - void __iomem *cpu_base; -}; - -#ifndef MAX_GIC_NR -#define MAX_GIC_NR 1 -#endif - -static struct gic_chip_data gic_data[MAX_GIC_NR]; - -static inline void __iomem *gic_dist_base(unsigned int irq) -{ - struct gic_chip_data *gic_data = get_irq_chip_data(irq); - return gic_data->dist_base; -} - -static inline void __iomem *gic_cpu_base(unsigned int irq) -{ - struct gic_chip_data *gic_data = get_irq_chip_data(irq); - return gic_data->cpu_base; -} - -static inline unsigned int gic_irq(unsigned int irq) -{ - struct gic_chip_data *gic_data = get_irq_chip_data(irq); - return irq - gic_data->irq_offset; -} - -/* - * Routines to acknowledge, disable and enable interrupts - * - * Linux assumes that when we're done with an interrupt we need to - * unmask it, in the same way we need to unmask an interrupt when - * we first enable it. - * - * The GIC has a separate notion of "end of interrupt" to re-enable - * an interrupt after handling, in order to support hardware - * prioritisation. - * - * We can make the GIC behave in the way that Linux expects by making - * our "acknowledge" routine disable the interrupt, then mark it as - * complete. - */ -static void gic_ack_irq(unsigned int irq) -{ - u32 mask = 1 << (irq % 32); - - spin_lock(&irq_controller_lock); - writel(mask, gic_dist_base(irq) + GIC_DIST_ENABLE_CLEAR + (gic_irq(irq) / 32) * 4); - writel(gic_irq(irq), gic_cpu_base(irq) + GIC_CPU_EOI); - spin_unlock(&irq_controller_lock); -} - -static void gic_mask_irq(unsigned int irq) -{ - u32 mask = 1 << (irq % 32); - - spin_lock(&irq_controller_lock); - writel(mask, gic_dist_base(irq) + GIC_DIST_ENABLE_CLEAR + (gic_irq(irq) / 32) * 4); - spin_unlock(&irq_controller_lock); -} - -static void gic_unmask_irq(unsigned int irq) -{ - u32 mask = 1 << (irq % 32); - - spin_lock(&irq_controller_lock); - writel(mask, gic_dist_base(irq) + GIC_DIST_ENABLE_SET + (gic_irq(irq) / 32) * 4); - spin_unlock(&irq_controller_lock); -} - -#ifdef CONFIG_SMP -static int gic_set_cpu(unsigned int irq, const struct cpumask *mask_val) -{ - void __iomem *reg = gic_dist_base(irq) + GIC_DIST_TARGET + (gic_irq(irq) & ~3); - unsigned int shift = (irq % 4) * 8; - unsigned int cpu = cpumask_first(mask_val); - u32 val; - - spin_lock(&irq_controller_lock); - irq_desc[irq].node = cpu; - val = readl(reg) & ~(0xff << shift); - val |= 1 << (cpu + shift); - writel(val, reg); - spin_unlock(&irq_controller_lock); - - return 0; -} -#endif - -static void gic_handle_cascade_irq(unsigned int irq, struct irq_desc *desc) -{ - struct gic_chip_data *chip_data = get_irq_data(irq); - struct irq_chip *chip = get_irq_chip(irq); - unsigned int cascade_irq, gic_irq; - unsigned long status; - - /* primary controller ack'ing */ - chip->ack(irq); - - spin_lock(&irq_controller_lock); - status = readl(chip_data->cpu_base + GIC_CPU_INTACK); - spin_unlock(&irq_controller_lock); - - gic_irq = (status & 0x3ff); - if (gic_irq == 1023) - goto out; - - cascade_irq = gic_irq + chip_data->irq_offset; - if (unlikely(gic_irq < 32 || gic_irq > 1020 || cascade_irq >= NR_IRQS)) - do_bad_IRQ(cascade_irq, desc); - else - generic_handle_irq(cascade_irq); - - out: - /* primary controller unmasking */ - chip->unmask(irq); -} 
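For context on the mask/unmask routines just above: the removed code derives, for any interrupt number, which 32-bit GIC distributor ENABLE_SET/ENABLE_CLEAR register to touch and which bit inside it, using (irq / 32) * 4 as the byte offset and irq % 32 as the bit position. The following is a minimal standalone sketch of that arithmetic only, not part of the patch; the helper name is invented for illustration.

/*
 * Sketch (assumption: hosted C, not kernel code) of the banking
 * arithmetic used by the removed gic_mask_irq()/gic_unmask_irq():
 * one 32-bit enable register covers 32 interrupts.
 */
#include <stdint.h>
#include <stdio.h>

static void gic_enable_pos(unsigned int irq, unsigned int *byte_off,
			   uint32_t *mask)
{
	*byte_off = (irq / 32) * 4;	/* which ENABLE_SET/CLEAR register */
	*mask = 1u << (irq % 32);	/* which bit within that register */
}

int main(void)
{
	unsigned int off;
	uint32_t mask;

	gic_enable_pos(34, &off, &mask);
	/* prints: IRQ 34 -> register offset 0x4, mask 0x00000004 */
	printf("IRQ 34 -> register offset 0x%x, mask 0x%08x\n", off, mask);
	return 0;
}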
- -static struct irq_chip gic_chip = { - .name = "GIC", - .ack = gic_ack_irq, - .mask = gic_mask_irq, - .unmask = gic_unmask_irq, -#ifdef CONFIG_SMP - .set_affinity = gic_set_cpu, -#endif -}; - -void __init gic_cascade_irq(unsigned int gic_nr, unsigned int irq) -{ - if (gic_nr >= MAX_GIC_NR) - BUG(); - if (set_irq_data(irq, &gic_data[gic_nr]) != 0) - BUG(); - set_irq_chained_handler(irq, gic_handle_cascade_irq); -} - -void __init gic_dist_init(unsigned int gic_nr, void __iomem *base, - unsigned int irq_start) -{ - unsigned int max_irq, i; - u32 cpumask = 1 << smp_processor_id(); - - if (gic_nr >= MAX_GIC_NR) - BUG(); - - cpumask |= cpumask << 8; - cpumask |= cpumask << 16; - - gic_data[gic_nr].dist_base = base; - gic_data[gic_nr].irq_offset = (irq_start - 1) & ~31; - - writel(0, base + GIC_DIST_CTRL); - - /* - * Find out how many interrupts are supported. - */ - max_irq = readl(base + GIC_DIST_CTR) & 0x1f; - max_irq = (max_irq + 1) * 32; - - /* - * The GIC only supports up to 1020 interrupt sources. - * Limit this to either the architected maximum, or the - * platform maximum. - */ - if (max_irq > max(1020, NR_IRQS)) - max_irq = max(1020, NR_IRQS); - - /* - * Set all global interrupts to be level triggered, active low. - */ - for (i = 32; i < max_irq; i += 16) - writel(0, base + GIC_DIST_CONFIG + i * 4 / 16); - - /* - * Set all global interrupts to this CPU only. - */ - for (i = 32; i < max_irq; i += 4) - writel(cpumask, base + GIC_DIST_TARGET + i * 4 / 4); - - /* - * Set priority on all interrupts. - */ - for (i = 0; i < max_irq; i += 4) - writel(0xa0a0a0a0, base + GIC_DIST_PRI + i * 4 / 4); - - /* - * Disable all interrupts. - */ - for (i = 0; i < max_irq; i += 32) - writel(0xffffffff, base + GIC_DIST_ENABLE_CLEAR + i * 4 / 32); - - /* - * Setup the Linux IRQ subsystem. - */ - for (i = irq_start; i < gic_data[gic_nr].irq_offset + max_irq; i++) { - set_irq_chip(i, &gic_chip); - set_irq_chip_data(i, &gic_data[gic_nr]); - set_irq_handler(i, handle_level_irq); - set_irq_flags(i, IRQF_VALID | IRQF_PROBE); - } - - writel(1, base + GIC_DIST_CTRL); -} - -void __cpuinit gic_cpu_init(unsigned int gic_nr, void __iomem *base) -{ - if (gic_nr >= MAX_GIC_NR) - BUG(); - - gic_data[gic_nr].cpu_base = base; - - writel(0xf0, base + GIC_CPU_PRIMASK); - writel(1, base + GIC_CPU_CTRL); -} - -#ifdef CONFIG_SMP -void gic_raise_softirq(const struct cpumask *mask, unsigned int irq) -{ - unsigned long map = *cpus_addr(*mask); - - /* this always happens on GIC0 */ - writel(map << 16 | irq, gic_data[0].dist_base + GIC_DIST_SOFTINT); -} -#endif diff --git a/arch/arm/common/icst.c b/arch/arm/common/icst.c new file mode 100644 index 00000000000..2dc6da70ae5 --- /dev/null +++ b/arch/arm/common/icst.c @@ -0,0 +1,100 @@ +/* + * linux/arch/arm/common/icst307.c + * + * Copyright (C) 2003 Deep Blue Solutions, Ltd, All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Support functions for calculating clocks/divisors for the ICST307 + * clock generators. See http://www.idt.com/ for more information + * on these devices. + * + * This is an almost identical implementation to the ICST525 clock generator. + * The s2div and idx2s files are different + */ +#include <linux/module.h> +#include <linux/kernel.h> + +#include <asm/hardware/icst.h> + +/* + * Divisors for each OD setting. 
+ */ +const unsigned char icst307_s2div[8] = { 10, 2, 8, 4, 5, 7, 3, 6 }; +const unsigned char icst525_s2div[8] = { 10, 2, 8, 4, 5, 7, 9, 6 }; +EXPORT_SYMBOL(icst307_s2div); +EXPORT_SYMBOL(icst525_s2div); + +unsigned long icst_hz(const struct icst_params *p, struct icst_vco vco) +{ + return p->ref * 2 * (vco.v + 8) / ((vco.r + 2) * p->s2div[vco.s]); +} + +EXPORT_SYMBOL(icst_hz); + +/* + * Ascending divisor S values. + */ +const unsigned char icst307_idx2s[8] = { 1, 6, 3, 4, 7, 5, 2, 0 }; +const unsigned char icst525_idx2s[8] = { 1, 3, 4, 7, 5, 2, 6, 0 }; +EXPORT_SYMBOL(icst307_idx2s); +EXPORT_SYMBOL(icst525_idx2s); + +struct icst_vco +icst_hz_to_vco(const struct icst_params *p, unsigned long freq) +{ + struct icst_vco vco = { .s = 1, .v = p->vd_max, .r = p->rd_max }; + unsigned long f; + unsigned int i = 0, rd, best = (unsigned int)-1; + + /* + * First, find the PLL output divisor such + * that the PLL output is within spec. + */ + do { + f = freq * p->s2div[p->idx2s[i]]; + + if (f > p->vco_min && f <= p->vco_max) + break; + } while (i < 8); + + if (i >= 8) + return vco; + + vco.s = p->idx2s[i]; + + /* + * Now find the closest divisor combination + * which gives a PLL output of 'f'. + */ + for (rd = p->rd_min; rd <= p->rd_max; rd++) { + unsigned long fref_div, f_pll; + unsigned int vd; + int f_diff; + + fref_div = (2 * p->ref) / rd; + + vd = (f + fref_div / 2) / fref_div; + if (vd < p->vd_min || vd > p->vd_max) + continue; + + f_pll = fref_div * vd; + f_diff = f_pll - f; + if (f_diff < 0) + f_diff = -f_diff; + + if ((unsigned)f_diff < best) { + vco.v = vd - 8; + vco.r = rd - 2; + if (f_diff == 0) + break; + best = f_diff; + } + } + + return vco; +} + +EXPORT_SYMBOL(icst_hz_to_vco); diff --git a/arch/arm/common/icst307.c b/arch/arm/common/icst307.c deleted file mode 100644 index 6d094c15754..00000000000 --- a/arch/arm/common/icst307.c +++ /dev/null @@ -1,161 +0,0 @@ -/* - * linux/arch/arm/common/icst307.c - * - * Copyright (C) 2003 Deep Blue Solutions, Ltd, All Rights Reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Support functions for calculating clocks/divisors for the ICST307 - * clock generators. See http://www.icst.com/ for more information - * on these devices. - * - * This is an almost identical implementation to the ICST525 clock generator. - * The s2div and idx2s files are different - */ -#include <linux/module.h> -#include <linux/kernel.h> - -#include <asm/hardware/icst307.h> - -/* - * Divisors for each OD setting. - */ -static unsigned char s2div[8] = { 10, 2, 8, 4, 5, 7, 3, 6 }; - -unsigned long icst307_khz(const struct icst307_params *p, struct icst307_vco vco) -{ - return p->ref * 2 * (vco.v + 8) / ((vco.r + 2) * s2div[vco.s]); -} - -EXPORT_SYMBOL(icst307_khz); - -/* - * Ascending divisor S values. - */ -static unsigned char idx2s[8] = { 1, 6, 3, 4, 7, 5, 2, 0 }; - -struct icst307_vco -icst307_khz_to_vco(const struct icst307_params *p, unsigned long freq) -{ - struct icst307_vco vco = { .s = 1, .v = p->vd_max, .r = p->rd_max }; - unsigned long f; - unsigned int i = 0, rd, best = (unsigned int)-1; - - /* - * First, find the PLL output divisor such - * that the PLL output is within spec. 
- */ - do { - f = freq * s2div[idx2s[i]]; - - /* - * f must be between 6MHz and 200MHz (3.3 or 5V) - */ - if (f > 6000 && f <= p->vco_max) - break; - } while (i < ARRAY_SIZE(idx2s)); - - if (i >= ARRAY_SIZE(idx2s)) - return vco; - - vco.s = idx2s[i]; - - /* - * Now find the closest divisor combination - * which gives a PLL output of 'f'. - */ - for (rd = p->rd_min; rd <= p->rd_max; rd++) { - unsigned long fref_div, f_pll; - unsigned int vd; - int f_diff; - - fref_div = (2 * p->ref) / rd; - - vd = (f + fref_div / 2) / fref_div; - if (vd < p->vd_min || vd > p->vd_max) - continue; - - f_pll = fref_div * vd; - f_diff = f_pll - f; - if (f_diff < 0) - f_diff = -f_diff; - - if ((unsigned)f_diff < best) { - vco.v = vd - 8; - vco.r = rd - 2; - if (f_diff == 0) - break; - best = f_diff; - } - } - - return vco; -} - -EXPORT_SYMBOL(icst307_khz_to_vco); - -struct icst307_vco -icst307_ps_to_vco(const struct icst307_params *p, unsigned long period) -{ - struct icst307_vco vco = { .s = 1, .v = p->vd_max, .r = p->rd_max }; - unsigned long f, ps; - unsigned int i = 0, rd, best = (unsigned int)-1; - - ps = 1000000000UL / p->vco_max; - - /* - * First, find the PLL output divisor such - * that the PLL output is within spec. - */ - do { - f = period / s2div[idx2s[i]]; - - /* - * f must be between 6MHz and 200MHz (3.3 or 5V) - */ - if (f >= ps && f < 1000000000UL / 6000 + 1) - break; - } while (i < ARRAY_SIZE(idx2s)); - - if (i >= ARRAY_SIZE(idx2s)) - return vco; - - vco.s = idx2s[i]; - - ps = 500000000UL / p->ref; - - /* - * Now find the closest divisor combination - * which gives a PLL output of 'f'. - */ - for (rd = p->rd_min; rd <= p->rd_max; rd++) { - unsigned long f_in_div, f_pll; - unsigned int vd; - int f_diff; - - f_in_div = ps * rd; - - vd = (f_in_div + f / 2) / f; - if (vd < p->vd_min || vd > p->vd_max) - continue; - - f_pll = (f_in_div + vd / 2) / vd; - f_diff = f_pll - f; - if (f_diff < 0) - f_diff = -f_diff; - - if ((unsigned)f_diff < best) { - vco.v = vd - 8; - vco.r = rd - 2; - if (f_diff == 0) - break; - best = f_diff; - } - } - - return vco; -} - -EXPORT_SYMBOL(icst307_ps_to_vco); diff --git a/arch/arm/common/icst525.c b/arch/arm/common/icst525.c deleted file mode 100644 index 3d377c5bdef..00000000000 --- a/arch/arm/common/icst525.c +++ /dev/null @@ -1,160 +0,0 @@ -/* - * linux/arch/arm/common/icst525.c - * - * Copyright (C) 2003 Deep Blue Solutions, Ltd, All Rights Reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Support functions for calculating clocks/divisors for the ICST525 - * clock generators. See http://www.icst.com/ for more information - * on these devices. - */ -#include <linux/module.h> -#include <linux/kernel.h> - -#include <asm/hardware/icst525.h> - -/* - * Divisors for each OD setting. - */ -static unsigned char s2div[8] = { 10, 2, 8, 4, 5, 7, 9, 6 }; - -unsigned long icst525_khz(const struct icst525_params *p, struct icst525_vco vco) -{ - return p->ref * 2 * (vco.v + 8) / ((vco.r + 2) * s2div[vco.s]); -} - -EXPORT_SYMBOL(icst525_khz); - -/* - * Ascending divisor S values. 
- */ -static unsigned char idx2s[] = { 1, 3, 4, 7, 5, 2, 6, 0 }; - -struct icst525_vco -icst525_khz_to_vco(const struct icst525_params *p, unsigned long freq) -{ - struct icst525_vco vco = { .s = 1, .v = p->vd_max, .r = p->rd_max }; - unsigned long f; - unsigned int i = 0, rd, best = (unsigned int)-1; - - /* - * First, find the PLL output divisor such - * that the PLL output is within spec. - */ - do { - f = freq * s2div[idx2s[i]]; - - /* - * f must be between 10MHz and - * 320MHz (5V) or 200MHz (3V) - */ - if (f > 10000 && f <= p->vco_max) - break; - } while (i < ARRAY_SIZE(idx2s)); - - if (i >= ARRAY_SIZE(idx2s)) - return vco; - - vco.s = idx2s[i]; - - /* - * Now find the closest divisor combination - * which gives a PLL output of 'f'. - */ - for (rd = p->rd_min; rd <= p->rd_max; rd++) { - unsigned long fref_div, f_pll; - unsigned int vd; - int f_diff; - - fref_div = (2 * p->ref) / rd; - - vd = (f + fref_div / 2) / fref_div; - if (vd < p->vd_min || vd > p->vd_max) - continue; - - f_pll = fref_div * vd; - f_diff = f_pll - f; - if (f_diff < 0) - f_diff = -f_diff; - - if ((unsigned)f_diff < best) { - vco.v = vd - 8; - vco.r = rd - 2; - if (f_diff == 0) - break; - best = f_diff; - } - } - - return vco; -} - -EXPORT_SYMBOL(icst525_khz_to_vco); - -struct icst525_vco -icst525_ps_to_vco(const struct icst525_params *p, unsigned long period) -{ - struct icst525_vco vco = { .s = 1, .v = p->vd_max, .r = p->rd_max }; - unsigned long f, ps; - unsigned int i = 0, rd, best = (unsigned int)-1; - - ps = 1000000000UL / p->vco_max; - - /* - * First, find the PLL output divisor such - * that the PLL output is within spec. - */ - do { - f = period / s2div[idx2s[i]]; - - /* - * f must be between 10MHz and - * 320MHz (5V) or 200MHz (3V) - */ - if (f >= ps && f < 100000) - break; - } while (i < ARRAY_SIZE(idx2s)); - - if (i >= ARRAY_SIZE(idx2s)) - return vco; - - vco.s = idx2s[i]; - - ps = 500000000UL / p->ref; - - /* - * Now find the closest divisor combination - * which gives a PLL output of 'f'. 
- */ - for (rd = p->rd_min; rd <= p->rd_max; rd++) { - unsigned long f_in_div, f_pll; - unsigned int vd; - int f_diff; - - f_in_div = ps * rd; - - vd = (f_in_div + f / 2) / f; - if (vd < p->vd_min || vd > p->vd_max) - continue; - - f_pll = (f_in_div + vd / 2) / vd; - f_diff = f_pll - f; - if (f_diff < 0) - f_diff = -f_diff; - - if ((unsigned)f_diff < best) { - vco.v = vd - 8; - vco.r = rd - 2; - if (f_diff == 0) - break; - best = f_diff; - } - } - - return vco; -} - -EXPORT_SYMBOL(icst525_ps_to_vco); diff --git a/arch/arm/common/it8152.c b/arch/arm/common/it8152.c index 2793447621c..5114b68e99d 100644 --- a/arch/arm/common/it8152.c +++ b/arch/arm/common/it8152.c @@ -21,19 +21,21 @@ #include <linux/ptrace.h> #include <linux/interrupt.h> #include <linux/mm.h> -#include <linux/slab.h> #include <linux/init.h> #include <linux/ioport.h> #include <linux/irq.h> #include <linux/io.h> +#include <linux/export.h> #include <asm/mach/pci.h> #include <asm/hardware/it8152.h> #define MAX_SLOTS 21 -static void it8152_mask_irq(unsigned int irq) +static void it8152_mask_irq(struct irq_data *d) { + unsigned int irq = d->irq; + if (irq >= IT8152_LD_IRQ(0)) { __raw_writel((__raw_readl(IT8152_INTC_LDCNIMR) | (1 << (irq - IT8152_LD_IRQ(0)))), @@ -49,8 +51,10 @@ static void it8152_mask_irq(unsigned int irq) } } -static void it8152_unmask_irq(unsigned int irq) +static void it8152_unmask_irq(struct irq_data *d) { + unsigned int irq = d->irq; + if (irq >= IT8152_LD_IRQ(0)) { __raw_writel((__raw_readl(IT8152_INTC_LDCNIMR) & ~(1 << (irq - IT8152_LD_IRQ(0)))), @@ -68,9 +72,9 @@ static void it8152_unmask_irq(unsigned int irq) static struct irq_chip it8152_irq_chip = { .name = "it8152", - .ack = it8152_mask_irq, - .mask = it8152_mask_irq, - .unmask = it8152_unmask_irq, + .irq_ack = it8152_mask_irq, + .irq_mask = it8152_mask_irq, + .irq_unmask = it8152_unmask_irq, }; void it8152_init_irq(void) @@ -85,8 +89,8 @@ void it8152_init_irq(void) __raw_writel((0), IT8152_INTC_LDCNIRR); for (irq = IT8152_IRQ(0); irq <= IT8152_LAST_IRQ; irq++) { - set_irq_chip(irq, &it8152_irq_chip); - set_irq_handler(irq, handle_level_irq); + irq_set_chip_and_handler(irq, &it8152_irq_chip, + handle_level_irq); set_irq_flags(irq, IRQF_VALID | IRQF_PROBE); } } @@ -141,7 +145,7 @@ void it8152_irq_demux(unsigned int irq, struct irq_desc *desc) } /* mapping for on-chip devices */ -int __init it8152_pci_map_irq(struct pci_dev *dev, u8 slot, u8 pin) +int __init it8152_pci_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { if ((dev->vendor == PCI_VENDOR_ID_ITE) && (dev->device == PCI_DEVICE_ID_ITE_8152)) { @@ -218,7 +222,7 @@ static int it8152_pci_write_config(struct pci_bus *bus, return PCIBIOS_SUCCESSFUL; } -static struct pci_ops it8152_ops = { +struct pci_ops it8152_ops = { .read = it8152_pci_read_config, .write = it8152_pci_write_config, }; @@ -237,9 +241,15 @@ static struct resource it8152_mem = { /* * The following functions are needed for DMA bouncing. - * ITE8152 chip can addrees up to 64MByte, so all the devices + * ITE8152 chip can address up to 64MByte, so all the devices * connected to ITE8152 (PCI and USB) should have limited DMA window */ +static int it8152_needs_bounce(struct device *dev, dma_addr_t dma_addr, size_t size) +{ + dev_dbg(dev, "%s: dma_addr %08x, size %08x\n", + __func__, dma_addr, size); + return (dma_addr + size - PHYS_OFFSET) >= SZ_64M; +} /* * Setup DMA mask to 64MB on devices connected to ITE8152. 
Ignore all @@ -247,52 +257,25 @@ static struct resource it8152_mem = { */ static int it8152_pci_platform_notify(struct device *dev) { - if (dev->bus == &pci_bus_type) { + if (dev_is_pci(dev)) { if (dev->dma_mask) *dev->dma_mask = (SZ_64M - 1) | PHYS_OFFSET; dev->coherent_dma_mask = (SZ_64M - 1) | PHYS_OFFSET; - dmabounce_register_dev(dev, 2048, 4096); + dmabounce_register_dev(dev, 2048, 4096, it8152_needs_bounce); } return 0; } static int it8152_pci_platform_notify_remove(struct device *dev) { - if (dev->bus == &pci_bus_type) + if (dev_is_pci(dev)) dmabounce_unregister_dev(dev); return 0; } -int dma_needs_bounce(struct device *dev, dma_addr_t dma_addr, size_t size) -{ - dev_dbg(dev, "%s: dma_addr %08x, size %08x\n", - __func__, dma_addr, size); - return (dev->bus == &pci_bus_type) && - ((dma_addr + size - PHYS_OFFSET) >= SZ_64M); -} - -/* - * We override these so we properly do dmabounce otherwise drivers - * are able to set the dma_mask to 0xffffffff and we can no longer - * trap bounces. :( - * - * We just return true on everyhing except for < 64MB in which case - * we will fail miseralby and die since we can't handle that case. - */ -int pci_set_dma_mask(struct pci_dev *dev, u64 mask) +int dma_set_coherent_mask(struct device *dev, u64 mask) { - dev_dbg(&dev->dev, "%s: %llx\n", __func__, mask); - if (mask >= PHYS_OFFSET + SZ_64M - 1) - return 0; - - return -EIO; -} - -int -pci_set_consistent_dma_mask(struct pci_dev *dev, u64 mask) -{ - dev_dbg(&dev->dev, "%s: %llx\n", __func__, mask); if (mask >= PHYS_OFFSET + SZ_64M - 1) return 0; @@ -301,11 +284,17 @@ pci_set_consistent_dma_mask(struct pci_dev *dev, u64 mask) int __init it8152_pci_setup(int nr, struct pci_sys_data *sys) { - it8152_io.start = IT8152_IO_BASE + 0x12000; - it8152_io.end = IT8152_IO_BASE + 0x12000 + 0x100000; + /* + * FIXME: use pci_ioremap_io to remap the IO space here and + * move over to the generic io.h implementation. + * This requires solving the same problem for PXA PCMCIA + * support. + */ + it8152_io.start = (unsigned long)IT8152_IO_BASE + 0x12000; + it8152_io.end = (unsigned long)IT8152_IO_BASE + 0x12000 + 0x100000; sys->mem_offset = 0x10000000; - sys->io_offset = IT8152_IO_BASE; + sys->io_offset = (unsigned long)IT8152_IO_BASE; if (request_resource(&ioport_resource, &it8152_io)) { printk(KERN_ERR "PCI: unable to allocate IO region\n"); @@ -316,8 +305,8 @@ int __init it8152_pci_setup(int nr, struct pci_sys_data *sys) goto err1; } - sys->resource[0] = &it8152_io; - sys->resource[1] = &it8152_mem; + pci_add_resource_offset(&sys->resources, &it8152_io, sys->io_offset); + pci_add_resource_offset(&sys->resources, &it8152_mem, sys->mem_offset); if (platform_notify || platform_notify_remove) { printk(KERN_ERR "PCI: Can't use platform_notify\n"); @@ -337,13 +326,9 @@ err0: return -EBUSY; } -/* - * If we set up a device for bus mastering, we need to check the latency - * timer as we don't have even crappy BIOSes to set it properly. 
- * The implementation is from arch/i386/pci/i386.c - */ -unsigned int pcibios_max_latency = 255; - +/* ITE bridge requires setting latency timer to avoid early bus access + termination by PCI bus master devices +*/ void pcibios_set_master(struct pci_dev *dev) { u8 lat; @@ -367,8 +352,4 @@ void pcibios_set_master(struct pci_dev *dev) } -struct pci_bus * __init it8152_pci_scan_bus(int nr, struct pci_sys_data *sys) -{ - return pci_scan_bus(nr, &it8152_ops, sys); -} - +EXPORT_SYMBOL(dma_set_coherent_mask); diff --git a/arch/arm/common/locomo.c b/arch/arm/common/locomo.c index bd36c778c81..b55c3625d7e 100644 --- a/arch/arm/common/locomo.c +++ b/arch/arm/common/locomo.c @@ -32,6 +32,12 @@ #include <asm/hardware/locomo.h> +/* LoCoMo Interrupts */ +#define IRQ_LOCOMO_KEY (0) +#define IRQ_LOCOMO_GPIO (1) +#define IRQ_LOCOMO_LT (2) +#define IRQ_LOCOMO_SPI (3) + /* M62332 output channel selection */ #define M62332_EVR_CH 1 /* M62332 volume channel number */ /* 0 : CH.1 , 1 : CH. 2 */ @@ -58,6 +64,7 @@ struct locomo { struct device *dev; unsigned long phys; unsigned int irq; + int irq_base; spinlock_t lock; void __iomem *base; #ifdef CONFIG_PM @@ -81,9 +88,7 @@ struct locomo_dev_info { static struct locomo_dev_info locomo_devices[] = { { .devid = LOCOMO_DEVID_KEYBOARD, - .irq = { - IRQ_LOCOMO_KEY, - }, + .irq = { IRQ_LOCOMO_KEY }, .name = "locomo-keyboard", .offset = LOCOMO_KEYBOARD, .length = 16, @@ -133,53 +138,20 @@ static struct locomo_dev_info locomo_devices[] = { }, }; - -/** LoCoMo interrupt handling stuff. - * NOTE: LoCoMo has a 1 to many mapping on all of its IRQs. - * that is, there is only one real hardware interrupt - * we determine which interrupt it is by reading some IO memory. - * We have two levels of expansion, first in the handler for the - * hardware interrupt we generate an interrupt - * IRQ_LOCOMO_*_BASE and those handlers generate more interrupts - * - * hardware irq reads LOCOMO_ICR & 0x0f00 - * IRQ_LOCOMO_KEY_BASE - * IRQ_LOCOMO_GPIO_BASE - * IRQ_LOCOMO_LT_BASE - * IRQ_LOCOMO_SPI_BASE - * IRQ_LOCOMO_KEY_BASE reads LOCOMO_KIC & 0x0001 - * IRQ_LOCOMO_KEY - * IRQ_LOCOMO_GPIO_BASE reads LOCOMO_GIR & LOCOMO_GPD & 0xffff - * IRQ_LOCOMO_GPIO[0-15] - * IRQ_LOCOMO_LT_BASE reads LOCOMO_LTINT & 0x0001 - * IRQ_LOCOMO_LT - * IRQ_LOCOMO_SPI_BASE reads LOCOMO_SPIIR & 0x000F - * IRQ_LOCOMO_SPI_RFR - * IRQ_LOCOMO_SPI_RFW - * IRQ_LOCOMO_SPI_OVRN - * IRQ_LOCOMO_SPI_TEND - */ - -#define LOCOMO_IRQ_START (IRQ_LOCOMO_KEY_BASE) -#define LOCOMO_IRQ_KEY_START (IRQ_LOCOMO_KEY) -#define LOCOMO_IRQ_GPIO_START (IRQ_LOCOMO_GPIO0) -#define LOCOMO_IRQ_LT_START (IRQ_LOCOMO_LT) -#define LOCOMO_IRQ_SPI_START (IRQ_LOCOMO_SPI_RFR) - static void locomo_handler(unsigned int irq, struct irq_desc *desc) { + struct locomo *lchip = irq_get_chip_data(irq); int req, i; - void __iomem *mapbase = get_irq_chip_data(irq); /* Acknowledge the parent IRQ */ - desc->chip->ack(irq); + desc->irq_data.chip->irq_ack(&desc->irq_data); /* check why this interrupt was generated */ - req = locomo_readl(mapbase + LOCOMO_ICR) & 0x0f00; + req = locomo_readl(lchip->base + LOCOMO_ICR) & 0x0f00; if (req) { /* generate the next interrupt(s) */ - irq = LOCOMO_IRQ_START; + irq = lchip->irq_base; for (i = 0; i <= 3; i++, irq++) { if (req & (0x0100 << i)) { generic_handle_irq(irq); @@ -189,326 +161,50 @@ static void locomo_handler(unsigned int irq, struct irq_desc *desc) } } -static void locomo_ack_irq(unsigned int irq) +static void locomo_ack_irq(struct irq_data *d) { } -static void locomo_mask_irq(unsigned int irq) +static void 
locomo_mask_irq(struct irq_data *d) { - void __iomem *mapbase = get_irq_chip_data(irq); + struct locomo *lchip = irq_data_get_irq_chip_data(d); unsigned int r; - r = locomo_readl(mapbase + LOCOMO_ICR); - r &= ~(0x0010 << (irq - LOCOMO_IRQ_START)); - locomo_writel(r, mapbase + LOCOMO_ICR); + r = locomo_readl(lchip->base + LOCOMO_ICR); + r &= ~(0x0010 << (d->irq - lchip->irq_base)); + locomo_writel(r, lchip->base + LOCOMO_ICR); } -static void locomo_unmask_irq(unsigned int irq) +static void locomo_unmask_irq(struct irq_data *d) { - void __iomem *mapbase = get_irq_chip_data(irq); + struct locomo *lchip = irq_data_get_irq_chip_data(d); unsigned int r; - r = locomo_readl(mapbase + LOCOMO_ICR); - r |= (0x0010 << (irq - LOCOMO_IRQ_START)); - locomo_writel(r, mapbase + LOCOMO_ICR); + r = locomo_readl(lchip->base + LOCOMO_ICR); + r |= (0x0010 << (d->irq - lchip->irq_base)); + locomo_writel(r, lchip->base + LOCOMO_ICR); } static struct irq_chip locomo_chip = { - .name = "LOCOMO", - .ack = locomo_ack_irq, - .mask = locomo_mask_irq, - .unmask = locomo_unmask_irq, -}; - -static void locomo_key_handler(unsigned int irq, struct irq_desc *desc) -{ - void __iomem *mapbase = get_irq_chip_data(irq); - - if (locomo_readl(mapbase + LOCOMO_KEYBOARD + LOCOMO_KIC) & 0x0001) { - generic_handle_irq(LOCOMO_IRQ_KEY_START); - } -} - -static void locomo_key_ack_irq(unsigned int irq) -{ - void __iomem *mapbase = get_irq_chip_data(irq); - unsigned int r; - r = locomo_readl(mapbase + LOCOMO_KEYBOARD + LOCOMO_KIC); - r &= ~(0x0100 << (irq - LOCOMO_IRQ_KEY_START)); - locomo_writel(r, mapbase + LOCOMO_KEYBOARD + LOCOMO_KIC); -} - -static void locomo_key_mask_irq(unsigned int irq) -{ - void __iomem *mapbase = get_irq_chip_data(irq); - unsigned int r; - r = locomo_readl(mapbase + LOCOMO_KEYBOARD + LOCOMO_KIC); - r &= ~(0x0010 << (irq - LOCOMO_IRQ_KEY_START)); - locomo_writel(r, mapbase + LOCOMO_KEYBOARD + LOCOMO_KIC); -} - -static void locomo_key_unmask_irq(unsigned int irq) -{ - void __iomem *mapbase = get_irq_chip_data(irq); - unsigned int r; - r = locomo_readl(mapbase + LOCOMO_KEYBOARD + LOCOMO_KIC); - r |= (0x0010 << (irq - LOCOMO_IRQ_KEY_START)); - locomo_writel(r, mapbase + LOCOMO_KEYBOARD + LOCOMO_KIC); -} - -static struct irq_chip locomo_key_chip = { - .name = "LOCOMO-key", - .ack = locomo_key_ack_irq, - .mask = locomo_key_mask_irq, - .unmask = locomo_key_unmask_irq, -}; - -static void locomo_gpio_handler(unsigned int irq, struct irq_desc *desc) -{ - int req, i; - void __iomem *mapbase = get_irq_chip_data(irq); - - req = locomo_readl(mapbase + LOCOMO_GIR) & - locomo_readl(mapbase + LOCOMO_GPD) & - 0xffff; - - if (req) { - irq = LOCOMO_IRQ_GPIO_START; - for (i = 0; i <= 15; i++, irq++) { - if (req & (0x0001 << i)) { - generic_handle_irq(irq); - } - } - } -} - -static void locomo_gpio_ack_irq(unsigned int irq) -{ - void __iomem *mapbase = get_irq_chip_data(irq); - unsigned int r; - r = locomo_readl(mapbase + LOCOMO_GWE); - r |= (0x0001 << (irq - LOCOMO_IRQ_GPIO_START)); - locomo_writel(r, mapbase + LOCOMO_GWE); - - r = locomo_readl(mapbase + LOCOMO_GIS); - r &= ~(0x0001 << (irq - LOCOMO_IRQ_GPIO_START)); - locomo_writel(r, mapbase + LOCOMO_GIS); - - r = locomo_readl(mapbase + LOCOMO_GWE); - r &= ~(0x0001 << (irq - LOCOMO_IRQ_GPIO_START)); - locomo_writel(r, mapbase + LOCOMO_GWE); -} - -static void locomo_gpio_mask_irq(unsigned int irq) -{ - void __iomem *mapbase = get_irq_chip_data(irq); - unsigned int r; - r = locomo_readl(mapbase + LOCOMO_GIE); - r &= ~(0x0001 << (irq - LOCOMO_IRQ_GPIO_START)); - locomo_writel(r, 
mapbase + LOCOMO_GIE); -} - -static void locomo_gpio_unmask_irq(unsigned int irq) -{ - void __iomem *mapbase = get_irq_chip_data(irq); - unsigned int r; - r = locomo_readl(mapbase + LOCOMO_GIE); - r |= (0x0001 << (irq - LOCOMO_IRQ_GPIO_START)); - locomo_writel(r, mapbase + LOCOMO_GIE); -} - -static int GPIO_IRQ_rising_edge; -static int GPIO_IRQ_falling_edge; - -static int locomo_gpio_type(unsigned int irq, unsigned int type) -{ - unsigned int mask; - void __iomem *mapbase = get_irq_chip_data(irq); - - mask = 1 << (irq - LOCOMO_IRQ_GPIO_START); - - if (type == IRQ_TYPE_PROBE) { - if ((GPIO_IRQ_rising_edge | GPIO_IRQ_falling_edge) & mask) - return 0; - type = IRQ_TYPE_EDGE_RISING | IRQ_TYPE_EDGE_FALLING; - } - - if (type & IRQ_TYPE_EDGE_RISING) - GPIO_IRQ_rising_edge |= mask; - else - GPIO_IRQ_rising_edge &= ~mask; - if (type & IRQ_TYPE_EDGE_FALLING) - GPIO_IRQ_falling_edge |= mask; - else - GPIO_IRQ_falling_edge &= ~mask; - locomo_writel(GPIO_IRQ_rising_edge, mapbase + LOCOMO_GRIE); - locomo_writel(GPIO_IRQ_falling_edge, mapbase + LOCOMO_GFIE); - - return 0; -} - -static struct irq_chip locomo_gpio_chip = { - .name = "LOCOMO-gpio", - .ack = locomo_gpio_ack_irq, - .mask = locomo_gpio_mask_irq, - .unmask = locomo_gpio_unmask_irq, - .set_type = locomo_gpio_type, -}; - -static void locomo_lt_handler(unsigned int irq, struct irq_desc *desc) -{ - void __iomem *mapbase = get_irq_chip_data(irq); - - if (locomo_readl(mapbase + LOCOMO_LTINT) & 0x0001) { - generic_handle_irq(LOCOMO_IRQ_LT_START); - } -} - -static void locomo_lt_ack_irq(unsigned int irq) -{ - void __iomem *mapbase = get_irq_chip_data(irq); - unsigned int r; - r = locomo_readl(mapbase + LOCOMO_LTINT); - r &= ~(0x0100 << (irq - LOCOMO_IRQ_LT_START)); - locomo_writel(r, mapbase + LOCOMO_LTINT); -} - -static void locomo_lt_mask_irq(unsigned int irq) -{ - void __iomem *mapbase = get_irq_chip_data(irq); - unsigned int r; - r = locomo_readl(mapbase + LOCOMO_LTINT); - r &= ~(0x0010 << (irq - LOCOMO_IRQ_LT_START)); - locomo_writel(r, mapbase + LOCOMO_LTINT); -} - -static void locomo_lt_unmask_irq(unsigned int irq) -{ - void __iomem *mapbase = get_irq_chip_data(irq); - unsigned int r; - r = locomo_readl(mapbase + LOCOMO_LTINT); - r |= (0x0010 << (irq - LOCOMO_IRQ_LT_START)); - locomo_writel(r, mapbase + LOCOMO_LTINT); -} - -static struct irq_chip locomo_lt_chip = { - .name = "LOCOMO-lt", - .ack = locomo_lt_ack_irq, - .mask = locomo_lt_mask_irq, - .unmask = locomo_lt_unmask_irq, -}; - -static void locomo_spi_handler(unsigned int irq, struct irq_desc *desc) -{ - int req, i; - void __iomem *mapbase = get_irq_chip_data(irq); - - req = locomo_readl(mapbase + LOCOMO_SPI + LOCOMO_SPIIR) & 0x000F; - if (req) { - irq = LOCOMO_IRQ_SPI_START; - - for (i = 0; i <= 3; i++, irq++) { - if (req & (0x0001 << i)) { - generic_handle_irq(irq); - } - } - } -} - -static void locomo_spi_ack_irq(unsigned int irq) -{ - void __iomem *mapbase = get_irq_chip_data(irq); - unsigned int r; - r = locomo_readl(mapbase + LOCOMO_SPI + LOCOMO_SPIWE); - r |= (0x0001 << (irq - LOCOMO_IRQ_SPI_START)); - locomo_writel(r, mapbase + LOCOMO_SPI + LOCOMO_SPIWE); - - r = locomo_readl(mapbase + LOCOMO_SPI + LOCOMO_SPIIS); - r &= ~(0x0001 << (irq - LOCOMO_IRQ_SPI_START)); - locomo_writel(r, mapbase + LOCOMO_SPI + LOCOMO_SPIIS); - - r = locomo_readl(mapbase + LOCOMO_SPI + LOCOMO_SPIWE); - r &= ~(0x0001 << (irq - LOCOMO_IRQ_SPI_START)); - locomo_writel(r, mapbase + LOCOMO_SPI + LOCOMO_SPIWE); -} - -static void locomo_spi_mask_irq(unsigned int irq) -{ - void __iomem *mapbase = 
get_irq_chip_data(irq); - unsigned int r; - r = locomo_readl(mapbase + LOCOMO_SPI + LOCOMO_SPIIE); - r &= ~(0x0001 << (irq - LOCOMO_IRQ_SPI_START)); - locomo_writel(r, mapbase + LOCOMO_SPI + LOCOMO_SPIIE); -} - -static void locomo_spi_unmask_irq(unsigned int irq) -{ - void __iomem *mapbase = get_irq_chip_data(irq); - unsigned int r; - r = locomo_readl(mapbase + LOCOMO_SPI + LOCOMO_SPIIE); - r |= (0x0001 << (irq - LOCOMO_IRQ_SPI_START)); - locomo_writel(r, mapbase + LOCOMO_SPI + LOCOMO_SPIIE); -} - -static struct irq_chip locomo_spi_chip = { - .name = "LOCOMO-spi", - .ack = locomo_spi_ack_irq, - .mask = locomo_spi_mask_irq, - .unmask = locomo_spi_unmask_irq, + .name = "LOCOMO", + .irq_ack = locomo_ack_irq, + .irq_mask = locomo_mask_irq, + .irq_unmask = locomo_unmask_irq, }; static void locomo_setup_irq(struct locomo *lchip) { - int irq; - void __iomem *irqbase = lchip->base; + int irq = lchip->irq_base; /* * Install handler for IRQ_LOCOMO_HW. */ - set_irq_type(lchip->irq, IRQ_TYPE_EDGE_FALLING); - set_irq_chip_data(lchip->irq, irqbase); - set_irq_chained_handler(lchip->irq, locomo_handler); - - /* Install handlers for IRQ_LOCOMO_*_BASE */ - set_irq_chip(IRQ_LOCOMO_KEY_BASE, &locomo_chip); - set_irq_chip_data(IRQ_LOCOMO_KEY_BASE, irqbase); - set_irq_chained_handler(IRQ_LOCOMO_KEY_BASE, locomo_key_handler); - - set_irq_chip(IRQ_LOCOMO_GPIO_BASE, &locomo_chip); - set_irq_chip_data(IRQ_LOCOMO_GPIO_BASE, irqbase); - set_irq_chained_handler(IRQ_LOCOMO_GPIO_BASE, locomo_gpio_handler); - - set_irq_chip(IRQ_LOCOMO_LT_BASE, &locomo_chip); - set_irq_chip_data(IRQ_LOCOMO_LT_BASE, irqbase); - set_irq_chained_handler(IRQ_LOCOMO_LT_BASE, locomo_lt_handler); - - set_irq_chip(IRQ_LOCOMO_SPI_BASE, &locomo_chip); - set_irq_chip_data(IRQ_LOCOMO_SPI_BASE, irqbase); - set_irq_chained_handler(IRQ_LOCOMO_SPI_BASE, locomo_spi_handler); - - /* install handlers for IRQ_LOCOMO_KEY_BASE generated interrupts */ - set_irq_chip(LOCOMO_IRQ_KEY_START, &locomo_key_chip); - set_irq_chip_data(LOCOMO_IRQ_KEY_START, irqbase); - set_irq_handler(LOCOMO_IRQ_KEY_START, handle_edge_irq); - set_irq_flags(LOCOMO_IRQ_KEY_START, IRQF_VALID | IRQF_PROBE); - - /* install handlers for IRQ_LOCOMO_GPIO_BASE generated interrupts */ - for (irq = LOCOMO_IRQ_GPIO_START; irq < LOCOMO_IRQ_GPIO_START + 16; irq++) { - set_irq_chip(irq, &locomo_gpio_chip); - set_irq_chip_data(irq, irqbase); - set_irq_handler(irq, handle_edge_irq); - set_irq_flags(irq, IRQF_VALID | IRQF_PROBE); - } - - /* install handlers for IRQ_LOCOMO_LT_BASE generated interrupts */ - set_irq_chip(LOCOMO_IRQ_LT_START, &locomo_lt_chip); - set_irq_chip_data(LOCOMO_IRQ_LT_START, irqbase); - set_irq_handler(LOCOMO_IRQ_LT_START, handle_edge_irq); - set_irq_flags(LOCOMO_IRQ_LT_START, IRQF_VALID | IRQF_PROBE); - - /* install handlers for IRQ_LOCOMO_SPI_BASE generated interrupts */ - for (irq = LOCOMO_IRQ_SPI_START; irq < LOCOMO_IRQ_SPI_START + 4; irq++) { - set_irq_chip(irq, &locomo_spi_chip); - set_irq_chip_data(irq, irqbase); - set_irq_handler(irq, handle_edge_irq); + irq_set_irq_type(lchip->irq, IRQ_TYPE_EDGE_FALLING); + irq_set_chip_data(lchip->irq, lchip); + irq_set_chained_handler(lchip->irq, locomo_handler); + + /* Install handlers for IRQ_LOCOMO_* */ + for ( ; irq <= lchip->irq_base + 3; irq++) { + irq_set_chip_and_handler(irq, &locomo_chip, handle_level_irq); + irq_set_chip_data(irq, lchip); set_irq_flags(irq, IRQF_VALID | IRQF_PROBE); } } @@ -555,7 +251,8 @@ locomo_init_one_child(struct locomo *lchip, struct locomo_dev_info *info) dev->mapbase = 0; dev->length = info->length; - 
memmove(dev->irq, info->irq, sizeof(dev->irq)); + dev->irq[0] = (lchip->irq_base == NO_IRQ) ? + NO_IRQ : lchip->irq_base + info->irq[0]; ret = device_register(&dev->dev); if (ret) { @@ -592,7 +289,7 @@ static int locomo_suspend(struct platform_device *dev, pm_message_t state) save->LCM_GPO = locomo_readl(lchip->base + LOCOMO_GPO); /* GPIO */ locomo_writel(0x00, lchip->base + LOCOMO_GPO); save->LCM_SPICT = locomo_readl(lchip->base + LOCOMO_SPI + LOCOMO_SPICT); /* SPI */ - locomo_writel(0x40, lchip->base + LOCOMO_SPICT); + locomo_writel(0x40, lchip->base + LOCOMO_SPI + LOCOMO_SPICT); save->LCM_GPE = locomo_readl(lchip->base + LOCOMO_GPE); /* GPIO */ locomo_writel(0x00, lchip->base + LOCOMO_GPE); save->LCM_ASD = locomo_readl(lchip->base + LOCOMO_ASD); /* ADSTART */ @@ -672,6 +369,7 @@ static int locomo_resume(struct platform_device *dev) static int __locomo_probe(struct device *me, struct resource *mem, int irq) { + struct locomo_platform_data *pdata = me->platform_data; struct locomo *lchip; unsigned long r; int i, ret = -ENODEV; @@ -687,6 +385,7 @@ __locomo_probe(struct device *me, struct resource *mem, int irq) lchip->phys = mem->start; lchip->irq = irq; + lchip->irq_base = (pdata) ? pdata->irq_base : NO_IRQ; /* * Map the whole region. This also maps the @@ -718,7 +417,7 @@ __locomo_probe(struct device *me, struct resource *mem, int irq) /* Longtime timer */ locomo_writel(0, lchip->base + LOCOMO_LTINT); /* SPI */ - locomo_writel(0, lchip->base + LOCOMO_SPIIE); + locomo_writel(0, lchip->base + LOCOMO_SPI + LOCOMO_SPIIE); locomo_writel(6 + 8 + 320 + 30 - 10, lchip->base + LOCOMO_ASD); r = locomo_readl(lchip->base + LOCOMO_ASD); @@ -753,7 +452,7 @@ __locomo_probe(struct device *me, struct resource *mem, int irq) * The interrupt controller must be initialised before any * other device to ensure that the interrupts are available. 
*/ - if (lchip->irq != NO_IRQ) + if (lchip->irq != NO_IRQ && lchip->irq_base != NO_IRQ) locomo_setup_irq(lchip); for (i = 0; i < ARRAY_SIZE(locomo_devices); i++) @@ -776,8 +475,8 @@ static void __locomo_remove(struct locomo *lchip) device_for_each_child(lchip->dev, NULL, locomo_remove_child); if (lchip->irq != NO_IRQ) { - set_irq_chained_handler(lchip->irq, NULL); - set_irq_data(lchip->irq, NULL); + irq_set_chained_handler(lchip->irq, NULL); + irq_set_handler_data(lchip->irq, NULL); } iounmap(lchip->base); @@ -1007,7 +706,7 @@ void locomo_m62332_senddata(struct locomo_dev *ldev, unsigned int dac_data, int udelay(DAC_SCL_HIGH_HOLD_TIME); /* 4.7 usec */ if (locomo_readl(mapbase + LOCOMO_DAC) & LOCOMO_DAC_SDAOEB) { /* High is error */ printk(KERN_WARNING "locomo: m62332_senddata Error 1\n"); - return; + goto out; } /* Send Sub address (LSB is channel select) */ @@ -1035,7 +734,7 @@ void locomo_m62332_senddata(struct locomo_dev *ldev, unsigned int dac_data, int udelay(DAC_SCL_HIGH_HOLD_TIME); /* 4.7 usec */ if (locomo_readl(mapbase + LOCOMO_DAC) & LOCOMO_DAC_SDAOEB) { /* High is error */ printk(KERN_WARNING "locomo: m62332_senddata Error 2\n"); - return; + goto out; } /* Send DAC data */ @@ -1060,9 +759,9 @@ void locomo_m62332_senddata(struct locomo_dev *ldev, unsigned int dac_data, int udelay(DAC_SCL_HIGH_HOLD_TIME); /* 4.7 usec */ if (locomo_readl(mapbase + LOCOMO_DAC) & LOCOMO_DAC_SDAOEB) { /* High is error */ printk(KERN_WARNING "locomo: m62332_senddata Error 3\n"); - return; } +out: /* stop */ r = locomo_readl(mapbase + LOCOMO_DAC); r &= ~(LOCOMO_DAC_SCLOEB); diff --git a/arch/arm/common/mcpm_entry.c b/arch/arm/common/mcpm_entry.c new file mode 100644 index 00000000000..f91136ab447 --- /dev/null +++ b/arch/arm/common/mcpm_entry.c @@ -0,0 +1,296 @@ +/* + * arch/arm/common/mcpm_entry.c -- entry point for multi-cluster PM + * + * Created by: Nicolas Pitre, March 2012 + * Copyright: (C) 2012-2013 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/irqflags.h> + +#include <asm/mcpm.h> +#include <asm/cacheflush.h> +#include <asm/idmap.h> +#include <asm/cputype.h> + +extern unsigned long mcpm_entry_vectors[MAX_NR_CLUSTERS][MAX_CPUS_PER_CLUSTER]; + +void mcpm_set_entry_vector(unsigned cpu, unsigned cluster, void *ptr) +{ + unsigned long val = ptr ? virt_to_phys(ptr) : 0; + mcpm_entry_vectors[cluster][cpu] = val; + sync_cache_w(&mcpm_entry_vectors[cluster][cpu]); +} + +extern unsigned long mcpm_entry_early_pokes[MAX_NR_CLUSTERS][MAX_CPUS_PER_CLUSTER][2]; + +void mcpm_set_early_poke(unsigned cpu, unsigned cluster, + unsigned long poke_phys_addr, unsigned long poke_val) +{ + unsigned long *poke = &mcpm_entry_early_pokes[cluster][cpu][0]; + poke[0] = poke_phys_addr; + poke[1] = poke_val; + __sync_cache_range_w(poke, 2 * sizeof(*poke)); +} + +static const struct mcpm_platform_ops *platform_ops; + +int __init mcpm_platform_register(const struct mcpm_platform_ops *ops) +{ + if (platform_ops) + return -EBUSY; + platform_ops = ops; + return 0; +} + +bool mcpm_is_available(void) +{ + return (platform_ops) ? 
true : false; +} + +int mcpm_cpu_power_up(unsigned int cpu, unsigned int cluster) +{ + if (!platform_ops) + return -EUNATCH; /* try not to shadow power_up errors */ + might_sleep(); + return platform_ops->power_up(cpu, cluster); +} + +typedef void (*phys_reset_t)(unsigned long); + +void mcpm_cpu_power_down(void) +{ + phys_reset_t phys_reset; + + if (WARN_ON_ONCE(!platform_ops || !platform_ops->power_down)) + return; + BUG_ON(!irqs_disabled()); + + /* + * Do this before calling into the power_down method, + * as it might not always be safe to do afterwards. + */ + setup_mm_for_reboot(); + + platform_ops->power_down(); + + /* + * It is possible for a power_up request to happen concurrently + * with a power_down request for the same CPU. In this case the + * power_down method might not be able to actually enter a + * powered down state with the WFI instruction if the power_up + * method has removed the required reset condition. The + * power_down method is then allowed to return. We must perform + * a re-entry in the kernel as if the power_up method just had + * deasserted reset on the CPU. + * + * To simplify race issues, the platform specific implementation + * must accommodate for the possibility of unordered calls to + * power_down and power_up with a usage count. Therefore, if a + * call to power_up is issued for a CPU that is not down, then + * the next call to power_down must not attempt a full shutdown + * but only do the minimum (normally disabling L1 cache and CPU + * coherency) and return just as if a concurrent power_up request + * had happened as described above. + */ + + phys_reset = (phys_reset_t)(unsigned long)virt_to_phys(cpu_reset); + phys_reset(virt_to_phys(mcpm_entry_point)); + + /* should never get here */ + BUG(); +} + +int mcpm_wait_for_cpu_powerdown(unsigned int cpu, unsigned int cluster) +{ + int ret; + + if (WARN_ON_ONCE(!platform_ops || !platform_ops->wait_for_powerdown)) + return -EUNATCH; + + ret = platform_ops->wait_for_powerdown(cpu, cluster); + if (ret) + pr_warn("%s: cpu %u, cluster %u failed to power down (%d)\n", + __func__, cpu, cluster, ret); + + return ret; +} + +void mcpm_cpu_suspend(u64 expected_residency) +{ + phys_reset_t phys_reset; + + if (WARN_ON_ONCE(!platform_ops || !platform_ops->suspend)) + return; + BUG_ON(!irqs_disabled()); + + /* Very similar to mcpm_cpu_power_down() */ + setup_mm_for_reboot(); + platform_ops->suspend(expected_residency); + phys_reset = (phys_reset_t)(unsigned long)virt_to_phys(cpu_reset); + phys_reset(virt_to_phys(mcpm_entry_point)); + BUG(); +} + +int mcpm_cpu_powered_up(void) +{ + if (!platform_ops) + return -EUNATCH; + if (platform_ops->powered_up) + platform_ops->powered_up(); + return 0; +} + +struct sync_struct mcpm_sync; + +/* + * __mcpm_cpu_going_down: Indicates that the cpu is being torn down. + * This must be called at the point of committing to teardown of a CPU. + * The CPU cache (SCTRL.C bit) is expected to still be active. + */ +void __mcpm_cpu_going_down(unsigned int cpu, unsigned int cluster) +{ + mcpm_sync.clusters[cluster].cpus[cpu].cpu = CPU_GOING_DOWN; + sync_cache_w(&mcpm_sync.clusters[cluster].cpus[cpu].cpu); +} + +/* + * __mcpm_cpu_down: Indicates that cpu teardown is complete and that the + * cluster can be torn down without disrupting this CPU. + * To avoid deadlocks, this must be called before a CPU is powered down. + * The CPU cache (SCTRL.C bit) is expected to be off. + * However L2 cache might or might not be active. 
+ */ +void __mcpm_cpu_down(unsigned int cpu, unsigned int cluster) +{ + dmb(); + mcpm_sync.clusters[cluster].cpus[cpu].cpu = CPU_DOWN; + sync_cache_w(&mcpm_sync.clusters[cluster].cpus[cpu].cpu); + sev(); +} + +/* + * __mcpm_outbound_leave_critical: Leave the cluster teardown critical section. + * @state: the final state of the cluster: + * CLUSTER_UP: no destructive teardown was done and the cluster has been + * restored to the previous state (CPU cache still active); or + * CLUSTER_DOWN: the cluster has been torn-down, ready for power-off + * (CPU cache disabled, L2 cache either enabled or disabled). + */ +void __mcpm_outbound_leave_critical(unsigned int cluster, int state) +{ + dmb(); + mcpm_sync.clusters[cluster].cluster = state; + sync_cache_w(&mcpm_sync.clusters[cluster].cluster); + sev(); +} + +/* + * __mcpm_outbound_enter_critical: Enter the cluster teardown critical section. + * This function should be called by the last man, after local CPU teardown + * is complete. CPU cache expected to be active. + * + * Returns: + * false: the critical section was not entered because an inbound CPU was + * observed, or the cluster is already being set up; + * true: the critical section was entered: it is now safe to tear down the + * cluster. + */ +bool __mcpm_outbound_enter_critical(unsigned int cpu, unsigned int cluster) +{ + unsigned int i; + struct mcpm_sync_struct *c = &mcpm_sync.clusters[cluster]; + + /* Warn inbound CPUs that the cluster is being torn down: */ + c->cluster = CLUSTER_GOING_DOWN; + sync_cache_w(&c->cluster); + + /* Back out if the inbound cluster is already in the critical region: */ + sync_cache_r(&c->inbound); + if (c->inbound == INBOUND_COMING_UP) + goto abort; + + /* + * Wait for all CPUs to get out of the GOING_DOWN state, so that local + * teardown is complete on each CPU before tearing down the cluster. + * + * If any CPU has been woken up again from the DOWN state, then we + * shouldn't be taking the cluster down at all: abort in that case. + */ + sync_cache_r(&c->cpus); + for (i = 0; i < MAX_CPUS_PER_CLUSTER; i++) { + int cpustate; + + if (i == cpu) + continue; + + while (1) { + cpustate = c->cpus[i].cpu; + if (cpustate != CPU_GOING_DOWN) + break; + + wfe(); + sync_cache_r(&c->cpus[i].cpu); + } + + switch (cpustate) { + case CPU_DOWN: + continue; + + default: + goto abort; + } + } + + return true; + +abort: + __mcpm_outbound_leave_critical(cluster, CLUSTER_UP); + return false; +} + +int __mcpm_cluster_state(unsigned int cluster) +{ + sync_cache_r(&mcpm_sync.clusters[cluster].cluster); + return mcpm_sync.clusters[cluster].cluster; +} + +extern unsigned long mcpm_power_up_setup_phys; + +int __init mcpm_sync_init( + void (*power_up_setup)(unsigned int affinity_level)) +{ + unsigned int i, j, mpidr, this_cluster; + + BUILD_BUG_ON(MCPM_SYNC_CLUSTER_SIZE * MAX_NR_CLUSTERS != sizeof mcpm_sync); + BUG_ON((unsigned long)&mcpm_sync & (__CACHE_WRITEBACK_GRANULE - 1)); + + /* + * Set initial CPU and cluster states. + * Only one cluster is assumed to be active at this point. 
+ */ + for (i = 0; i < MAX_NR_CLUSTERS; i++) { + mcpm_sync.clusters[i].cluster = CLUSTER_DOWN; + mcpm_sync.clusters[i].inbound = INBOUND_NOT_COMING_UP; + for (j = 0; j < MAX_CPUS_PER_CLUSTER; j++) + mcpm_sync.clusters[i].cpus[j].cpu = CPU_DOWN; + } + mpidr = read_cpuid_mpidr(); + this_cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); + for_each_online_cpu(i) + mcpm_sync.clusters[this_cluster].cpus[i].cpu = CPU_UP; + mcpm_sync.clusters[this_cluster].cluster = CLUSTER_UP; + sync_cache_w(&mcpm_sync); + + if (power_up_setup) { + mcpm_power_up_setup_phys = virt_to_phys(power_up_setup); + sync_cache_w(&mcpm_power_up_setup_phys); + } + + return 0; +} diff --git a/arch/arm/common/mcpm_head.S b/arch/arm/common/mcpm_head.S new file mode 100644 index 00000000000..e02db4b81a6 --- /dev/null +++ b/arch/arm/common/mcpm_head.S @@ -0,0 +1,233 @@ +/* + * arch/arm/common/mcpm_head.S -- kernel entry point for multi-cluster PM + * + * Created by: Nicolas Pitre, March 2012 + * Copyright: (C) 2012-2013 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * + * Refer to Documentation/arm/cluster-pm-race-avoidance.txt + * for details of the synchronisation algorithms used here. + */ + +#include <linux/linkage.h> +#include <asm/mcpm.h> +#include <asm/assembler.h> + +#include "vlock.h" + +.if MCPM_SYNC_CLUSTER_CPUS +.error "cpus must be the first member of struct mcpm_sync_struct" +.endif + + .macro pr_dbg string +#if defined(CONFIG_DEBUG_LL) && defined(DEBUG) + b 1901f +1902: .asciz "CPU" +1903: .asciz " cluster" +1904: .asciz ": \string" + .align +1901: adr r0, 1902b + bl printascii + mov r0, r9 + bl printhex2 + adr r0, 1903b + bl printascii + mov r0, r10 + bl printhex2 + adr r0, 1904b + bl printascii +#endif + .endm + + .arm + .align + +ENTRY(mcpm_entry_point) + + ARM_BE8(setend be) + THUMB( adr r12, BSYM(1f) ) + THUMB( bx r12 ) + THUMB( .thumb ) +1: + mrc p15, 0, r0, c0, c0, 5 @ MPIDR + ubfx r9, r0, #0, #8 @ r9 = cpu + ubfx r10, r0, #8, #8 @ r10 = cluster + mov r3, #MAX_CPUS_PER_CLUSTER + mla r4, r3, r10, r9 @ r4 = canonical CPU index + cmp r4, #(MAX_CPUS_PER_CLUSTER * MAX_NR_CLUSTERS) + blo 2f + + /* We didn't expect this CPU. Try to cheaply make it quiet. */ +1: wfi + wfe + b 1b + +2: pr_dbg "kernel mcpm_entry_point\n" + + /* + * MMU is off so we need to get to various variables in a + * position independent way. + */ + adr r5, 3f + ldmia r5, {r0, r6, r7, r8, r11} + add r0, r5, r0 @ r0 = mcpm_entry_early_pokes + add r6, r5, r6 @ r6 = mcpm_entry_vectors + ldr r7, [r5, r7] @ r7 = mcpm_power_up_setup_phys + add r8, r5, r8 @ r8 = mcpm_sync + add r11, r5, r11 @ r11 = first_man_locks + + @ Perform an early poke, if any + add r0, r0, r4, lsl #3 + ldmia r0, {r0, r1} + teq r0, #0 + strne r1, [r0] + + mov r0, #MCPM_SYNC_CLUSTER_SIZE + mla r8, r0, r10, r8 @ r8 = sync cluster base + + @ Signal that this CPU is coming UP: + mov r0, #CPU_COMING_UP + mov r5, #MCPM_SYNC_CPU_SIZE + mla r5, r9, r5, r8 @ r5 = sync cpu address + strb r0, [r5] + + @ At this point, the cluster cannot unexpectedly enter the GOING_DOWN + @ state, because there is at least one active CPU (this CPU). + + mov r0, #VLOCK_SIZE + mla r11, r0, r10, r11 @ r11 = cluster first man lock + mov r0, r11 + mov r1, r9 @ cpu + bl vlock_trylock @ implies DMB + + cmp r0, #0 @ failed to get the lock? 
+ bne mcpm_setup_wait @ wait for cluster setup if so + + ldrb r0, [r8, #MCPM_SYNC_CLUSTER_CLUSTER] + cmp r0, #CLUSTER_UP @ cluster already up? + bne mcpm_setup @ if not, set up the cluster + + @ Otherwise, release the first man lock and skip setup: + mov r0, r11 + bl vlock_unlock + b mcpm_setup_complete + +mcpm_setup: + @ Control dependency implies strb not observable before previous ldrb. + + @ Signal that the cluster is being brought up: + mov r0, #INBOUND_COMING_UP + strb r0, [r8, #MCPM_SYNC_CLUSTER_INBOUND] + dmb + + @ Any CPU trying to take the cluster into CLUSTER_GOING_DOWN from this + @ point onwards will observe INBOUND_COMING_UP and abort. + + @ Wait for any previously-pending cluster teardown operations to abort + @ or complete: +mcpm_teardown_wait: + ldrb r0, [r8, #MCPM_SYNC_CLUSTER_CLUSTER] + cmp r0, #CLUSTER_GOING_DOWN + bne first_man_setup + wfe + b mcpm_teardown_wait + +first_man_setup: + dmb + + @ If the outbound gave up before teardown started, skip cluster setup: + + cmp r0, #CLUSTER_UP + beq mcpm_setup_leave + + @ power_up_setup is now responsible for setting up the cluster: + + cmp r7, #0 + mov r0, #1 @ second (cluster) affinity level + blxne r7 @ Call power_up_setup if defined + dmb + + mov r0, #CLUSTER_UP + strb r0, [r8, #MCPM_SYNC_CLUSTER_CLUSTER] + dmb + +mcpm_setup_leave: + @ Leave the cluster setup critical section: + + mov r0, #INBOUND_NOT_COMING_UP + strb r0, [r8, #MCPM_SYNC_CLUSTER_INBOUND] + dsb st + sev + + mov r0, r11 + bl vlock_unlock @ implies DMB + b mcpm_setup_complete + + @ In the contended case, non-first men wait here for cluster setup + @ to complete: +mcpm_setup_wait: + ldrb r0, [r8, #MCPM_SYNC_CLUSTER_CLUSTER] + cmp r0, #CLUSTER_UP + wfene + bne mcpm_setup_wait + dmb + +mcpm_setup_complete: + @ If a platform-specific CPU setup hook is needed, it is + @ called from here. + + cmp r7, #0 + mov r0, #0 @ first (CPU) affinity level + blxne r7 @ Call power_up_setup if defined + dmb + + @ Mark the CPU as up: + + mov r0, #CPU_UP + strb r0, [r5] + + @ Observability order of CPU_UP and opening of the gate does not matter. + +mcpm_entry_gated: + ldr r5, [r6, r4, lsl #2] @ r5 = CPU entry vector + cmp r5, #0 + wfeeq + beq mcpm_entry_gated + dmb + + pr_dbg "released\n" + bx r5 + + .align 2 + +3: .word mcpm_entry_early_pokes - . + .word mcpm_entry_vectors - 3b + .word mcpm_power_up_setup_phys - 3b + .word mcpm_sync - 3b + .word first_man_locks - 3b + +ENDPROC(mcpm_entry_point) + + .bss + + .align CACHE_WRITEBACK_ORDER + .type first_man_locks, #object +first_man_locks: + .space VLOCK_SIZE * MAX_NR_CLUSTERS + .align CACHE_WRITEBACK_ORDER + + .type mcpm_entry_vectors, #object +ENTRY(mcpm_entry_vectors) + .space 4 * MAX_NR_CLUSTERS * MAX_CPUS_PER_CLUSTER + + .type mcpm_entry_early_pokes, #object +ENTRY(mcpm_entry_early_pokes) + .space 8 * MAX_NR_CLUSTERS * MAX_CPUS_PER_CLUSTER + + .type mcpm_power_up_setup_phys, #object +ENTRY(mcpm_power_up_setup_phys) + .space 4 @ set by mcpm_sync_init() diff --git a/arch/arm/common/mcpm_platsmp.c b/arch/arm/common/mcpm_platsmp.c new file mode 100644 index 00000000000..92e54d7c6f4 --- /dev/null +++ b/arch/arm/common/mcpm_platsmp.c @@ -0,0 +1,103 @@ +/* + * linux/arch/arm/mach-vexpress/mcpm_platsmp.c + * + * Created by: Nicolas Pitre, November 2012 + * Copyright: (C) 2012-2013 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + * Code to handle secondary CPU bringup and hotplug for the cluster power API. + */ + +#include <linux/init.h> +#include <linux/smp.h> +#include <linux/spinlock.h> + +#include <asm/mcpm.h> +#include <asm/smp.h> +#include <asm/smp_plat.h> + +static void cpu_to_pcpu(unsigned int cpu, + unsigned int *pcpu, unsigned int *pcluster) +{ + unsigned int mpidr; + + mpidr = cpu_logical_map(cpu); + *pcpu = MPIDR_AFFINITY_LEVEL(mpidr, 0); + *pcluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); +} + +static int mcpm_boot_secondary(unsigned int cpu, struct task_struct *idle) +{ + unsigned int pcpu, pcluster, ret; + extern void secondary_startup(void); + + cpu_to_pcpu(cpu, &pcpu, &pcluster); + + pr_debug("%s: logical CPU %d is physical CPU %d cluster %d\n", + __func__, cpu, pcpu, pcluster); + + mcpm_set_entry_vector(pcpu, pcluster, NULL); + ret = mcpm_cpu_power_up(pcpu, pcluster); + if (ret) + return ret; + mcpm_set_entry_vector(pcpu, pcluster, secondary_startup); + arch_send_wakeup_ipi_mask(cpumask_of(cpu)); + dsb_sev(); + return 0; +} + +static void mcpm_secondary_init(unsigned int cpu) +{ + mcpm_cpu_powered_up(); +} + +#ifdef CONFIG_HOTPLUG_CPU + +static int mcpm_cpu_kill(unsigned int cpu) +{ + unsigned int pcpu, pcluster; + + cpu_to_pcpu(cpu, &pcpu, &pcluster); + + return !mcpm_wait_for_cpu_powerdown(pcpu, pcluster); +} + +static int mcpm_cpu_disable(unsigned int cpu) +{ + /* + * We assume all CPUs may be shut down. + * This would be the hook to use for eventual Secure + * OS migration requests as described in the PSCI spec. + */ + return 0; +} + +static void mcpm_cpu_die(unsigned int cpu) +{ + unsigned int mpidr, pcpu, pcluster; + mpidr = read_cpuid_mpidr(); + pcpu = MPIDR_AFFINITY_LEVEL(mpidr, 0); + pcluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); + mcpm_set_entry_vector(pcpu, pcluster, NULL); + mcpm_cpu_power_down(); +} + +#endif + +static struct smp_operations __initdata mcpm_smp_ops = { + .smp_boot_secondary = mcpm_boot_secondary, + .smp_secondary_init = mcpm_secondary_init, +#ifdef CONFIG_HOTPLUG_CPU + .cpu_kill = mcpm_cpu_kill, + .cpu_disable = mcpm_cpu_disable, + .cpu_die = mcpm_cpu_die, +#endif +}; + +void __init mcpm_smp_set_ops(void) +{ + smp_set_ops(&mcpm_smp_ops); +} diff --git a/arch/arm/common/sa1111.c b/arch/arm/common/sa1111.c index 8ba7044c554..e57d7e5bf96 100644 --- a/arch/arm/common/sa1111.c +++ b/arch/arm/common/sa1111.c @@ -16,6 +16,7 @@ */ #include <linux/module.h> #include <linux/init.h> +#include <linux/irq.h> #include <linux/kernel.h> #include <linux/delay.h> #include <linux/errno.h> @@ -28,14 +29,67 @@ #include <linux/io.h> #include <mach/hardware.h> -#include <asm/mach-types.h> -#include <asm/irq.h> #include <asm/mach/irq.h> +#include <asm/mach-types.h> #include <asm/sizes.h> #include <asm/hardware/sa1111.h> -extern void __init sa1110_mb_enable(void); +/* SA1111 IRQs */ +#define IRQ_GPAIN0 (0) +#define IRQ_GPAIN1 (1) +#define IRQ_GPAIN2 (2) +#define IRQ_GPAIN3 (3) +#define IRQ_GPBIN0 (4) +#define IRQ_GPBIN1 (5) +#define IRQ_GPBIN2 (6) +#define IRQ_GPBIN3 (7) +#define IRQ_GPBIN4 (8) +#define IRQ_GPBIN5 (9) +#define IRQ_GPCIN0 (10) +#define IRQ_GPCIN1 (11) +#define IRQ_GPCIN2 (12) +#define IRQ_GPCIN3 (13) +#define IRQ_GPCIN4 (14) +#define IRQ_GPCIN5 (15) +#define IRQ_GPCIN6 (16) +#define IRQ_GPCIN7 (17) +#define IRQ_MSTXINT (18) +#define IRQ_MSRXINT (19) +#define IRQ_MSSTOPERRINT (20) +#define IRQ_TPTXINT (21) +#define IRQ_TPRXINT (22) +#define IRQ_TPSTOPERRINT (23) +#define SSPXMTINT (24) +#define SSPRCVINT (25) +#define SSPROR (26) +#define AUDXMTDMADONEA (32) +#define AUDRCVDMADONEA 
(33) +#define AUDXMTDMADONEB (34) +#define AUDRCVDMADONEB (35) +#define AUDTFSR (36) +#define AUDRFSR (37) +#define AUDTUR (38) +#define AUDROR (39) +#define AUDDTS (40) +#define AUDRDD (41) +#define AUDSTO (42) +#define IRQ_USBPWR (43) +#define IRQ_HCIM (44) +#define IRQ_HCIBUFFACC (45) +#define IRQ_HCIRMTWKP (46) +#define IRQ_NHCIMFCIR (47) +#define IRQ_USB_PORT_RESUME (48) +#define IRQ_S0_READY_NINT (49) +#define IRQ_S1_READY_NINT (50) +#define IRQ_S0_CD_VALID (51) +#define IRQ_S1_CD_VALID (52) +#define IRQ_S0_BVD1_STSCHG (53) +#define IRQ_S1_BVD1_STSCHG (54) +#define SA1111_IRQ_NR (55) + +extern void sa1110_mb_enable(void); +extern void sa1110_mb_disable(void); /* * We keep the following data for the overall SA1111. Note that the @@ -49,8 +103,10 @@ struct sa1111 { struct clk *clk; unsigned long phys; int irq; + int irq_base; /* base for cascaded on-chip IRQs */ spinlock_t lock; void __iomem *base; + struct sa1111_platform_data *pdata; #ifdef CONFIG_PM void *saved_state; #endif @@ -65,6 +121,7 @@ static struct sa1111 *g_sa1111; struct sa1111_dev_info { unsigned long offset; unsigned long skpcr_mask; + bool dma; unsigned int devid; unsigned int irq[6]; }; @@ -73,6 +130,7 @@ static struct sa1111_dev_info sa1111_devices[] = { { .offset = SA1111_USB, .skpcr_mask = SKPCR_UCLKEN, + .dma = true, .devid = SA1111_DEVID_USB, .irq = { IRQ_USBPWR, @@ -86,6 +144,7 @@ static struct sa1111_dev_info sa1111_devices[] = { { .offset = 0x0600, .skpcr_mask = SKPCR_I2SCLKEN | SKPCR_L3CLKEN, + .dma = true, .devid = SA1111_DEVID_SAC, .irq = { AUDXMTDMADONEA, @@ -102,7 +161,7 @@ static struct sa1111_dev_info sa1111_devices[] = { { .offset = SA1111_KBD, .skpcr_mask = SKPCR_PTCLKEN, - .devid = SA1111_DEVID_PS2, + .devid = SA1111_DEVID_PS2_KBD, .irq = { IRQ_TPRXINT, IRQ_TPTXINT @@ -111,7 +170,7 @@ static struct sa1111_dev_info sa1111_devices[] = { { .offset = SA1111_MSE, .skpcr_mask = SKPCR_PMCLKEN, - .devid = SA1111_DEVID_PS2, + .devid = SA1111_DEVID_PS2_MSE, .irq = { IRQ_MSRXINT, IRQ_MSTXINT @@ -132,17 +191,6 @@ static struct sa1111_dev_info sa1111_devices[] = { }, }; -void __init sa1111_adjust_zones(int node, unsigned long *size, unsigned long *holes) -{ - unsigned int sz = SZ_1M >> PAGE_SHIFT; - - if (node != 0) - sz = 0; - - size[1] = size[0] - sz; - size[0] = sz; -} - /* * SA1111 interrupt support. 
Since clearing an IRQ while there are * active IRQs causes the interrupt output to pulse, the upper levels @@ -152,58 +200,61 @@ static void sa1111_irq_handler(unsigned int irq, struct irq_desc *desc) { unsigned int stat0, stat1, i; - void __iomem *base = get_irq_data(irq); + struct sa1111 *sachip = irq_get_handler_data(irq); + void __iomem *mapbase = sachip->base + SA1111_INTC; - stat0 = sa1111_readl(base + SA1111_INTSTATCLR0); - stat1 = sa1111_readl(base + SA1111_INTSTATCLR1); + stat0 = sa1111_readl(mapbase + SA1111_INTSTATCLR0); + stat1 = sa1111_readl(mapbase + SA1111_INTSTATCLR1); - sa1111_writel(stat0, base + SA1111_INTSTATCLR0); + sa1111_writel(stat0, mapbase + SA1111_INTSTATCLR0); - desc->chip->ack(irq); + desc->irq_data.chip->irq_ack(&desc->irq_data); - sa1111_writel(stat1, base + SA1111_INTSTATCLR1); + sa1111_writel(stat1, mapbase + SA1111_INTSTATCLR1); if (stat0 == 0 && stat1 == 0) { do_bad_IRQ(irq, desc); return; } - for (i = IRQ_SA1111_START; stat0; i++, stat0 >>= 1) + for (i = 0; stat0; i++, stat0 >>= 1) if (stat0 & 1) - handle_edge_irq(i, irq_desc + i); + generic_handle_irq(i + sachip->irq_base); - for (i = IRQ_SA1111_START + 32; stat1; i++, stat1 >>= 1) + for (i = 32; stat1; i++, stat1 >>= 1) if (stat1 & 1) - handle_edge_irq(i, irq_desc + i); + generic_handle_irq(i + sachip->irq_base); /* For level-based interrupts */ - desc->chip->unmask(irq); + desc->irq_data.chip->irq_unmask(&desc->irq_data); } -#define SA1111_IRQMASK_LO(x) (1 << (x - IRQ_SA1111_START)) -#define SA1111_IRQMASK_HI(x) (1 << (x - IRQ_SA1111_START - 32)) +#define SA1111_IRQMASK_LO(x) (1 << (x - sachip->irq_base)) +#define SA1111_IRQMASK_HI(x) (1 << (x - sachip->irq_base - 32)) -static void sa1111_ack_irq(unsigned int irq) +static void sa1111_ack_irq(struct irq_data *d) { } -static void sa1111_mask_lowirq(unsigned int irq) +static void sa1111_mask_lowirq(struct irq_data *d) { - void __iomem *mapbase = get_irq_chip_data(irq); + struct sa1111 *sachip = irq_data_get_irq_chip_data(d); + void __iomem *mapbase = sachip->base + SA1111_INTC; unsigned long ie0; ie0 = sa1111_readl(mapbase + SA1111_INTEN0); - ie0 &= ~SA1111_IRQMASK_LO(irq); + ie0 &= ~SA1111_IRQMASK_LO(d->irq); writel(ie0, mapbase + SA1111_INTEN0); } -static void sa1111_unmask_lowirq(unsigned int irq) +static void sa1111_unmask_lowirq(struct irq_data *d) { - void __iomem *mapbase = get_irq_chip_data(irq); + struct sa1111 *sachip = irq_data_get_irq_chip_data(d); + void __iomem *mapbase = sachip->base + SA1111_INTC; unsigned long ie0; ie0 = sa1111_readl(mapbase + SA1111_INTEN0); - ie0 |= SA1111_IRQMASK_LO(irq); + ie0 |= SA1111_IRQMASK_LO(d->irq); sa1111_writel(ie0, mapbase + SA1111_INTEN0); } @@ -214,10 +265,11 @@ static void sa1111_unmask_lowirq(unsigned int irq) * be triggered. In fact, its very difficult, if not impossible to get * INTSET to re-trigger the interrupt. 
*/ -static int sa1111_retrigger_lowirq(unsigned int irq) +static int sa1111_retrigger_lowirq(struct irq_data *d) { - unsigned int mask = SA1111_IRQMASK_LO(irq); - void __iomem *mapbase = get_irq_chip_data(irq); + struct sa1111 *sachip = irq_data_get_irq_chip_data(d); + void __iomem *mapbase = sachip->base + SA1111_INTC; + unsigned int mask = SA1111_IRQMASK_LO(d->irq); unsigned long ip0; int i; @@ -225,20 +277,21 @@ static int sa1111_retrigger_lowirq(unsigned int irq) for (i = 0; i < 8; i++) { sa1111_writel(ip0 ^ mask, mapbase + SA1111_INTPOL0); sa1111_writel(ip0, mapbase + SA1111_INTPOL0); - if (sa1111_readl(mapbase + SA1111_INTSTATCLR1) & mask) + if (sa1111_readl(mapbase + SA1111_INTSTATCLR0) & mask) break; } if (i == 8) printk(KERN_ERR "Danger Will Robinson: failed to " - "re-trigger IRQ%d\n", irq); + "re-trigger IRQ%d\n", d->irq); return i == 8 ? -1 : 0; } -static int sa1111_type_lowirq(unsigned int irq, unsigned int flags) +static int sa1111_type_lowirq(struct irq_data *d, unsigned int flags) { - unsigned int mask = SA1111_IRQMASK_LO(irq); - void __iomem *mapbase = get_irq_chip_data(irq); + struct sa1111 *sachip = irq_data_get_irq_chip_data(d); + void __iomem *mapbase = sachip->base + SA1111_INTC; + unsigned int mask = SA1111_IRQMASK_LO(d->irq); unsigned long ip0; if (flags == IRQ_TYPE_PROBE) @@ -258,10 +311,11 @@ static int sa1111_type_lowirq(unsigned int irq, unsigned int flags) return 0; } -static int sa1111_wake_lowirq(unsigned int irq, unsigned int on) +static int sa1111_wake_lowirq(struct irq_data *d, unsigned int on) { - unsigned int mask = SA1111_IRQMASK_LO(irq); - void __iomem *mapbase = get_irq_chip_data(irq); + struct sa1111 *sachip = irq_data_get_irq_chip_data(d); + void __iomem *mapbase = sachip->base + SA1111_INTC; + unsigned int mask = SA1111_IRQMASK_LO(d->irq); unsigned long we0; we0 = sa1111_readl(mapbase + SA1111_WAKEEN0); @@ -276,31 +330,33 @@ static int sa1111_wake_lowirq(unsigned int irq, unsigned int on) static struct irq_chip sa1111_low_chip = { .name = "SA1111-l", - .ack = sa1111_ack_irq, - .mask = sa1111_mask_lowirq, - .unmask = sa1111_unmask_lowirq, - .retrigger = sa1111_retrigger_lowirq, - .set_type = sa1111_type_lowirq, - .set_wake = sa1111_wake_lowirq, + .irq_ack = sa1111_ack_irq, + .irq_mask = sa1111_mask_lowirq, + .irq_unmask = sa1111_unmask_lowirq, + .irq_retrigger = sa1111_retrigger_lowirq, + .irq_set_type = sa1111_type_lowirq, + .irq_set_wake = sa1111_wake_lowirq, }; -static void sa1111_mask_highirq(unsigned int irq) +static void sa1111_mask_highirq(struct irq_data *d) { - void __iomem *mapbase = get_irq_chip_data(irq); + struct sa1111 *sachip = irq_data_get_irq_chip_data(d); + void __iomem *mapbase = sachip->base + SA1111_INTC; unsigned long ie1; ie1 = sa1111_readl(mapbase + SA1111_INTEN1); - ie1 &= ~SA1111_IRQMASK_HI(irq); + ie1 &= ~SA1111_IRQMASK_HI(d->irq); sa1111_writel(ie1, mapbase + SA1111_INTEN1); } -static void sa1111_unmask_highirq(unsigned int irq) +static void sa1111_unmask_highirq(struct irq_data *d) { - void __iomem *mapbase = get_irq_chip_data(irq); + struct sa1111 *sachip = irq_data_get_irq_chip_data(d); + void __iomem *mapbase = sachip->base + SA1111_INTC; unsigned long ie1; ie1 = sa1111_readl(mapbase + SA1111_INTEN1); - ie1 |= SA1111_IRQMASK_HI(irq); + ie1 |= SA1111_IRQMASK_HI(d->irq); sa1111_writel(ie1, mapbase + SA1111_INTEN1); } @@ -311,10 +367,11 @@ static void sa1111_unmask_highirq(unsigned int irq) * be triggered. In fact, its very difficult, if not impossible to get * INTSET to re-trigger the interrupt. 
*/ -static int sa1111_retrigger_highirq(unsigned int irq) +static int sa1111_retrigger_highirq(struct irq_data *d) { - unsigned int mask = SA1111_IRQMASK_HI(irq); - void __iomem *mapbase = get_irq_chip_data(irq); + struct sa1111 *sachip = irq_data_get_irq_chip_data(d); + void __iomem *mapbase = sachip->base + SA1111_INTC; + unsigned int mask = SA1111_IRQMASK_HI(d->irq); unsigned long ip1; int i; @@ -328,14 +385,15 @@ static int sa1111_retrigger_highirq(unsigned int irq) if (i == 8) printk(KERN_ERR "Danger Will Robinson: failed to " - "re-trigger IRQ%d\n", irq); + "re-trigger IRQ%d\n", d->irq); return i == 8 ? -1 : 0; } -static int sa1111_type_highirq(unsigned int irq, unsigned int flags) +static int sa1111_type_highirq(struct irq_data *d, unsigned int flags) { - unsigned int mask = SA1111_IRQMASK_HI(irq); - void __iomem *mapbase = get_irq_chip_data(irq); + struct sa1111 *sachip = irq_data_get_irq_chip_data(d); + void __iomem *mapbase = sachip->base + SA1111_INTC; + unsigned int mask = SA1111_IRQMASK_HI(d->irq); unsigned long ip1; if (flags == IRQ_TYPE_PROBE) @@ -355,10 +413,11 @@ static int sa1111_type_highirq(unsigned int irq, unsigned int flags) return 0; } -static int sa1111_wake_highirq(unsigned int irq, unsigned int on) +static int sa1111_wake_highirq(struct irq_data *d, unsigned int on) { - unsigned int mask = SA1111_IRQMASK_HI(irq); - void __iomem *mapbase = get_irq_chip_data(irq); + struct sa1111 *sachip = irq_data_get_irq_chip_data(d); + void __iomem *mapbase = sachip->base + SA1111_INTC; + unsigned int mask = SA1111_IRQMASK_HI(d->irq); unsigned long we1; we1 = sa1111_readl(mapbase + SA1111_WAKEEN1); @@ -373,24 +432,36 @@ static int sa1111_wake_highirq(unsigned int irq, unsigned int on) static struct irq_chip sa1111_high_chip = { .name = "SA1111-h", - .ack = sa1111_ack_irq, - .mask = sa1111_mask_highirq, - .unmask = sa1111_unmask_highirq, - .retrigger = sa1111_retrigger_highirq, - .set_type = sa1111_type_highirq, - .set_wake = sa1111_wake_highirq, + .irq_ack = sa1111_ack_irq, + .irq_mask = sa1111_mask_highirq, + .irq_unmask = sa1111_unmask_highirq, + .irq_retrigger = sa1111_retrigger_highirq, + .irq_set_type = sa1111_type_highirq, + .irq_set_wake = sa1111_wake_highirq, }; -static void sa1111_setup_irq(struct sa1111 *sachip) +static int sa1111_setup_irq(struct sa1111 *sachip, unsigned irq_base) { void __iomem *irqbase = sachip->base + SA1111_INTC; - unsigned int irq; + unsigned i, irq; + int ret; /* * We're guaranteed that this region hasn't been taken. 
*/ request_mem_region(sachip->phys + SA1111_INTC, 512, "irq"); + ret = irq_alloc_descs(-1, irq_base, SA1111_IRQ_NR, -1); + if (ret <= 0) { + dev_err(sachip->dev, "unable to allocate %u irqs: %d\n", + SA1111_IRQ_NR, ret); + if (ret == 0) + ret = -EINVAL; + return ret; + } + + sachip->irq_base = ret; + /* disable all IRQs */ sa1111_writel(0, irqbase + SA1111_INTEN0); sa1111_writel(0, irqbase + SA1111_INTEN1); @@ -410,26 +481,33 @@ static void sa1111_setup_irq(struct sa1111 *sachip) sa1111_writel(~0, irqbase + SA1111_INTSTATCLR0); sa1111_writel(~0, irqbase + SA1111_INTSTATCLR1); - for (irq = IRQ_GPAIN0; irq <= SSPROR; irq++) { - set_irq_chip(irq, &sa1111_low_chip); - set_irq_chip_data(irq, irqbase); - set_irq_handler(irq, handle_edge_irq); + for (i = IRQ_GPAIN0; i <= SSPROR; i++) { + irq = sachip->irq_base + i; + irq_set_chip_and_handler(irq, &sa1111_low_chip, + handle_edge_irq); + irq_set_chip_data(irq, sachip); set_irq_flags(irq, IRQF_VALID | IRQF_PROBE); } - for (irq = AUDXMTDMADONEA; irq <= IRQ_S1_BVD1_STSCHG; irq++) { - set_irq_chip(irq, &sa1111_high_chip); - set_irq_chip_data(irq, irqbase); - set_irq_handler(irq, handle_edge_irq); + for (i = AUDXMTDMADONEA; i <= IRQ_S1_BVD1_STSCHG; i++) { + irq = sachip->irq_base + i; + irq_set_chip_and_handler(irq, &sa1111_high_chip, + handle_edge_irq); + irq_set_chip_data(irq, sachip); set_irq_flags(irq, IRQF_VALID | IRQF_PROBE); } /* * Register SA1111 interrupt */ - set_irq_type(sachip->irq, IRQ_TYPE_EDGE_RISING); - set_irq_data(sachip->irq, irqbase); - set_irq_chained_handler(sachip->irq, sa1111_irq_handler); + irq_set_irq_type(sachip->irq, IRQ_TYPE_EDGE_RISING); + irq_set_handler_data(sachip->irq, sachip); + irq_set_chained_handler(sachip->irq, sa1111_irq_handler); + + dev_info(sachip->dev, "Providing IRQ%u-%u\n", + sachip->irq_base, sachip->irq_base + SA1111_IRQ_NR - 1); + + return 0; } /* @@ -526,14 +604,12 @@ sa1111_configure_smc(struct sa1111 *sachip, int sdram, unsigned int drac, sachip->dev->coherent_dma_mask &= sa1111_dma_mask[drac >> 2]; } - #endif static void sa1111_dev_release(struct device *_dev) { struct sa1111_dev *dev = SA1111_DEV(_dev); - release_resource(&dev->res); kfree(dev); } @@ -542,66 +618,58 @@ sa1111_init_one_child(struct sa1111 *sachip, struct resource *parent, struct sa1111_dev_info *info) { struct sa1111_dev *dev; + unsigned i; int ret; dev = kzalloc(sizeof(struct sa1111_dev), GFP_KERNEL); if (!dev) { ret = -ENOMEM; - goto out; + goto err_alloc; } + device_initialize(&dev->dev); dev_set_name(&dev->dev, "%4.4lx", info->offset); dev->devid = info->devid; dev->dev.parent = sachip->dev; dev->dev.bus = &sa1111_bus_type; dev->dev.release = sa1111_dev_release; - dev->dev.coherent_dma_mask = sachip->dev->coherent_dma_mask; dev->res.start = sachip->phys + info->offset; dev->res.end = dev->res.start + 511; dev->res.name = dev_name(&dev->dev); dev->res.flags = IORESOURCE_MEM; dev->mapbase = sachip->base + info->offset; dev->skpcr_mask = info->skpcr_mask; - memmove(dev->irq, info->irq, sizeof(dev->irq)); - - ret = request_resource(parent, &dev->res); - if (ret) { - printk("SA1111: failed to allocate resource for %s\n", - dev->res.name); - dev_set_name(&dev->dev, NULL); - kfree(dev); - goto out; - } - - ret = device_register(&dev->dev); - if (ret) { - release_resource(&dev->res); - kfree(dev); - goto out; - } + for (i = 0; i < ARRAY_SIZE(info->irq); i++) + dev->irq[i] = sachip->irq_base + info->irq[i]; -#ifdef CONFIG_DMABOUNCE /* - * If the parent device has a DMA mask associated with it, - * propagate it down to the children. 
+ * If the parent device has a DMA mask associated with it, and + * this child supports DMA, propagate it down to the children. */ - if (sachip->dev->dma_mask) { + if (info->dma && sachip->dev->dma_mask) { dev->dma_mask = *sachip->dev->dma_mask; dev->dev.dma_mask = &dev->dma_mask; + dev->dev.coherent_dma_mask = sachip->dev->coherent_dma_mask; + } - if (dev->dma_mask != 0xffffffffUL) { - ret = dmabounce_register_dev(&dev->dev, 1024, 4096); - if (ret) { - dev_err(&dev->dev, "SA1111: Failed to register" - " with dmabounce\n"); - device_unregister(&dev->dev); - } - } + ret = request_resource(parent, &dev->res); + if (ret) { + dev_err(sachip->dev, "failed to allocate resource for %s\n", + dev->res.name); + goto err_resource; } -#endif -out: + ret = device_add(&dev->dev); + if (ret) + goto err_add; + return 0; + + err_add: + release_resource(&dev->res); + err_resource: + put_device(&dev->dev); + err_alloc: return ret; } @@ -615,16 +683,20 @@ out: * Returns: * %-ENODEV device not found. * %-EBUSY physical address already marked in-use. + * %-EINVAL no platform data passed * %0 successful. */ -static int -__sa1111_probe(struct device *me, struct resource *mem, int irq) +static int __sa1111_probe(struct device *me, struct resource *mem, int irq) { + struct sa1111_platform_data *pd = me->platform_data; struct sa1111 *sachip; unsigned long id; unsigned int has_devs; int i, ret = -ENODEV; + if (!pd) + return -EINVAL; + sachip = kzalloc(sizeof(struct sa1111), GFP_KERNEL); if (!sachip) return -ENOMEM; @@ -635,11 +707,16 @@ __sa1111_probe(struct device *me, struct resource *mem, int irq) goto err_free; } + ret = clk_prepare(sachip->clk); + if (ret) + goto err_clkput; + spin_lock_init(&sachip->lock); sachip->dev = me; dev_set_drvdata(sachip->dev, sachip); + sachip->pdata = pd; sachip->phys = mem->start; sachip->irq = irq; @@ -650,7 +727,7 @@ __sa1111_probe(struct device *me, struct resource *mem, int irq) sachip->base = ioremap(mem->start, PAGE_SIZE * 2); if (!sachip->base) { ret = -ENOMEM; - goto err_clkput; + goto err_clk_unprep; } /* @@ -672,6 +749,16 @@ __sa1111_probe(struct device *me, struct resource *mem, int irq) */ sa1111_wake(sachip); + /* + * The interrupt controller must be initialised before any + * other device to ensure that the interrupts are available. + */ + if (sachip->irq != NO_IRQ) { + ret = sa1111_setup_irq(sachip, pd->irq_base); + if (ret) + goto err_unmap; + } + #ifdef CONFIG_ARCH_SA1100 { unsigned int val; @@ -702,30 +789,22 @@ __sa1111_probe(struct device *me, struct resource *mem, int irq) } #endif - /* - * The interrupt controller must be initialised before any - * other device to ensure that the interrupts are available. 
- */ - if (sachip->irq != NO_IRQ) - sa1111_setup_irq(sachip); - g_sa1111 = sachip; has_devs = ~0; - if (machine_is_assabet() || machine_is_jornada720() || - machine_is_badge4()) - has_devs &= ~(1 << 4); - else - has_devs &= ~(1 << 1); + if (pd) + has_devs &= ~pd->disable_devs; for (i = 0; i < ARRAY_SIZE(sa1111_devices); i++) - if (has_devs & (1 << i)) + if (sa1111_devices[i].devid & has_devs) sa1111_init_one_child(sachip, mem, &sa1111_devices[i]); return 0; err_unmap: iounmap(sachip->base); + err_clk_unprep: + clk_unprepare(sachip->clk); err_clkput: clk_put(sachip->clk); err_free: @@ -735,7 +814,10 @@ __sa1111_probe(struct device *me, struct resource *mem, int irq) static int sa1111_remove_one(struct device *dev, void *data) { - device_unregister(dev); + struct sa1111_dev *sadev = SA1111_DEV(dev); + device_del(&sadev->dev); + release_resource(&sadev->res); + put_device(&sadev->dev); return 0; } @@ -752,10 +834,12 @@ static void __sa1111_remove(struct sa1111 *sachip) sa1111_writel(0, irqbase + SA1111_WAKEEN1); clk_disable(sachip->clk); + clk_unprepare(sachip->clk); if (sachip->irq != NO_IRQ) { - set_irq_chained_handler(sachip->irq, NULL); - set_irq_data(sachip->irq, NULL); + irq_set_chained_handler(sachip->irq, NULL); + irq_set_handler_data(sachip->irq, NULL); + irq_free_descs(sachip->irq_base, SA1111_IRQ_NR); release_mem_region(sachip->phys + SA1111_INTC, 512); } @@ -765,34 +849,6 @@ static void __sa1111_remove(struct sa1111 *sachip) kfree(sachip); } -/* - * According to the "Intel StrongARM SA-1111 Microprocessor Companion - * Chip Specification Update" (June 2000), erratum #7, there is a - * significant bug in the SA1111 SDRAM shared memory controller. If - * an access to a region of memory above 1MB relative to the bank base, - * it is important that address bit 10 _NOT_ be asserted. Depending - * on the configuration of the RAM, bit 10 may correspond to one - * of several different (processor-relative) address bits. - * - * This routine only identifies whether or not a given DMA address - * is susceptible to the bug. - * - * This should only get called for sa1111_device types due to the - * way we configure our device dma_masks. - */ -int dma_needs_bounce(struct device *dev, dma_addr_t addr, size_t size) -{ - /* - * Section 4.6 of the "Intel StrongARM SA-1111 Development Module - * User's Guide" mentions that jumpers R51 and R52 control the - * target of SA-1111 DMA (either SDRAM bank 0 on Assabet, or - * SDRAM bank 1 on Neponset). The default configuration selects - * Assabet, so any address in bank 1 is necessarily invalid. 
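The common thread in the it8152, locomo and sa1111 hunks above is the conversion from the old unsigned-int irq_chip callbacks to the struct irq_data based genirq API: the per-chip state is hung off each descriptor with irq_set_chip_data(), recovered in the handlers with irq_data_get_irq_chip_data(), and the descriptor range is allocated at probe time with irq_alloc_descs() instead of relying on a fixed IRQ_*_START base. A minimal sketch of that pattern, not taken from the patch itself; the names demo_chip, DEMO_IMR and DEMO_NR_IRQS are purely illustrative:

#include <linux/irq.h>
#include <linux/io.h>
#include <linux/bitops.h>

#define DEMO_NR_IRQS	4
#define DEMO_IMR	0x00		/* hypothetical interrupt mask register */

struct demo_chip {
	void __iomem *base;
	int irq_base;			/* first Linux IRQ of this instance */
};

static void demo_mask_irq(struct irq_data *d)
{
	struct demo_chip *chip = irq_data_get_irq_chip_data(d);
	u32 r = readl(chip->base + DEMO_IMR);

	/* per-instance offset instead of a global IRQ_*_START constant */
	r &= ~BIT(d->irq - chip->irq_base);
	writel(r, chip->base + DEMO_IMR);
}

static void demo_unmask_irq(struct irq_data *d)
{
	struct demo_chip *chip = irq_data_get_irq_chip_data(d);
	u32 r = readl(chip->base + DEMO_IMR);

	r |= BIT(d->irq - chip->irq_base);
	writel(r, chip->base + DEMO_IMR);
}

static struct irq_chip demo_irq_chip = {
	.name		= "demo",
	.irq_mask	= demo_mask_irq,
	.irq_unmask	= demo_unmask_irq,
};

static int demo_setup_irqs(struct demo_chip *chip, int irq_base)
{
	int i, irq;

	/* Allocate a block of descriptors; irq_base is only a hint. */
	irq = irq_alloc_descs(-1, irq_base, DEMO_NR_IRQS, -1);
	if (irq < 0)
		return irq;
	chip->irq_base = irq;

	for (i = 0; i < DEMO_NR_IRQS; i++) {
		irq = chip->irq_base + i;
		irq_set_chip_and_handler(irq, &demo_irq_chip, handle_level_irq);
		irq_set_chip_data(irq, chip);
		/* the ARM drivers above additionally mark these valid and
		 * probeable with set_irq_flags(irq, IRQF_VALID | IRQF_PROBE) */
	}
	return 0;
}

Because the callbacks recover the chip from struct irq_data rather than decoding a global IRQ number, one irq_chip can serve several instances, which is what allows sa1111 and locomo in the hunks above to take irq_base from platform data and drop their hard-coded IRQ_LOCOMO_* / IRQ_SA1111_START layouts.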
- */ - return ((machine_is_assabet() || machine_is_pfs168()) && - (addr >= 0xc8000000 || (addr + size) >= 0xc8000000)); -} - struct sa1111_save_data { unsigned int skcr; unsigned int skpcr; @@ -842,6 +898,9 @@ static int sa1111_suspend(struct platform_device *dev, pm_message_t state) save->skpwm0 = sa1111_readl(base + SA1111_SKPWM0); save->skpwm1 = sa1111_readl(base + SA1111_SKPWM1); + sa1111_writel(0, sachip->base + SA1111_SKPWM0); + sa1111_writel(0, sachip->base + SA1111_SKPWM1); + base = sachip->base + SA1111_INTC; save->intpol0 = sa1111_readl(base + SA1111_INTPOL0); save->intpol1 = sa1111_readl(base + SA1111_INTPOL1); @@ -857,13 +916,15 @@ static int sa1111_suspend(struct platform_device *dev, pm_message_t state) */ val = sa1111_readl(sachip->base + SA1111_SKCR); sa1111_writel(val | SKCR_SLEEP, sachip->base + SA1111_SKCR); - sa1111_writel(0, sachip->base + SA1111_SKPWM0); - sa1111_writel(0, sachip->base + SA1111_SKPWM1); clk_disable(sachip->clk); spin_unlock_irqrestore(&sachip->lock, flags); +#ifdef CONFIG_ARCH_SA1100 + sa1110_mb_disable(); +#endif + return 0; } @@ -887,8 +948,6 @@ static int sa1111_resume(struct platform_device *dev) if (!save) return 0; - spin_lock_irqsave(&sachip->lock, flags); - /* * Ensure that the SA1111 is still here. * FIXME: shouldn't do this here. @@ -905,6 +964,18 @@ static int sa1111_resume(struct platform_device *dev) * First of all, wake up the chip. */ sa1111_wake(sachip); + +#ifdef CONFIG_ARCH_SA1100 + /* Enable the memory bus request/grant signals */ + sa1110_mb_enable(); +#endif + + /* + * Only lock for write ops. Also, sa1111_wake must be called with + * released spinlock! + */ + spin_lock_irqsave(&sachip->lock, flags); + sa1111_writel(0, sachip->base + SA1111_INTC + SA1111_INTEN0); sa1111_writel(0, sachip->base + SA1111_INTC + SA1111_INTEN1); @@ -939,7 +1010,7 @@ static int sa1111_resume(struct platform_device *dev) #define sa1111_resume NULL #endif -static int __devinit sa1111_probe(struct platform_device *pdev) +static int sa1111_probe(struct platform_device *pdev) { struct resource *mem; int irq; @@ -959,13 +1030,12 @@ static int sa1111_remove(struct platform_device *pdev) struct sa1111 *sachip = platform_get_drvdata(pdev); if (sachip) { - __sa1111_remove(sachip); - platform_set_drvdata(pdev, NULL); - #ifdef CONFIG_PM kfree(sachip->saved_state); sachip->saved_state = NULL; #endif + __sa1111_remove(sachip); + platform_set_drvdata(pdev, NULL); } return 0; @@ -987,6 +1057,7 @@ static struct platform_driver sa1111_device_driver = { .resume = sa1111_resume, .driver = { .name = "sa1111", + .owner = THIS_MODULE, }, }; @@ -1172,16 +1243,23 @@ EXPORT_SYMBOL(sa1111_set_sleep_io); * sa1111_enable_device - enable an on-chip SA1111 function block * @sadev: SA1111 function block device to enable */ -void sa1111_enable_device(struct sa1111_dev *sadev) +int sa1111_enable_device(struct sa1111_dev *sadev) { struct sa1111 *sachip = sa1111_chip_driver(sadev); unsigned long flags; unsigned int val; + int ret = 0; - spin_lock_irqsave(&sachip->lock, flags); - val = sa1111_readl(sachip->base + SA1111_SKPCR); - sa1111_writel(val | sadev->skpcr_mask, sachip->base + SA1111_SKPCR); - spin_unlock_irqrestore(&sachip->lock, flags); + if (sachip->pdata && sachip->pdata->enable) + ret = sachip->pdata->enable(sachip->pdata->data, sadev->devid); + + if (ret == 0) { + spin_lock_irqsave(&sachip->lock, flags); + val = sa1111_readl(sachip->base + SA1111_SKPCR); + sa1111_writel(val | sadev->skpcr_mask, sachip->base + SA1111_SKPCR); + spin_unlock_irqrestore(&sachip->lock, flags); + } + 
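	/*
	 * Editorial sketch, not part of this patch: sa1111_enable_device()
	 * and sa1111_disable_device() now consult optional hooks in the
	 * platform data, so board code can gate power or clocks for a
	 * function block before SKPCR is touched.  The field names below
	 * (irq_base, data, enable, disable) follow the uses visible in this
	 * patch; the board functions, the GPIO and the SA1111_DEVID_* mask
	 * are hypothetical illustrations only:
	 *
	 *	static int board_sa1111_enable(void *data, unsigned devid)
	 *	{
	 *		if (devid & SA1111_DEVID_USB)
	 *			gpio_set_value(BOARD_GPIO_USB_PWR, 1);
	 *		return 0;
	 *	}
	 *
	 *	static void board_sa1111_disable(void *data, unsigned devid)
	 *	{
	 *		if (devid & SA1111_DEVID_USB)
	 *			gpio_set_value(BOARD_GPIO_USB_PWR, 0);
	 *	}
	 *
	 *	static struct sa1111_platform_data board_sa1111_data = {
	 *		.irq_base	= BOARD_SA1111_IRQ_BASE,
	 *		.enable		= board_sa1111_enable,
	 *		.disable	= board_sa1111_disable,
	 *	};
	 */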
return ret; } EXPORT_SYMBOL(sa1111_enable_device); @@ -1199,6 +1277,9 @@ void sa1111_disable_device(struct sa1111_dev *sadev) val = sa1111_readl(sachip->base + SA1111_SKPCR); sa1111_writel(val & ~sadev->skpcr_mask, sachip->base + SA1111_SKPCR); spin_unlock_irqrestore(&sachip->lock, flags); + + if (sachip->pdata && sachip->pdata->disable) + sachip->pdata->disable(sachip->pdata->data, sadev->devid); } EXPORT_SYMBOL(sa1111_disable_device); @@ -1213,7 +1294,7 @@ static int sa1111_match(struct device *_dev, struct device_driver *_drv) struct sa1111_dev *dev = SA1111_DEV(_dev); struct sa1111_driver *drv = SA1111_DRV(_drv); - return dev->devid == drv->devid; + return dev->devid & drv->devid; } static int sa1111_bus_suspend(struct device *dev, pm_message_t state) @@ -1238,6 +1319,14 @@ static int sa1111_bus_resume(struct device *dev) return ret; } +static void sa1111_bus_shutdown(struct device *dev) +{ + struct sa1111_driver *drv = SA1111_DRV(dev->driver); + + if (drv && drv->shutdown) + drv->shutdown(SA1111_DEV(dev)); +} + static int sa1111_bus_probe(struct device *dev) { struct sa1111_dev *sadev = SA1111_DEV(dev); @@ -1267,6 +1356,7 @@ struct bus_type sa1111_bus_type = { .remove = sa1111_bus_remove, .suspend = sa1111_bus_suspend, .resume = sa1111_bus_resume, + .shutdown = sa1111_bus_shutdown, }; EXPORT_SYMBOL(sa1111_bus_type); @@ -1283,9 +1373,70 @@ void sa1111_driver_unregister(struct sa1111_driver *driver) } EXPORT_SYMBOL(sa1111_driver_unregister); +#ifdef CONFIG_DMABOUNCE +/* + * According to the "Intel StrongARM SA-1111 Microprocessor Companion + * Chip Specification Update" (June 2000), erratum #7, there is a + * significant bug in the SA1111 SDRAM shared memory controller. If + * an access to a region of memory above 1MB relative to the bank base, + * it is important that address bit 10 _NOT_ be asserted. Depending + * on the configuration of the RAM, bit 10 may correspond to one + * of several different (processor-relative) address bits. + * + * This routine only identifies whether or not a given DMA address + * is susceptible to the bug. + * + * This should only get called for sa1111_device types due to the + * way we configure our device dma_masks. + */ +static int sa1111_needs_bounce(struct device *dev, dma_addr_t addr, size_t size) +{ + /* + * Section 4.6 of the "Intel StrongARM SA-1111 Development Module + * User's Guide" mentions that jumpers R51 and R52 control the + * target of SA-1111 DMA (either SDRAM bank 0 on Assabet, or + * SDRAM bank 1 on Neponset). The default configuration selects + * Assabet, so any address in bank 1 is necessarily invalid. 
+ */ + return (machine_is_assabet() || machine_is_pfs168()) && + (addr >= 0xc8000000 || (addr + size) >= 0xc8000000); +} + +static int sa1111_notifier_call(struct notifier_block *n, unsigned long action, + void *data) +{ + struct sa1111_dev *dev = SA1111_DEV(data); + + switch (action) { + case BUS_NOTIFY_ADD_DEVICE: + if (dev->dev.dma_mask && dev->dma_mask < 0xffffffffUL) { + int ret = dmabounce_register_dev(&dev->dev, 1024, 4096, + sa1111_needs_bounce); + if (ret) + dev_err(&dev->dev, "failed to register with dmabounce: %d\n", ret); + } + break; + + case BUS_NOTIFY_DEL_DEVICE: + if (dev->dev.dma_mask && dev->dma_mask < 0xffffffffUL) + dmabounce_unregister_dev(&dev->dev); + break; + } + return NOTIFY_OK; +} + +static struct notifier_block sa1111_bus_notifier = { + .notifier_call = sa1111_notifier_call, +}; +#endif + static int __init sa1111_init(void) { int ret = bus_register(&sa1111_bus_type); +#ifdef CONFIG_DMABOUNCE + if (ret == 0) + bus_register_notifier(&sa1111_bus_type, &sa1111_bus_notifier); +#endif if (ret == 0) platform_driver_register(&sa1111_device_driver); return ret; @@ -1294,6 +1445,9 @@ static int __init sa1111_init(void) static void __exit sa1111_exit(void) { platform_driver_unregister(&sa1111_device_driver); +#ifdef CONFIG_DMABOUNCE + bus_unregister_notifier(&sa1111_bus_type, &sa1111_bus_notifier); +#endif bus_unregister(&sa1111_bus_type); } diff --git a/arch/arm/common/scoop.c b/arch/arm/common/scoop.c index 37bda5f3dde..a20fa80776d 100644 --- a/arch/arm/common/scoop.c +++ b/arch/arm/common/scoop.c @@ -12,11 +12,12 @@ */ #include <linux/device.h> +#include <linux/gpio.h> #include <linux/string.h> #include <linux/slab.h> #include <linux/platform_device.h> +#include <linux/export.h> #include <linux/io.h> -#include <asm/gpio.h> #include <asm/hardware/scoop.h> /* PCMCIA to Scoop linkage @@ -44,12 +45,12 @@ void reset_scoop(struct device *dev) { struct scoop_dev *sdev = dev_get_drvdata(dev); - iowrite16(0x0100, sdev->base + SCOOP_MCR); // 00 - iowrite16(0x0000, sdev->base + SCOOP_CDR); // 04 - iowrite16(0x0000, sdev->base + SCOOP_CCR); // 10 - iowrite16(0x0000, sdev->base + SCOOP_IMR); // 18 - iowrite16(0x00FF, sdev->base + SCOOP_IRM); // 14 - iowrite16(0x0000, sdev->base + SCOOP_ISR); // 1C + iowrite16(0x0100, sdev->base + SCOOP_MCR); /* 00 */ + iowrite16(0x0000, sdev->base + SCOOP_CDR); /* 04 */ + iowrite16(0x0000, sdev->base + SCOOP_CCR); /* 10 */ + iowrite16(0x0000, sdev->base + SCOOP_IMR); /* 18 */ + iowrite16(0x00FF, sdev->base + SCOOP_IRM); /* 14 */ + iowrite16(0x0000, sdev->base + SCOOP_ISR); /* 1C */ iowrite16(0x0000, sdev->base + SCOOP_IRM); } @@ -140,6 +141,7 @@ EXPORT_SYMBOL(reset_scoop); EXPORT_SYMBOL(read_scoop_reg); EXPORT_SYMBOL(write_scoop_reg); +#ifdef CONFIG_PM static void check_scoop_reg(struct scoop_dev *sdev) { unsigned short mcr; @@ -149,7 +151,6 @@ static void check_scoop_reg(struct scoop_dev *sdev) iowrite16(0x0101, sdev->base + SCOOP_MCR); } -#ifdef CONFIG_PM static int scoop_suspend(struct platform_device *dev, pm_message_t state) { struct scoop_dev *sdev = platform_get_drvdata(dev); @@ -175,13 +176,12 @@ static int scoop_resume(struct platform_device *dev) #define scoop_resume NULL #endif -static int __devinit scoop_probe(struct platform_device *pdev) +static int scoop_probe(struct platform_device *pdev) { struct scoop_dev *devptr; struct scoop_config *inf; struct resource *mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); int ret; - int temp; if (!mem) return -EINVAL; @@ -193,7 +193,7 @@ static int __devinit scoop_probe(struct platform_device 
*pdev) spin_lock_init(&devptr->scoop_lock); inf = pdev->dev.platform_data; - devptr->base = ioremap(mem->start, mem->end - mem->start + 1); + devptr->base = ioremap(mem->start, resource_size(mem)); if (!devptr->base) { ret = -ENOMEM; @@ -231,8 +231,6 @@ static int __devinit scoop_probe(struct platform_device *pdev) return 0; - if (devptr->gpio.base != -1) - temp = gpiochip_remove(&devptr->gpio); err_gpio: platform_set_drvdata(pdev, NULL); err_ioremap: @@ -242,7 +240,7 @@ err_ioremap: return ret; } -static int __devexit scoop_remove(struct platform_device *pdev) +static int scoop_remove(struct platform_device *pdev) { struct scoop_dev *sdev = platform_get_drvdata(pdev); int ret; @@ -267,7 +265,7 @@ static int __devexit scoop_remove(struct platform_device *pdev) static struct platform_driver scoop_driver = { .probe = scoop_probe, - .remove = __devexit_p(scoop_remove), + .remove = scoop_remove, .suspend = scoop_suspend, .resume = scoop_resume, .driver = { diff --git a/arch/arm/common/sharpsl_param.c b/arch/arm/common/sharpsl_param.c index d56c932580e..025f6ce3859 100644 --- a/arch/arm/common/sharpsl_param.c +++ b/arch/arm/common/sharpsl_param.c @@ -15,6 +15,7 @@ #include <linux/module.h> #include <linux/string.h> #include <asm/mach/sharpsl_param.h> +#include <asm/memory.h> /* * Certain hardware parameters determined at the time of device manufacture, @@ -25,8 +26,10 @@ */ #ifdef CONFIG_ARCH_SA1100 #define PARAM_BASE 0xe8ffc000 +#define param_start(x) (void *)(x) #else #define PARAM_BASE 0xa0000a00 +#define param_start(x) __va(x) #endif #define MAGIC_CHG(a,b,c,d) ( ( d << 24 ) | ( c << 16 ) | ( b << 8 ) | a ) @@ -41,7 +44,7 @@ EXPORT_SYMBOL(sharpsl_param); void sharpsl_save_param(void) { - memcpy(&sharpsl_param, (void *)PARAM_BASE, sizeof(struct sharpsl_param_info)); + memcpy(&sharpsl_param, param_start(PARAM_BASE), sizeof(struct sharpsl_param_info)); if (sharpsl_param.comadj_keyword != COMADJ_MAGIC) sharpsl_param.comadj=-1; diff --git a/arch/arm/common/time-acorn.c b/arch/arm/common/time-acorn.c deleted file mode 100644 index deeed561b16..00000000000 --- a/arch/arm/common/time-acorn.c +++ /dev/null @@ -1,95 +0,0 @@ -/* - * linux/arch/arm/common/time-acorn.c - * - * Copyright (c) 1996-2000 Russell King. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Changelog: - * 24-Sep-1996 RMK Created - * 10-Oct-1996 RMK Brought up to date with arch-sa110eval - * 04-Dec-1997 RMK Updated for new arch/arm/time.c - * 13=Jun-2004 DS Moved to arch/arm/common b/c shared w/CLPS7500 - */ -#include <linux/timex.h> -#include <linux/init.h> -#include <linux/interrupt.h> -#include <linux/irq.h> -#include <linux/io.h> - -#include <mach/hardware.h> -#include <asm/hardware/ioc.h> - -#include <asm/mach/time.h> - -unsigned long ioc_timer_gettimeoffset(void) -{ - unsigned int count1, count2, status; - long offset; - - ioc_writeb (0, IOC_T0LATCH); - barrier (); - count1 = ioc_readb(IOC_T0CNTL) | (ioc_readb(IOC_T0CNTH) << 8); - barrier (); - status = ioc_readb(IOC_IRQREQA); - barrier (); - ioc_writeb (0, IOC_T0LATCH); - barrier (); - count2 = ioc_readb(IOC_T0CNTL) | (ioc_readb(IOC_T0CNTH) << 8); - - offset = count2; - if (count2 < count1) { - /* - * We have not had an interrupt between reading count1 - * and count2. 
- */ - if (status & (1 << 5)) - offset -= LATCH; - } else if (count2 > count1) { - /* - * We have just had another interrupt between reading - * count1 and count2. - */ - offset -= LATCH; - } - - offset = (LATCH - offset) * (tick_nsec / 1000); - return (offset + LATCH/2) / LATCH; -} - -void __init ioctime_init(void) -{ - ioc_writeb(LATCH & 255, IOC_T0LTCHL); - ioc_writeb(LATCH >> 8, IOC_T0LTCHH); - ioc_writeb(0, IOC_T0GO); -} - -static irqreturn_t -ioc_timer_interrupt(int irq, void *dev_id) -{ - timer_tick(); - return IRQ_HANDLED; -} - -static struct irqaction ioc_timer_irq = { - .name = "timer", - .flags = IRQF_DISABLED, - .handler = ioc_timer_interrupt -}; - -/* - * Set up timer interrupt. - */ -static void __init ioc_timer_init(void) -{ - ioctime_init(); - setup_irq(IRQ_TIMER, &ioc_timer_irq); -} - -struct sys_timer ioc_timer = { - .init = ioc_timer_init, - .offset = ioc_timer_gettimeoffset, -}; - diff --git a/arch/arm/common/timer-sp.c b/arch/arm/common/timer-sp.c new file mode 100644 index 00000000000..fd6bff0c5b9 --- /dev/null +++ b/arch/arm/common/timer-sp.c @@ -0,0 +1,304 @@ +/* + * linux/arch/arm/common/timer-sp.c + * + * Copyright (C) 1999 - 2003 ARM Limited + * Copyright (C) 2000 Deep Blue Solutions Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include <linux/clk.h> +#include <linux/clocksource.h> +#include <linux/clockchips.h> +#include <linux/err.h> +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/io.h> +#include <linux/of.h> +#include <linux/of_address.h> +#include <linux/of_irq.h> +#include <linux/sched_clock.h> + +#include <asm/hardware/arm_timer.h> +#include <asm/hardware/timer-sp.h> + +static long __init sp804_get_clock_rate(struct clk *clk) +{ + long rate; + int err; + + err = clk_prepare(clk); + if (err) { + pr_err("sp804: clock failed to prepare: %d\n", err); + clk_put(clk); + return err; + } + + err = clk_enable(clk); + if (err) { + pr_err("sp804: clock failed to enable: %d\n", err); + clk_unprepare(clk); + clk_put(clk); + return err; + } + + rate = clk_get_rate(clk); + if (rate < 0) { + pr_err("sp804: clock failed to get rate: %ld\n", rate); + clk_disable(clk); + clk_unprepare(clk); + clk_put(clk); + } + + return rate; +} + +static void __iomem *sched_clock_base; + +static u64 notrace sp804_read(void) +{ + return ~readl_relaxed(sched_clock_base + TIMER_VALUE); +} + +void __init __sp804_clocksource_and_sched_clock_init(void __iomem *base, + const char *name, + struct clk *clk, + int use_sched_clock) +{ + long rate; + + if (!clk) { + clk = clk_get_sys("sp804", name); + if (IS_ERR(clk)) { + pr_err("sp804: clock not found: %d\n", + (int)PTR_ERR(clk)); + return; + } + } + + rate = sp804_get_clock_rate(clk); + + if (rate < 0) + return; + + /* setup timer 0 as free-running clocksource */ + writel(0, base + TIMER_CTRL); + writel(0xffffffff, base + TIMER_LOAD); + 
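	/*
	 * Editorial note: with LOAD and VALUE preset to 0xffffffff and the
	 * periodic-reload bit set below, the 32-bit down-counter wraps
	 * through its full range and so behaves as a free-running counter.
	 * Because it counts down, the clocksource is registered with
	 * clocksource_mmio_readl_down, and sp804_read() above returns the
	 * complemented value for sched_clock.
	 */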
writel(0xffffffff, base + TIMER_VALUE); + writel(TIMER_CTRL_32BIT | TIMER_CTRL_ENABLE | TIMER_CTRL_PERIODIC, + base + TIMER_CTRL); + + clocksource_mmio_init(base + TIMER_VALUE, name, + rate, 200, 32, clocksource_mmio_readl_down); + + if (use_sched_clock) { + sched_clock_base = base; + sched_clock_register(sp804_read, 32, rate); + } +} + + +static void __iomem *clkevt_base; +static unsigned long clkevt_reload; + +/* + * IRQ handler for the timer + */ +static irqreturn_t sp804_timer_interrupt(int irq, void *dev_id) +{ + struct clock_event_device *evt = dev_id; + + /* clear the interrupt */ + writel(1, clkevt_base + TIMER_INTCLR); + + evt->event_handler(evt); + + return IRQ_HANDLED; +} + +static void sp804_set_mode(enum clock_event_mode mode, + struct clock_event_device *evt) +{ + unsigned long ctrl = TIMER_CTRL_32BIT | TIMER_CTRL_IE; + + writel(ctrl, clkevt_base + TIMER_CTRL); + + switch (mode) { + case CLOCK_EVT_MODE_PERIODIC: + writel(clkevt_reload, clkevt_base + TIMER_LOAD); + ctrl |= TIMER_CTRL_PERIODIC | TIMER_CTRL_ENABLE; + break; + + case CLOCK_EVT_MODE_ONESHOT: + /* period set, and timer enabled in 'next_event' hook */ + ctrl |= TIMER_CTRL_ONESHOT; + break; + + case CLOCK_EVT_MODE_UNUSED: + case CLOCK_EVT_MODE_SHUTDOWN: + default: + break; + } + + writel(ctrl, clkevt_base + TIMER_CTRL); +} + +static int sp804_set_next_event(unsigned long next, + struct clock_event_device *evt) +{ + unsigned long ctrl = readl(clkevt_base + TIMER_CTRL); + + writel(next, clkevt_base + TIMER_LOAD); + writel(ctrl | TIMER_CTRL_ENABLE, clkevt_base + TIMER_CTRL); + + return 0; +} + +static struct clock_event_device sp804_clockevent = { + .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT | + CLOCK_EVT_FEAT_DYNIRQ, + .set_mode = sp804_set_mode, + .set_next_event = sp804_set_next_event, + .rating = 300, +}; + +static struct irqaction sp804_timer_irq = { + .name = "timer", + .flags = IRQF_TIMER | IRQF_IRQPOLL, + .handler = sp804_timer_interrupt, + .dev_id = &sp804_clockevent, +}; + +void __init __sp804_clockevents_init(void __iomem *base, unsigned int irq, struct clk *clk, const char *name) +{ + struct clock_event_device *evt = &sp804_clockevent; + long rate; + + if (!clk) + clk = clk_get_sys("sp804", name); + if (IS_ERR(clk)) { + pr_err("sp804: %s clock not found: %d\n", name, + (int)PTR_ERR(clk)); + return; + } + + rate = sp804_get_clock_rate(clk); + if (rate < 0) + return; + + clkevt_base = base; + clkevt_reload = DIV_ROUND_CLOSEST(rate, HZ); + evt->name = name; + evt->irq = irq; + evt->cpumask = cpu_possible_mask; + + writel(0, base + TIMER_CTRL); + + setup_irq(irq, &sp804_timer_irq); + clockevents_config_and_register(evt, rate, 0xf, 0xffffffff); +} + +static void __init sp804_of_init(struct device_node *np) +{ + static bool initialized = false; + void __iomem *base; + int irq; + u32 irq_num = 0; + struct clk *clk1, *clk2; + const char *name = of_get_property(np, "compatible", NULL); + + base = of_iomap(np, 0); + if (WARN_ON(!base)) + return; + + /* Ensure timers are disabled */ + writel(0, base + TIMER_CTRL); + writel(0, base + TIMER_2_BASE + TIMER_CTRL); + + if (initialized || !of_device_is_available(np)) + goto err; + + clk1 = of_clk_get(np, 0); + if (IS_ERR(clk1)) + clk1 = NULL; + + /* Get the 2nd clock if the timer has 2 timer clocks */ + if (of_count_phandle_with_args(np, "clocks", "#clock-cells") == 3) { + clk2 = of_clk_get(np, 1); + if (IS_ERR(clk2)) { + pr_err("sp804: %s clock not found: %d\n", np->name, + (int)PTR_ERR(clk2)); + goto err; + } + } else + clk2 = clk1; + + irq = 
irq_of_parse_and_map(np, 0); + if (irq <= 0) + goto err; + + of_property_read_u32(np, "arm,sp804-has-irq", &irq_num); + if (irq_num == 2) { + __sp804_clockevents_init(base + TIMER_2_BASE, irq, clk2, name); + __sp804_clocksource_and_sched_clock_init(base, name, clk1, 1); + } else { + __sp804_clockevents_init(base, irq, clk1 , name); + __sp804_clocksource_and_sched_clock_init(base + TIMER_2_BASE, + name, clk2, 1); + } + initialized = true; + + return; +err: + iounmap(base); +} +CLOCKSOURCE_OF_DECLARE(sp804, "arm,sp804", sp804_of_init); + +static void __init integrator_cp_of_init(struct device_node *np) +{ + static int init_count = 0; + void __iomem *base; + int irq; + const char *name = of_get_property(np, "compatible", NULL); + struct clk *clk; + + base = of_iomap(np, 0); + if (WARN_ON(!base)) + return; + clk = of_clk_get(np, 0); + if (WARN_ON(IS_ERR(clk))) + return; + + /* Ensure timer is disabled */ + writel(0, base + TIMER_CTRL); + + if (init_count == 2 || !of_device_is_available(np)) + goto err; + + if (!init_count) + __sp804_clocksource_and_sched_clock_init(base, name, clk, 0); + else { + irq = irq_of_parse_and_map(np, 0); + if (irq <= 0) + goto err; + + __sp804_clockevents_init(base, irq, clk, name); + } + + init_count++; + return; +err: + iounmap(base); +} +CLOCKSOURCE_OF_DECLARE(intcp, "arm,integrator-cp-timer", integrator_cp_of_init); diff --git a/arch/arm/common/uengine.c b/arch/arm/common/uengine.c deleted file mode 100644 index b520e56216a..00000000000 --- a/arch/arm/common/uengine.c +++ /dev/null @@ -1,507 +0,0 @@ -/* - * Generic library functions for the microengines found on the Intel - * IXP2000 series of network processors. - * - * Copyright (C) 2004, 2005 Lennert Buytenhek <buytenh@wantstofly.org> - * Dedicated to Marija Kulikova. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as - * published by the Free Software Foundation; either version 2.1 of the - * License, or (at your option) any later version. 
- */ - -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/slab.h> -#include <linux/module.h> -#include <linux/string.h> -#include <linux/io.h> -#include <mach/hardware.h> -#include <asm/hardware/uengine.h> - -#if defined(CONFIG_ARCH_IXP2000) -#define IXP_UENGINE_CSR_VIRT_BASE IXP2000_UENGINE_CSR_VIRT_BASE -#define IXP_PRODUCT_ID IXP2000_PRODUCT_ID -#define IXP_MISC_CONTROL IXP2000_MISC_CONTROL -#define IXP_RESET1 IXP2000_RESET1 -#else -#if defined(CONFIG_ARCH_IXP23XX) -#define IXP_UENGINE_CSR_VIRT_BASE IXP23XX_UENGINE_CSR_VIRT_BASE -#define IXP_PRODUCT_ID IXP23XX_PRODUCT_ID -#define IXP_MISC_CONTROL IXP23XX_MISC_CONTROL -#define IXP_RESET1 IXP23XX_RESET1 -#else -#error unknown platform -#endif -#endif - -#define USTORE_ADDRESS 0x000 -#define USTORE_DATA_LOWER 0x004 -#define USTORE_DATA_UPPER 0x008 -#define CTX_ENABLES 0x018 -#define CC_ENABLE 0x01c -#define CSR_CTX_POINTER 0x020 -#define INDIRECT_CTX_STS 0x040 -#define ACTIVE_CTX_STS 0x044 -#define INDIRECT_CTX_SIG_EVENTS 0x048 -#define INDIRECT_CTX_WAKEUP_EVENTS 0x050 -#define NN_PUT 0x080 -#define NN_GET 0x084 -#define TIMESTAMP_LOW 0x0c0 -#define TIMESTAMP_HIGH 0x0c4 -#define T_INDEX_BYTE_INDEX 0x0f4 -#define LOCAL_CSR_STATUS 0x180 - -u32 ixp2000_uengine_mask; - -static void *ixp2000_uengine_csr_area(int uengine) -{ - return ((void *)IXP_UENGINE_CSR_VIRT_BASE) + (uengine << 10); -} - -/* - * LOCAL_CSR_STATUS=1 after a read or write to a microengine's CSR - * space means that the microengine we tried to access was also trying - * to access its own CSR space on the same clock cycle as we did. When - * this happens, we lose the arbitration process by default, and the - * read or write we tried to do was not actually performed, so we try - * again until it succeeds. - */ -u32 ixp2000_uengine_csr_read(int uengine, int offset) -{ - void *uebase; - u32 *local_csr_status; - u32 *reg; - u32 value; - - uebase = ixp2000_uengine_csr_area(uengine); - - local_csr_status = (u32 *)(uebase + LOCAL_CSR_STATUS); - reg = (u32 *)(uebase + offset); - do { - value = ixp2000_reg_read(reg); - } while (ixp2000_reg_read(local_csr_status) & 1); - - return value; -} -EXPORT_SYMBOL(ixp2000_uengine_csr_read); - -void ixp2000_uengine_csr_write(int uengine, int offset, u32 value) -{ - void *uebase; - u32 *local_csr_status; - u32 *reg; - - uebase = ixp2000_uengine_csr_area(uengine); - - local_csr_status = (u32 *)(uebase + LOCAL_CSR_STATUS); - reg = (u32 *)(uebase + offset); - do { - ixp2000_reg_write(reg, value); - } while (ixp2000_reg_read(local_csr_status) & 1); -} -EXPORT_SYMBOL(ixp2000_uengine_csr_write); - -void ixp2000_uengine_reset(u32 uengine_mask) -{ - u32 value; - - value = ixp2000_reg_read(IXP_RESET1) & ~ixp2000_uengine_mask; - - uengine_mask &= ixp2000_uengine_mask; - ixp2000_reg_wrb(IXP_RESET1, value | uengine_mask); - ixp2000_reg_wrb(IXP_RESET1, value); -} -EXPORT_SYMBOL(ixp2000_uengine_reset); - -void ixp2000_uengine_set_mode(int uengine, u32 mode) -{ - /* - * CTL_STR_PAR_EN: unconditionally enable parity checking on - * control store. - */ - mode |= 0x10000000; - ixp2000_uengine_csr_write(uengine, CTX_ENABLES, mode); - - /* - * Enable updating of condition codes. - */ - ixp2000_uengine_csr_write(uengine, CC_ENABLE, 0x00002000); - - /* - * Initialise other per-microengine registers. 
- */ - ixp2000_uengine_csr_write(uengine, NN_PUT, 0x00); - ixp2000_uengine_csr_write(uengine, NN_GET, 0x00); - ixp2000_uengine_csr_write(uengine, T_INDEX_BYTE_INDEX, 0); -} -EXPORT_SYMBOL(ixp2000_uengine_set_mode); - -static int make_even_parity(u32 x) -{ - return hweight32(x) & 1; -} - -static void ustore_write(int uengine, u64 insn) -{ - /* - * Generate even parity for top and bottom 20 bits. - */ - insn |= (u64)make_even_parity((insn >> 20) & 0x000fffff) << 41; - insn |= (u64)make_even_parity(insn & 0x000fffff) << 40; - - /* - * Write to microstore. The second write auto-increments - * the USTORE_ADDRESS index register. - */ - ixp2000_uengine_csr_write(uengine, USTORE_DATA_LOWER, (u32)insn); - ixp2000_uengine_csr_write(uengine, USTORE_DATA_UPPER, (u32)(insn >> 32)); -} - -void ixp2000_uengine_load_microcode(int uengine, u8 *ucode, int insns) -{ - int i; - - /* - * Start writing to microstore at address 0. - */ - ixp2000_uengine_csr_write(uengine, USTORE_ADDRESS, 0x80000000); - for (i = 0; i < insns; i++) { - u64 insn; - - insn = (((u64)ucode[0]) << 32) | - (((u64)ucode[1]) << 24) | - (((u64)ucode[2]) << 16) | - (((u64)ucode[3]) << 8) | - ((u64)ucode[4]); - ucode += 5; - - ustore_write(uengine, insn); - } - - /* - * Pad with a few NOPs at the end (to avoid the microengine - * aborting as it prefetches beyond the last instruction), unless - * we run off the end of the instruction store first, at which - * point the address register will wrap back to zero. - */ - for (i = 0; i < 4; i++) { - u32 addr; - - addr = ixp2000_uengine_csr_read(uengine, USTORE_ADDRESS); - if (addr == 0x80000000) - break; - ustore_write(uengine, 0xf0000c0300ULL); - } - - /* - * End programming. - */ - ixp2000_uengine_csr_write(uengine, USTORE_ADDRESS, 0x00000000); -} -EXPORT_SYMBOL(ixp2000_uengine_load_microcode); - -void ixp2000_uengine_init_context(int uengine, int context, int pc) -{ - /* - * Select the right context for indirect access. - */ - ixp2000_uengine_csr_write(uengine, CSR_CTX_POINTER, context); - - /* - * Initialise signal masks to immediately go to Ready state. - */ - ixp2000_uengine_csr_write(uengine, INDIRECT_CTX_SIG_EVENTS, 1); - ixp2000_uengine_csr_write(uengine, INDIRECT_CTX_WAKEUP_EVENTS, 1); - - /* - * Set program counter. - */ - ixp2000_uengine_csr_write(uengine, INDIRECT_CTX_STS, pc); -} -EXPORT_SYMBOL(ixp2000_uengine_init_context); - -void ixp2000_uengine_start_contexts(int uengine, u8 ctx_mask) -{ - u32 mask; - - /* - * Enable the specified context to go to Executing state. - */ - mask = ixp2000_uengine_csr_read(uengine, CTX_ENABLES); - mask |= ctx_mask << 8; - ixp2000_uengine_csr_write(uengine, CTX_ENABLES, mask); -} -EXPORT_SYMBOL(ixp2000_uengine_start_contexts); - -void ixp2000_uengine_stop_contexts(int uengine, u8 ctx_mask) -{ - u32 mask; - - /* - * Disable the Ready->Executing transition. Note that this - * does not stop the context until it voluntarily yields. 
- */ - mask = ixp2000_uengine_csr_read(uengine, CTX_ENABLES); - mask &= ~(ctx_mask << 8); - ixp2000_uengine_csr_write(uengine, CTX_ENABLES, mask); -} -EXPORT_SYMBOL(ixp2000_uengine_stop_contexts); - -static int check_ixp_type(struct ixp2000_uengine_code *c) -{ - u32 product_id; - u32 rev; - - product_id = ixp2000_reg_read(IXP_PRODUCT_ID); - if (((product_id >> 16) & 0x1f) != 0) - return 0; - - switch ((product_id >> 8) & 0xff) { -#ifdef CONFIG_ARCH_IXP2000 - case 0: /* IXP2800 */ - if (!(c->cpu_model_bitmask & 4)) - return 0; - break; - - case 1: /* IXP2850 */ - if (!(c->cpu_model_bitmask & 8)) - return 0; - break; - - case 2: /* IXP2400 */ - if (!(c->cpu_model_bitmask & 2)) - return 0; - break; -#endif - -#ifdef CONFIG_ARCH_IXP23XX - case 4: /* IXP23xx */ - if (!(c->cpu_model_bitmask & 0x3f0)) - return 0; - break; -#endif - - default: - return 0; - } - - rev = product_id & 0xff; - if (rev < c->cpu_min_revision || rev > c->cpu_max_revision) - return 0; - - return 1; -} - -static void generate_ucode(u8 *ucode, u32 *gpr_a, u32 *gpr_b) -{ - int offset; - int i; - - offset = 0; - - for (i = 0; i < 128; i++) { - u8 b3; - u8 b2; - u8 b1; - u8 b0; - - b3 = (gpr_a[i] >> 24) & 0xff; - b2 = (gpr_a[i] >> 16) & 0xff; - b1 = (gpr_a[i] >> 8) & 0xff; - b0 = gpr_a[i] & 0xff; - - // immed[@ai, (b1 << 8) | b0] - // 11110000 0000VVVV VVVV11VV VVVVVV00 1IIIIIII - ucode[offset++] = 0xf0; - ucode[offset++] = (b1 >> 4); - ucode[offset++] = (b1 << 4) | 0x0c | (b0 >> 6); - ucode[offset++] = (b0 << 2); - ucode[offset++] = 0x80 | i; - - // immed_w1[@ai, (b3 << 8) | b2] - // 11110100 0100VVVV VVVV11VV VVVVVV00 1IIIIIII - ucode[offset++] = 0xf4; - ucode[offset++] = 0x40 | (b3 >> 4); - ucode[offset++] = (b3 << 4) | 0x0c | (b2 >> 6); - ucode[offset++] = (b2 << 2); - ucode[offset++] = 0x80 | i; - } - - for (i = 0; i < 128; i++) { - u8 b3; - u8 b2; - u8 b1; - u8 b0; - - b3 = (gpr_b[i] >> 24) & 0xff; - b2 = (gpr_b[i] >> 16) & 0xff; - b1 = (gpr_b[i] >> 8) & 0xff; - b0 = gpr_b[i] & 0xff; - - // immed[@bi, (b1 << 8) | b0] - // 11110000 0000VVVV VVVV001I IIIIII11 VVVVVVVV - ucode[offset++] = 0xf0; - ucode[offset++] = (b1 >> 4); - ucode[offset++] = (b1 << 4) | 0x02 | (i >> 6); - ucode[offset++] = (i << 2) | 0x03; - ucode[offset++] = b0; - - // immed_w1[@bi, (b3 << 8) | b2] - // 11110100 0100VVVV VVVV001I IIIIII11 VVVVVVVV - ucode[offset++] = 0xf4; - ucode[offset++] = 0x40 | (b3 >> 4); - ucode[offset++] = (b3 << 4) | 0x02 | (i >> 6); - ucode[offset++] = (i << 2) | 0x03; - ucode[offset++] = b2; - } - - // ctx_arb[kill] - ucode[offset++] = 0xe0; - ucode[offset++] = 0x00; - ucode[offset++] = 0x01; - ucode[offset++] = 0x00; - ucode[offset++] = 0x00; -} - -static int set_initial_registers(int uengine, struct ixp2000_uengine_code *c) -{ - int per_ctx_regs; - u32 *gpr_a; - u32 *gpr_b; - u8 *ucode; - int i; - - gpr_a = kzalloc(128 * sizeof(u32), GFP_KERNEL); - gpr_b = kzalloc(128 * sizeof(u32), GFP_KERNEL); - ucode = kmalloc(513 * 5, GFP_KERNEL); - if (gpr_a == NULL || gpr_b == NULL || ucode == NULL) { - kfree(ucode); - kfree(gpr_b); - kfree(gpr_a); - return 1; - } - - per_ctx_regs = 16; - if (c->uengine_parameters & IXP2000_UENGINE_4_CONTEXTS) - per_ctx_regs = 32; - - for (i = 0; i < 256; i++) { - struct ixp2000_reg_value *r = c->initial_reg_values + i; - u32 *bank; - int inc; - int j; - - if (r->reg == -1) - break; - - bank = (r->reg & 0x400) ? gpr_b : gpr_a; - inc = (r->reg & 0x80) ? 
128 : per_ctx_regs; - - j = r->reg & 0x7f; - while (j < 128) { - bank[j] = r->value; - j += inc; - } - } - - generate_ucode(ucode, gpr_a, gpr_b); - ixp2000_uengine_load_microcode(uengine, ucode, 513); - ixp2000_uengine_init_context(uengine, 0, 0); - ixp2000_uengine_start_contexts(uengine, 0x01); - for (i = 0; i < 100; i++) { - u32 status; - - status = ixp2000_uengine_csr_read(uengine, ACTIVE_CTX_STS); - if (!(status & 0x80000000)) - break; - } - ixp2000_uengine_stop_contexts(uengine, 0x01); - - kfree(ucode); - kfree(gpr_b); - kfree(gpr_a); - - return !!(i == 100); -} - -int ixp2000_uengine_load(int uengine, struct ixp2000_uengine_code *c) -{ - int ctx; - - if (!check_ixp_type(c)) - return 1; - - if (!(ixp2000_uengine_mask & (1 << uengine))) - return 1; - - ixp2000_uengine_reset(1 << uengine); - ixp2000_uengine_set_mode(uengine, c->uengine_parameters); - if (set_initial_registers(uengine, c)) - return 1; - ixp2000_uengine_load_microcode(uengine, c->insns, c->num_insns); - - for (ctx = 0; ctx < 8; ctx++) - ixp2000_uengine_init_context(uengine, ctx, 0); - - return 0; -} -EXPORT_SYMBOL(ixp2000_uengine_load); - - -static int __init ixp2000_uengine_init(void) -{ - int uengine; - u32 value; - - /* - * Determine number of microengines present. - */ - switch ((ixp2000_reg_read(IXP_PRODUCT_ID) >> 8) & 0x1fff) { -#ifdef CONFIG_ARCH_IXP2000 - case 0: /* IXP2800 */ - case 1: /* IXP2850 */ - ixp2000_uengine_mask = 0x00ff00ff; - break; - - case 2: /* IXP2400 */ - ixp2000_uengine_mask = 0x000f000f; - break; -#endif - -#ifdef CONFIG_ARCH_IXP23XX - case 4: /* IXP23xx */ - ixp2000_uengine_mask = (*IXP23XX_EXP_CFG_FUSE >> 8) & 0xf; - break; -#endif - - default: - printk(KERN_INFO "Detected unknown IXP2000 model (%.8x)\n", - (unsigned int)ixp2000_reg_read(IXP_PRODUCT_ID)); - ixp2000_uengine_mask = 0x00000000; - break; - } - - /* - * Reset microengines. - */ - ixp2000_uengine_reset(ixp2000_uengine_mask); - - /* - * Synchronise timestamp counters across all microengines. 
- */ - value = ixp2000_reg_read(IXP_MISC_CONTROL); - ixp2000_reg_wrb(IXP_MISC_CONTROL, value & ~0x80); - for (uengine = 0; uengine < 32; uengine++) { - if (ixp2000_uengine_mask & (1 << uengine)) { - ixp2000_uengine_csr_write(uengine, TIMESTAMP_LOW, 0); - ixp2000_uengine_csr_write(uengine, TIMESTAMP_HIGH, 0); - } - } - ixp2000_reg_wrb(IXP_MISC_CONTROL, value | 0x80); - - return 0; -} - -subsys_initcall(ixp2000_uengine_init); diff --git a/arch/arm/common/via82c505.c b/arch/arm/common/via82c505.c deleted file mode 100644 index 8421d39109b..00000000000 --- a/arch/arm/common/via82c505.c +++ /dev/null @@ -1,92 +0,0 @@ -#include <linux/kernel.h> -#include <linux/pci.h> -#include <linux/interrupt.h> -#include <linux/mm.h> -#include <linux/init.h> -#include <linux/ioport.h> -#include <linux/io.h> - -#include <asm/system.h> - -#include <asm/mach/pci.h> - -#define MAX_SLOTS 7 - -#define CONFIG_CMD(bus, devfn, where) (0x80000000 | (bus->number << 16) | (devfn << 8) | (where & ~3)) - -static int -via82c505_read_config(struct pci_bus *bus, unsigned int devfn, int where, - int size, u32 *value) -{ - outl(CONFIG_CMD(bus,devfn,where),0xCF8); - switch (size) { - case 1: - *value=inb(0xCFC + (where&3)); - break; - case 2: - *value=inw(0xCFC + (where&2)); - break; - case 4: - *value=inl(0xCFC); - break; - } - return PCIBIOS_SUCCESSFUL; -} - -static int -via82c505_write_config(struct pci_bus *bus, unsigned int devfn, int where, - int size, u32 value) -{ - outl(CONFIG_CMD(bus,devfn,where),0xCF8); - switch (size) { - case 1: - outb(value, 0xCFC + (where&3)); - break; - case 2: - outw(value, 0xCFC + (where&2)); - break; - case 4: - outl(value, 0xCFC); - break; - } - return PCIBIOS_SUCCESSFUL; -} - -static struct pci_ops via82c505_ops = { - .read = via82c505_read_config, - .write = via82c505_write_config, -}; - -void __init via82c505_preinit(void) -{ - printk(KERN_DEBUG "PCI: VIA 82c505\n"); - if (!request_region(0xA8,2,"via config")) { - printk(KERN_WARNING"VIA 82c505: Unable to request region 0xA8\n"); - return; - } - if (!request_region(0xCF8,8,"pci config")) { - printk(KERN_WARNING"VIA 82c505: Unable to request region 0xCF8\n"); - release_region(0xA8, 2); - return; - } - - /* Enable compatible Mode */ - outb(0x96,0xA8); - outb(0x18,0xA9); - outb(0x93,0xA8); - outb(0xd0,0xA9); - -} - -int __init via82c505_setup(int nr, struct pci_sys_data *sys) -{ - return (nr == 0); -} - -struct pci_bus * __init via82c505_scan_bus(int nr, struct pci_sys_data *sysdata) -{ - if (nr == 0) - return pci_scan_bus(0, &via82c505_ops, sysdata); - - return NULL; -} diff --git a/arch/arm/common/vic.c b/arch/arm/common/vic.c deleted file mode 100644 index f232941de8a..00000000000 --- a/arch/arm/common/vic.c +++ /dev/null @@ -1,402 +0,0 @@ -/* - * linux/arch/arm/common/vic.c - * - * Copyright (C) 1999 - 2003 ARM Limited - * Copyright (C) 2000 Deep Blue Solutions Ltd - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -#include <linux/init.h> -#include <linux/list.h> -#include <linux/io.h> -#include <linux/sysdev.h> -#include <linux/device.h> -#include <linux/amba/bus.h> - -#include <asm/mach/irq.h> -#include <asm/hardware/vic.h> - -static void vic_ack_irq(unsigned int irq) -{ - void __iomem *base = get_irq_chip_data(irq); - irq &= 31; - writel(1 << irq, base + VIC_INT_ENABLE_CLEAR); - /* moreover, clear the soft-triggered, in case it was the reason */ - writel(1 << irq, base + VIC_INT_SOFT_CLEAR); -} - -static void vic_mask_irq(unsigned int irq) -{ - void __iomem *base = get_irq_chip_data(irq); - irq &= 31; - writel(1 << irq, base + VIC_INT_ENABLE_CLEAR); -} - -static void vic_unmask_irq(unsigned int irq) -{ - void __iomem *base = get_irq_chip_data(irq); - irq &= 31; - writel(1 << irq, base + VIC_INT_ENABLE); -} - -/** - * vic_init2 - common initialisation code - * @base: Base of the VIC. - * - * Common initialisation code for registeration - * and resume. -*/ -static void vic_init2(void __iomem *base) -{ - int i; - - for (i = 0; i < 16; i++) { - void __iomem *reg = base + VIC_VECT_CNTL0 + (i * 4); - writel(VIC_VECT_CNTL_ENABLE | i, reg); - } - - writel(32, base + VIC_PL190_DEF_VECT_ADDR); -} - -#if defined(CONFIG_PM) -/** - * struct vic_device - VIC PM device - * @sysdev: The system device which is registered. - * @irq: The IRQ number for the base of the VIC. - * @base: The register base for the VIC. - * @resume_sources: A bitmask of interrupts for resume. - * @resume_irqs: The IRQs enabled for resume. - * @int_select: Save for VIC_INT_SELECT. - * @int_enable: Save for VIC_INT_ENABLE. - * @soft_int: Save for VIC_INT_SOFT. - * @protect: Save for VIC_PROTECT. 
- */ -struct vic_device { - struct sys_device sysdev; - - void __iomem *base; - int irq; - u32 resume_sources; - u32 resume_irqs; - u32 int_select; - u32 int_enable; - u32 soft_int; - u32 protect; -}; - -/* we cannot allocate memory when VICs are initially registered */ -static struct vic_device vic_devices[CONFIG_ARM_VIC_NR]; - -static inline struct vic_device *to_vic(struct sys_device *sys) -{ - return container_of(sys, struct vic_device, sysdev); -} - -static int vic_id; - -static int vic_class_resume(struct sys_device *dev) -{ - struct vic_device *vic = to_vic(dev); - void __iomem *base = vic->base; - - printk(KERN_DEBUG "%s: resuming vic at %p\n", __func__, base); - - /* re-initialise static settings */ - vic_init2(base); - - writel(vic->int_select, base + VIC_INT_SELECT); - writel(vic->protect, base + VIC_PROTECT); - - /* set the enabled ints and then clear the non-enabled */ - writel(vic->int_enable, base + VIC_INT_ENABLE); - writel(~vic->int_enable, base + VIC_INT_ENABLE_CLEAR); - - /* and the same for the soft-int register */ - - writel(vic->soft_int, base + VIC_INT_SOFT); - writel(~vic->soft_int, base + VIC_INT_SOFT_CLEAR); - - return 0; -} - -static int vic_class_suspend(struct sys_device *dev, pm_message_t state) -{ - struct vic_device *vic = to_vic(dev); - void __iomem *base = vic->base; - - printk(KERN_DEBUG "%s: suspending vic at %p\n", __func__, base); - - vic->int_select = readl(base + VIC_INT_SELECT); - vic->int_enable = readl(base + VIC_INT_ENABLE); - vic->soft_int = readl(base + VIC_INT_SOFT); - vic->protect = readl(base + VIC_PROTECT); - - /* set the interrupts (if any) that are used for - * resuming the system */ - - writel(vic->resume_irqs, base + VIC_INT_ENABLE); - writel(~vic->resume_irqs, base + VIC_INT_ENABLE_CLEAR); - - return 0; -} - -struct sysdev_class vic_class = { - .name = "vic", - .suspend = vic_class_suspend, - .resume = vic_class_resume, -}; - -/** - * vic_pm_register - Register a VIC for later power management control - * @base: The base address of the VIC. - * @irq: The base IRQ for the VIC. - * @resume_sources: bitmask of interrupts allowed for resume sources. - * - * Register the VIC with the system device tree so that it can be notified - * of suspend and resume requests and ensure that the correct actions are - * taken to re-instate the settings on resume. - */ -static void __init vic_pm_register(void __iomem *base, unsigned int irq, u32 resume_sources) -{ - struct vic_device *v; - - if (vic_id >= ARRAY_SIZE(vic_devices)) - printk(KERN_ERR "%s: too few VICs, increase CONFIG_ARM_VIC_NR\n", __func__); - else { - v = &vic_devices[vic_id]; - v->base = base; - v->resume_sources = resume_sources; - v->irq = irq; - vic_id++; - } -} - -/** - * vic_pm_init - initicall to register VIC pm - * - * This is called via late_initcall() to register - * the resources for the VICs due to the early - * nature of the VIC's registration. 
-*/ -static int __init vic_pm_init(void) -{ - struct vic_device *dev = vic_devices; - int err; - int id; - - if (vic_id == 0) - return 0; - - err = sysdev_class_register(&vic_class); - if (err) { - printk(KERN_ERR "%s: cannot register class\n", __func__); - return err; - } - - for (id = 0; id < vic_id; id++, dev++) { - dev->sysdev.id = id; - dev->sysdev.cls = &vic_class; - - err = sysdev_register(&dev->sysdev); - if (err) { - printk(KERN_ERR "%s: failed to register device\n", - __func__); - return err; - } - } - - return 0; -} - -late_initcall(vic_pm_init); - -static struct vic_device *vic_from_irq(unsigned int irq) -{ - struct vic_device *v = vic_devices; - unsigned int base_irq = irq & ~31; - int id; - - for (id = 0; id < vic_id; id++, v++) { - if (v->irq == base_irq) - return v; - } - - return NULL; -} - -static int vic_set_wake(unsigned int irq, unsigned int on) -{ - struct vic_device *v = vic_from_irq(irq); - unsigned int off = irq & 31; - u32 bit = 1 << off; - - if (!v) - return -EINVAL; - - if (!(bit & v->resume_sources)) - return -EINVAL; - - if (on) - v->resume_irqs |= bit; - else - v->resume_irqs &= ~bit; - - return 0; -} - -#else -static inline void vic_pm_register(void __iomem *base, unsigned int irq, u32 arg1) { } - -#define vic_set_wake NULL -#endif /* CONFIG_PM */ - -static struct irq_chip vic_chip = { - .name = "VIC", - .ack = vic_ack_irq, - .mask = vic_mask_irq, - .unmask = vic_unmask_irq, - .set_wake = vic_set_wake, -}; - -/* The PL190 cell from ARM has been modified by ST, so handle both here */ -static void vik_init_st(void __iomem *base, unsigned int irq_start, - u32 vic_sources); - -/** - * vic_init - initialise a vectored interrupt controller - * @base: iomem base address - * @irq_start: starting interrupt number, must be muliple of 32 - * @vic_sources: bitmask of interrupt sources to allow - * @resume_sources: bitmask of interrupt sources to allow for resume - */ -void __init vic_init(void __iomem *base, unsigned int irq_start, - u32 vic_sources, u32 resume_sources) -{ - unsigned int i; - u32 cellid = 0; - enum amba_vendor vendor; - - /* Identify which VIC cell this one is, by reading the ID */ - for (i = 0; i < 4; i++) { - u32 addr = ((u32)base & PAGE_MASK) + 0xfe0 + (i * 4); - cellid |= (readl(addr) & 0xff) << (8 * i); - } - vendor = (cellid >> 12) & 0xff; - printk(KERN_INFO "VIC @%p: id 0x%08x, vendor 0x%02x\n", - base, cellid, vendor); - - switch(vendor) { - case AMBA_VENDOR_ST: - vik_init_st(base, irq_start, vic_sources); - return; - default: - printk(KERN_WARNING "VIC: unknown vendor, continuing anyways\n"); - /* fall through */ - case AMBA_VENDOR_ARM: - break; - } - - /* Disable all interrupts initially. 
*/ - - writel(0, base + VIC_INT_SELECT); - writel(0, base + VIC_INT_ENABLE); - writel(~0, base + VIC_INT_ENABLE_CLEAR); - writel(0, base + VIC_IRQ_STATUS); - writel(0, base + VIC_ITCR); - writel(~0, base + VIC_INT_SOFT_CLEAR); - - /* - * Make sure we clear all existing interrupts - */ - writel(0, base + VIC_PL190_VECT_ADDR); - for (i = 0; i < 19; i++) { - unsigned int value; - - value = readl(base + VIC_PL190_VECT_ADDR); - writel(value, base + VIC_PL190_VECT_ADDR); - } - - vic_init2(base); - - for (i = 0; i < 32; i++) { - if (vic_sources & (1 << i)) { - unsigned int irq = irq_start + i; - - set_irq_chip(irq, &vic_chip); - set_irq_chip_data(irq, base); - set_irq_handler(irq, handle_level_irq); - set_irq_flags(irq, IRQF_VALID | IRQF_PROBE); - } - } - - vic_pm_register(base, irq_start, resume_sources); -} - -/* - * The PL190 cell from ARM has been modified by ST to handle 64 interrupts. - * The original cell has 32 interrupts, while the modified one has 64, - * replocating two blocks 0x00..0x1f in 0x20..0x3f. In that case - * the probe function is called twice, with base set to offset 000 - * and 020 within the page. We call this "second block". - */ -static void __init vik_init_st(void __iomem *base, unsigned int irq_start, - u32 vic_sources) -{ - unsigned int i; - int vic_2nd_block = ((unsigned long)base & ~PAGE_MASK) != 0; - - /* Disable all interrupts initially. */ - - writel(0, base + VIC_INT_SELECT); - writel(0, base + VIC_INT_ENABLE); - writel(~0, base + VIC_INT_ENABLE_CLEAR); - writel(0, base + VIC_IRQ_STATUS); - writel(0, base + VIC_ITCR); - writel(~0, base + VIC_INT_SOFT_CLEAR); - - /* - * Make sure we clear all existing interrupts. The vector registers - * in this cell are after the second block of general registers, - * so we can address them using standard offsets, but only from - * the second base address, which is 0x20 in the page - */ - if (vic_2nd_block) { - writel(0, base + VIC_PL190_VECT_ADDR); - for (i = 0; i < 19; i++) { - unsigned int value; - - value = readl(base + VIC_PL190_VECT_ADDR); - writel(value, base + VIC_PL190_VECT_ADDR); - } - /* ST has 16 vectors as well, but we don't enable them by now */ - for (i = 0; i < 16; i++) { - void __iomem *reg = base + VIC_VECT_CNTL0 + (i * 4); - writel(0, reg); - } - - writel(32, base + VIC_PL190_DEF_VECT_ADDR); - } - - for (i = 0; i < 32; i++) { - if (vic_sources & (1 << i)) { - unsigned int irq = irq_start + i; - - set_irq_chip(irq, &vic_chip); - set_irq_chip_data(irq, base); - set_irq_handler(irq, handle_level_irq); - set_irq_flags(irq, IRQF_VALID | IRQF_PROBE); - } - } -} diff --git a/arch/arm/common/vlock.S b/arch/arm/common/vlock.S new file mode 100644 index 00000000000..8b7df283fed --- /dev/null +++ b/arch/arm/common/vlock.S @@ -0,0 +1,108 @@ +/* + * vlock.S - simple voting lock implementation for ARM + * + * Created by: Dave Martin, 2012-08-16 + * Copyright: (C) 2012-2013 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * + * This algorithm is described in more detail in + * Documentation/arm/vlocks.txt. 
+ */ + +#include <linux/linkage.h> +#include "vlock.h" + +/* Select different code if voting flags can fit in a single word. */ +#if VLOCK_VOTING_SIZE > 4 +#define FEW(x...) +#define MANY(x...) x +#else +#define FEW(x...) x +#define MANY(x...) +#endif + +@ voting lock for first-man coordination + +.macro voting_begin rbase:req, rcpu:req, rscratch:req + mov \rscratch, #1 + strb \rscratch, [\rbase, \rcpu] + dmb +.endm + +.macro voting_end rbase:req, rcpu:req, rscratch:req + dmb + mov \rscratch, #0 + strb \rscratch, [\rbase, \rcpu] + dsb st + sev +.endm + +/* + * The vlock structure must reside in Strongly-Ordered or Device memory. + * This implementation deliberately eliminates most of the barriers which + * would be required for other memory types, and assumes that independent + * writes to neighbouring locations within a cacheline do not interfere + * with one another. + */ + +@ r0: lock structure base +@ r1: CPU ID (0-based index within cluster) +ENTRY(vlock_trylock) + add r1, r1, #VLOCK_VOTING_OFFSET + + voting_begin r0, r1, r2 + + ldrb r2, [r0, #VLOCK_OWNER_OFFSET] @ check whether lock is held + cmp r2, #VLOCK_OWNER_NONE + bne trylock_fail @ fail if so + + @ Control dependency implies strb not observable before previous ldrb. + + strb r1, [r0, #VLOCK_OWNER_OFFSET] @ submit my vote + + voting_end r0, r1, r2 @ implies DMB + + @ Wait for the current round of voting to finish: + + MANY( mov r3, #VLOCK_VOTING_OFFSET ) +0: + MANY( ldr r2, [r0, r3] ) + FEW( ldr r2, [r0, #VLOCK_VOTING_OFFSET] ) + cmp r2, #0 + wfene + bne 0b + MANY( add r3, r3, #4 ) + MANY( cmp r3, #VLOCK_VOTING_OFFSET + VLOCK_VOTING_SIZE ) + MANY( bne 0b ) + + @ Check who won: + + dmb + ldrb r2, [r0, #VLOCK_OWNER_OFFSET] + eor r0, r1, r2 @ zero if I won, else nonzero + bx lr + +trylock_fail: + voting_end r0, r1, r2 + mov r0, #1 @ nonzero indicates that I lost + bx lr +ENDPROC(vlock_trylock) + +@ r0: lock structure base +ENTRY(vlock_unlock) + dmb + mov r1, #VLOCK_OWNER_NONE + strb r1, [r0, #VLOCK_OWNER_OFFSET] + dsb st + sev + bx lr +ENDPROC(vlock_unlock) diff --git a/arch/arm/common/vlock.h b/arch/arm/common/vlock.h new file mode 100644 index 00000000000..3b441475a59 --- /dev/null +++ b/arch/arm/common/vlock.h @@ -0,0 +1,29 @@ +/* + * vlock.h - simple voting lock implementation + * + * Created by: Dave Martin, 2012-08-16 + * Copyright: (C) 2012-2013 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef __VLOCK_H +#define __VLOCK_H + +#include <asm/mcpm.h> + +/* Offsets and sizes are rounded to a word (4 bytes) */ +#define VLOCK_OWNER_OFFSET 0 +#define VLOCK_VOTING_OFFSET 4 +#define VLOCK_VOTING_SIZE ((MAX_CPUS_PER_CLUSTER + 3) / 4 * 4) +#define VLOCK_SIZE (VLOCK_VOTING_OFFSET + VLOCK_VOTING_SIZE) +#define VLOCK_OWNER_NONE 0 + +#endif /* ! __VLOCK_H */ |
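
Editorial note on vlock.S/vlock.h above: the assembly implements the first-man
voting lock described in Documentation/arm/vlocks.txt. Below is a rough,
illustrative C rendering of the same voting scheme, not part of the patch. It
ignores the wfe/sev idling and the requirement that the real code run from
Strongly-Ordered memory with the MMU and caches possibly off (which is why the
kernel version is assembly); MAX_CPUS_PER_CLUSTER is assumed to come from
asm/mcpm.h, and vlock_dmb() is a local stand-in for the dmb barriers.

	#include <asm/mcpm.h>		/* MAX_CPUS_PER_CLUSTER */

	#define VLOCK_OWNER_NONE	0

	struct vlock {
		unsigned char owner;				/* VLOCK_OWNER_OFFSET */
		unsigned char voting[MAX_CPUS_PER_CLUSTER];	/* VLOCK_VOTING_OFFSET */
	};

	static inline void vlock_dmb(void)
	{
		__asm__ volatile("dmb" : : : "memory");
	}

	/* Returns 0 if this CPU took the lock, nonzero otherwise. */
	static int vlock_trylock_c(struct vlock *v, unsigned int cpu)
	{
		unsigned int i, me = cpu + 1;	/* nonzero, cannot alias OWNER_NONE */

		v->voting[cpu] = 1;			/* voting_begin */
		vlock_dmb();

		if (v->owner != VLOCK_OWNER_NONE) {	/* already held: fail */
			vlock_dmb();
			v->voting[cpu] = 0;		/* voting_end */
			return 1;
		}

		v->owner = me;				/* submit my vote */

		vlock_dmb();
		v->voting[cpu] = 0;			/* voting_end */

		/* wait for the current round of voting to finish */
		for (i = 0; i < MAX_CPUS_PER_CLUSTER; i++)
			while (v->voting[i])
				/* the asm idles in wfe here */;

		vlock_dmb();
		return v->owner != me;			/* 0 == we won the vote */
	}

	static void vlock_unlock_c(struct vlock *v)
	{
		vlock_dmb();
		v->owner = VLOCK_OWNER_NONE;
	}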
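
Also worth illustrating from the sa1111.c changes earlier in this diff: bus
matching now treats devid as a mask (dev->devid & drv->devid), and
sa1111_enable_device() returns an int that callers are expected to check. A
minimal sketch of a function-block driver under those assumptions follows;
everything named "mydev" is hypothetical, and the SA1111_DEVID_* mask and
struct sa1111_driver layout are assumed from the SA-1111 header rather than
shown in this patch.

	static int mydev_probe(struct sa1111_dev *sadev)
	{
		int ret;

		ret = sa1111_enable_device(sadev);	/* may now fail */
		if (ret)
			return ret;

		/* ... map sadev->res, request sadev->irq[0], etc. ... */
		return 0;
	}

	static int mydev_remove(struct sa1111_dev *sadev)
	{
		sa1111_disable_device(sadev);
		return 0;
	}

	static struct sa1111_driver mydev_driver = {
		.drv = {
			.name	= "sa1111-mydev",
		},
		.devid	= SA1111_DEVID_USB,	/* a mask; may cover several blocks */
		.probe	= mydev_probe,
		.remove	= mydev_remove,
	};

Such a driver would be registered from its init code with
sa1111_driver_register(&mydev_driver), the counterpart of the
sa1111_driver_unregister() exported above.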
