Diffstat (limited to 'arch/powerpc/platforms/cell')
70 files changed, 18098 insertions, 3275 deletions
diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig index 3157071e241..9978f594cac 100644 --- a/arch/powerpc/platforms/cell/Kconfig +++ b/arch/powerpc/platforms/cell/Kconfig @@ -1,3 +1,59 @@ +config PPC_CELL + bool + default n + +config PPC_CELL_COMMON + bool + select PPC_CELL + select PPC_DCR_MMIO + select PPC_INDIRECT_PIO + select PPC_INDIRECT_MMIO + select PPC_NATIVE + select PPC_RTAS + select IRQ_EDGE_EOI_HANDLER + +config PPC_CELL_NATIVE + bool + select PPC_CELL_COMMON + select MPIC + select PPC_IO_WORKAROUNDS + select IBM_EMAC_EMAC4 + select IBM_EMAC_RGMII + select IBM_EMAC_ZMII #test only + select IBM_EMAC_TAH #test only + default n + +config PPC_IBM_CELL_BLADE + bool "IBM Cell Blade" + depends on PPC64 && PPC_BOOK3S + select PPC_CELL_NATIVE + select PPC_OF_PLATFORM_PCI + select PCI + select MMIO_NVRAM + select PPC_UDBG_16550 + select UDBG_RTAS_CONSOLE + +config PPC_CELLEB + bool "Toshiba's Cell Reference Set 'Celleb' Architecture" + depends on PPC64 && PPC_BOOK3S + select PPC_CELL_NATIVE + select PPC_OF_PLATFORM_PCI + select PCI + select HAS_TXX9_SERIAL + select PPC_UDBG_BEAT + select USB_OHCI_BIG_ENDIAN_MMIO + select USB_EHCI_BIG_ENDIAN_MMIO + +config PPC_CELL_QPACE + bool "IBM Cell - QPACE" + depends on PPC64 && PPC_BOOK3S + select PPC_CELL_COMMON + +config AXON_MSI + bool + depends on PPC_IBM_CELL_BLADE && PCI_MSI + default y + menu "Cell Broadband Engine options" depends on PPC_CELL @@ -5,9 +61,79 @@ config SPU_FS tristate "SPU file system" default m depends on PPC_CELL + select SPU_BASE + select MEMORY_HOTPLUG help The SPU file system is used to access Synergistic Processing Units on machines implementing the Broadband Processor Architecture. +config SPU_FS_64K_LS + bool "Use 64K pages to map SPE local store" + # we depend on PPC_MM_SLICES for now rather than selecting + # it because we depend on hugetlbfs hooks being present. We + # will fix that when the generic code has been improved to + # not require hijacking hugetlbfs hooks. + depends on SPU_FS && PPC_MM_SLICES && !PPC_64K_PAGES + default y + select PPC_HAS_HASH_64K + help + This option causes SPE local stores to be mapped in process + address spaces using 64K pages while the rest of the kernel + uses 4K pages. This can improve performances of applications + using multiple SPEs by lowering the TLB pressure on them. + +config SPU_BASE + bool + default n + +config CBE_RAS + bool "RAS features for bare metal Cell BE" + depends on PPC_CELL_NATIVE + default y + +config PPC_IBM_CELL_RESETBUTTON + bool "IBM Cell Blade Pinhole reset button" + depends on CBE_RAS && PPC_IBM_CELL_BLADE + default y + help + Support Pinhole Resetbutton on IBM Cell blades. + This adds a method to trigger system reset via front panel pinhole button. + +config PPC_IBM_CELL_POWERBUTTON + tristate "IBM Cell Blade power button" + depends on PPC_IBM_CELL_BLADE && INPUT_EVDEV + default y + help + Support Powerbutton on IBM Cell blades. + This will enable the powerbutton as an input device. + +config CBE_THERM + tristate "CBE thermal support" + default m + depends on CBE_RAS && SPU_BASE + +config PPC_PMI + tristate + default y + depends on CPU_FREQ_CBE_PMI || PPC_IBM_CELL_POWERBUTTON + help + PMI (Platform Management Interrupt) is a way to + communicate with the BMC (Baseboard Management Controller). + It is used in some IBM Cell blades. 
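A back-of-the-envelope illustration of the TLB-pressure argument in the SPU_FS_64K_LS help text above, assuming the 256KB local store of a Cell SPE; the numbers and the small userspace harness are illustrative only and not part of the patch:

/* Mapping one 256KB SPE local store takes 64 TLB entries with 4K pages,
 * but only 4 entries with 64K pages.
 */
#include <stdio.h>

int main(void)
{
	const unsigned long ls_size = 256 * 1024;	/* SPE local store size (assumed) */

	printf("4K pages : %lu mappings\n", ls_size / (4 * 1024));	/* 64 */
	printf("64K pages: %lu mappings\n", ls_size / (64 * 1024));	/* 4  */
	return 0;
}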
+ +config CBE_CPUFREQ_SPU_GOVERNOR + tristate "CBE frequency scaling based on SPU usage" + depends on SPU_FS && CPU_FREQ + default m + help + This governor checks for spu usage to adjust the cpu frequency. + If no spu is running on a given cpu, that cpu will be throttled to + the minimal possible frequency. + endmenu + +config OPROFILE_CELL + def_bool y + depends on PPC_CELL_NATIVE && (OPROFILE = m || OPROFILE = y) && SPU_BASE + diff --git a/arch/powerpc/platforms/cell/Makefile b/arch/powerpc/platforms/cell/Makefile index 16031b565be..fe053e7c73e 100644 --- a/arch/powerpc/platforms/cell/Makefile +++ b/arch/powerpc/platforms/cell/Makefile @@ -1,10 +1,46 @@ -obj-y += interrupt.o iommu.o setup.o spider-pic.o -obj-y += pervasive.o +obj-$(CONFIG_PPC_CELL_COMMON) += cbe_regs.o interrupt.o pervasive.o -obj-$(CONFIG_SMP) += smp.o -obj-$(CONFIG_SPU_FS) += spufs/ spu-base.o +obj-$(CONFIG_PPC_CELL_NATIVE) += iommu.o setup.o spider-pic.o \ + pmu.o spider-pci.o +obj-$(CONFIG_CBE_RAS) += ras.o -spu-base-y += spu_base.o spu_priv1.o +obj-$(CONFIG_CBE_THERM) += cbe_thermal.o +obj-$(CONFIG_CBE_CPUFREQ_SPU_GOVERNOR) += cpufreq_spudemand.o -builtin-spufs-$(CONFIG_SPU_FS) += spu_syscalls.o -obj-y += $(builtin-spufs-m) +obj-$(CONFIG_PPC_IBM_CELL_POWERBUTTON) += cbe_powerbutton.o + +ifeq ($(CONFIG_SMP),y) +obj-$(CONFIG_PPC_CELL_NATIVE) += smp.o +obj-$(CONFIG_PPC_CELL_QPACE) += smp.o +endif + +# needed only when building loadable spufs.ko +spu-priv1-$(CONFIG_PPC_CELL_COMMON) += spu_priv1_mmio.o +spu-manage-$(CONFIG_PPC_CELL_COMMON) += spu_manage.o + +obj-$(CONFIG_SPU_BASE) += spu_callbacks.o spu_base.o \ + spu_notify.o \ + spu_syscalls.o spu_fault.o \ + $(spu-priv1-y) \ + $(spu-manage-y) \ + spufs/ + +obj-$(CONFIG_AXON_MSI) += axon_msi.o + +# qpace setup +obj-$(CONFIG_PPC_CELL_QPACE) += qpace_setup.o + +# celleb stuff +ifeq ($(CONFIG_PPC_CELLEB),y) +obj-y += celleb_setup.o \ + celleb_pci.o celleb_scc_epci.o \ + celleb_scc_pciex.o \ + celleb_scc_uhc.o \ + spider-pci.o beat.o beat_htab.o \ + beat_hvCall.o beat_interrupt.o \ + beat_iommu.o + +obj-$(CONFIG_PPC_UDBG_BEAT) += beat_udbg.o +obj-$(CONFIG_SERIAL_TXX9) += celleb_scc_sio.o +obj-$(CONFIG_SPU_BASE) += beat_spu_priv1.o +endif diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c new file mode 100644 index 00000000000..85825b5401e --- /dev/null +++ b/arch/powerpc/platforms/cell/axon_msi.c @@ -0,0 +1,502 @@ +/* + * Copyright 2007, Michael Ellerman, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + + +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/msi.h> +#include <linux/export.h> +#include <linux/of_platform.h> +#include <linux/debugfs.h> +#include <linux/slab.h> + +#include <asm/dcr.h> +#include <asm/machdep.h> +#include <asm/prom.h> + + +/* + * MSIC registers, specified as offsets from dcr_base + */ +#define MSIC_CTRL_REG 0x0 + +/* Base Address registers specify FIFO location in BE memory */ +#define MSIC_BASE_ADDR_HI_REG 0x3 +#define MSIC_BASE_ADDR_LO_REG 0x4 + +/* Hold the read/write offsets into the FIFO */ +#define MSIC_READ_OFFSET_REG 0x5 +#define MSIC_WRITE_OFFSET_REG 0x6 + + +/* MSIC control register flags */ +#define MSIC_CTRL_ENABLE 0x0001 +#define MSIC_CTRL_FIFO_FULL_ENABLE 0x0002 +#define MSIC_CTRL_IRQ_ENABLE 0x0008 +#define MSIC_CTRL_FULL_STOP_ENABLE 0x0010 + +/* + * The MSIC can be configured to use a FIFO of 32KB, 64KB, 128KB or 256KB. + * Currently we're using a 64KB FIFO size. + */ +#define MSIC_FIFO_SIZE_SHIFT 16 +#define MSIC_FIFO_SIZE_BYTES (1 << MSIC_FIFO_SIZE_SHIFT) + +/* + * To configure the FIFO size as (1 << n) bytes, we write (n - 15) into bits + * 8-9 of the MSIC control reg. + */ +#define MSIC_CTRL_FIFO_SIZE (((MSIC_FIFO_SIZE_SHIFT - 15) << 8) & 0x300) + +/* + * We need to mask the read/write offsets to make sure they stay within + * the bounds of the FIFO. Also they should always be 16-byte aligned. + */ +#define MSIC_FIFO_SIZE_MASK ((MSIC_FIFO_SIZE_BYTES - 1) & ~0xFu) + +/* Each entry in the FIFO is 16 bytes, the first 4 bytes hold the irq # */ +#define MSIC_FIFO_ENTRY_SIZE 0x10 + + +struct axon_msic { + struct irq_domain *irq_domain; + __le32 *fifo_virt; + dma_addr_t fifo_phys; + dcr_host_t dcr_host; + u32 read_offset; +#ifdef DEBUG + u32 __iomem *trigger; +#endif +}; + +#ifdef DEBUG +void axon_msi_debug_setup(struct device_node *dn, struct axon_msic *msic); +#else +static inline void axon_msi_debug_setup(struct device_node *dn, + struct axon_msic *msic) { } +#endif + + +static void msic_dcr_write(struct axon_msic *msic, unsigned int dcr_n, u32 val) +{ + pr_devel("axon_msi: dcr_write(0x%x, 0x%x)\n", val, dcr_n); + + dcr_write(msic->dcr_host, dcr_n, val); +} + +static void axon_msi_cascade(unsigned int irq, struct irq_desc *desc) +{ + struct irq_chip *chip = irq_desc_get_chip(desc); + struct axon_msic *msic = irq_get_handler_data(irq); + u32 write_offset, msi; + int idx; + int retry = 0; + + write_offset = dcr_read(msic->dcr_host, MSIC_WRITE_OFFSET_REG); + pr_devel("axon_msi: original write_offset 0x%x\n", write_offset); + + /* write_offset doesn't wrap properly, so we have to mask it */ + write_offset &= MSIC_FIFO_SIZE_MASK; + + while (msic->read_offset != write_offset && retry < 100) { + idx = msic->read_offset / sizeof(__le32); + msi = le32_to_cpu(msic->fifo_virt[idx]); + msi &= 0xFFFF; + + pr_devel("axon_msi: woff %x roff %x msi %x\n", + write_offset, msic->read_offset, msi); + + if (msi < nr_irqs && irq_get_chip_data(msi) == msic) { + generic_handle_irq(msi); + msic->fifo_virt[idx] = cpu_to_le32(0xffffffff); + } else { + /* + * Reading the MSIC_WRITE_OFFSET_REG does not + * reliably flush the outstanding DMA to the + * FIFO buffer. Here we were reading stale + * data, so we need to retry. 
+ */ + udelay(1); + retry++; + pr_devel("axon_msi: invalid irq 0x%x!\n", msi); + continue; + } + + if (retry) { + pr_devel("axon_msi: late irq 0x%x, retry %d\n", + msi, retry); + retry = 0; + } + + msic->read_offset += MSIC_FIFO_ENTRY_SIZE; + msic->read_offset &= MSIC_FIFO_SIZE_MASK; + } + + if (retry) { + printk(KERN_WARNING "axon_msi: irq timed out\n"); + + msic->read_offset += MSIC_FIFO_ENTRY_SIZE; + msic->read_offset &= MSIC_FIFO_SIZE_MASK; + } + + chip->irq_eoi(&desc->irq_data); +} + +static struct axon_msic *find_msi_translator(struct pci_dev *dev) +{ + struct irq_domain *irq_domain; + struct device_node *dn, *tmp; + const phandle *ph; + struct axon_msic *msic = NULL; + + dn = of_node_get(pci_device_to_OF_node(dev)); + if (!dn) { + dev_dbg(&dev->dev, "axon_msi: no pci_dn found\n"); + return NULL; + } + + for (; dn; dn = of_get_next_parent(dn)) { + ph = of_get_property(dn, "msi-translator", NULL); + if (ph) + break; + } + + if (!ph) { + dev_dbg(&dev->dev, + "axon_msi: no msi-translator property found\n"); + goto out_error; + } + + tmp = dn; + dn = of_find_node_by_phandle(*ph); + of_node_put(tmp); + if (!dn) { + dev_dbg(&dev->dev, + "axon_msi: msi-translator doesn't point to a node\n"); + goto out_error; + } + + irq_domain = irq_find_host(dn); + if (!irq_domain) { + dev_dbg(&dev->dev, "axon_msi: no irq_domain found for node %s\n", + dn->full_name); + goto out_error; + } + + msic = irq_domain->host_data; + +out_error: + of_node_put(dn); + + return msic; +} + +static int axon_msi_check_device(struct pci_dev *dev, int nvec, int type) +{ + if (!find_msi_translator(dev)) + return -ENODEV; + + return 0; +} + +static int setup_msi_msg_address(struct pci_dev *dev, struct msi_msg *msg) +{ + struct device_node *dn; + struct msi_desc *entry; + int len; + const u32 *prop; + + dn = of_node_get(pci_device_to_OF_node(dev)); + if (!dn) { + dev_dbg(&dev->dev, "axon_msi: no pci_dn found\n"); + return -ENODEV; + } + + entry = list_first_entry(&dev->msi_list, struct msi_desc, list); + + for (; dn; dn = of_get_next_parent(dn)) { + if (entry->msi_attrib.is_64) { + prop = of_get_property(dn, "msi-address-64", &len); + if (prop) + break; + } + + prop = of_get_property(dn, "msi-address-32", &len); + if (prop) + break; + } + + if (!prop) { + dev_dbg(&dev->dev, + "axon_msi: no msi-address-(32|64) properties found\n"); + return -ENOENT; + } + + switch (len) { + case 8: + msg->address_hi = prop[0]; + msg->address_lo = prop[1]; + break; + case 4: + msg->address_hi = 0; + msg->address_lo = prop[0]; + break; + default: + dev_dbg(&dev->dev, + "axon_msi: malformed msi-address-(32|64) property\n"); + of_node_put(dn); + return -EINVAL; + } + + of_node_put(dn); + + return 0; +} + +static int axon_msi_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +{ + unsigned int virq, rc; + struct msi_desc *entry; + struct msi_msg msg; + struct axon_msic *msic; + + msic = find_msi_translator(dev); + if (!msic) + return -ENODEV; + + rc = setup_msi_msg_address(dev, &msg); + if (rc) + return rc; + + list_for_each_entry(entry, &dev->msi_list, list) { + virq = irq_create_direct_mapping(msic->irq_domain); + if (virq == NO_IRQ) { + dev_warn(&dev->dev, + "axon_msi: virq allocation failed!\n"); + return -1; + } + dev_dbg(&dev->dev, "axon_msi: allocated virq 0x%x\n", virq); + + irq_set_msi_desc(virq, entry); + msg.data = virq; + write_msi_msg(virq, &msg); + } + + return 0; +} + +static void axon_msi_teardown_msi_irqs(struct pci_dev *dev) +{ + struct msi_desc *entry; + + dev_dbg(&dev->dev, "axon_msi: tearing down msi irqs\n"); + + 
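A minimal userspace check of the MSIC FIFO arithmetic defined near the top of axon_msi.c in this patch; the five macros are copied verbatim from the file above, while the main() harness and the sample offset are illustrative only. For the 64KB FIFO (n = 16) the control-register field comes out as 0x100, and stepping the 16-byte read offset past the end of the FIFO wraps it back to 0 through the mask:

#include <stdio.h>

#define MSIC_FIFO_SIZE_SHIFT	16
#define MSIC_FIFO_SIZE_BYTES	(1 << MSIC_FIFO_SIZE_SHIFT)
#define MSIC_CTRL_FIFO_SIZE	(((MSIC_FIFO_SIZE_SHIFT - 15) << 8) & 0x300)
#define MSIC_FIFO_SIZE_MASK	((MSIC_FIFO_SIZE_BYTES - 1) & ~0xFu)
#define MSIC_FIFO_ENTRY_SIZE	0x10

int main(void)
{
	unsigned int read_offset = MSIC_FIFO_SIZE_BYTES - MSIC_FIFO_ENTRY_SIZE;

	printf("fifo size field in ctrl reg: 0x%x\n", MSIC_CTRL_FIFO_SIZE);	/* 0x100 */

	read_offset += MSIC_FIFO_ENTRY_SIZE;	/* step past the last entry ... */
	read_offset &= MSIC_FIFO_SIZE_MASK;	/* ... and wrap to the start    */
	printf("wrapped read offset: 0x%x\n", read_offset);			/* 0x0 */
	return 0;
}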
list_for_each_entry(entry, &dev->msi_list, list) { + if (entry->irq == NO_IRQ) + continue; + + irq_set_msi_desc(entry->irq, NULL); + irq_dispose_mapping(entry->irq); + } +} + +static struct irq_chip msic_irq_chip = { + .irq_mask = mask_msi_irq, + .irq_unmask = unmask_msi_irq, + .irq_shutdown = mask_msi_irq, + .name = "AXON-MSI", +}; + +static int msic_host_map(struct irq_domain *h, unsigned int virq, + irq_hw_number_t hw) +{ + irq_set_chip_data(virq, h->host_data); + irq_set_chip_and_handler(virq, &msic_irq_chip, handle_simple_irq); + + return 0; +} + +static const struct irq_domain_ops msic_host_ops = { + .map = msic_host_map, +}; + +static void axon_msi_shutdown(struct platform_device *device) +{ + struct axon_msic *msic = dev_get_drvdata(&device->dev); + u32 tmp; + + pr_devel("axon_msi: disabling %s\n", + msic->irq_domain->of_node->full_name); + tmp = dcr_read(msic->dcr_host, MSIC_CTRL_REG); + tmp &= ~MSIC_CTRL_ENABLE & ~MSIC_CTRL_IRQ_ENABLE; + msic_dcr_write(msic, MSIC_CTRL_REG, tmp); +} + +static int axon_msi_probe(struct platform_device *device) +{ + struct device_node *dn = device->dev.of_node; + struct axon_msic *msic; + unsigned int virq; + int dcr_base, dcr_len; + + pr_devel("axon_msi: setting up dn %s\n", dn->full_name); + + msic = kzalloc(sizeof(struct axon_msic), GFP_KERNEL); + if (!msic) { + printk(KERN_ERR "axon_msi: couldn't allocate msic for %s\n", + dn->full_name); + goto out; + } + + dcr_base = dcr_resource_start(dn, 0); + dcr_len = dcr_resource_len(dn, 0); + + if (dcr_base == 0 || dcr_len == 0) { + printk(KERN_ERR + "axon_msi: couldn't parse dcr properties on %s\n", + dn->full_name); + goto out_free_msic; + } + + msic->dcr_host = dcr_map(dn, dcr_base, dcr_len); + if (!DCR_MAP_OK(msic->dcr_host)) { + printk(KERN_ERR "axon_msi: dcr_map failed for %s\n", + dn->full_name); + goto out_free_msic; + } + + msic->fifo_virt = dma_alloc_coherent(&device->dev, MSIC_FIFO_SIZE_BYTES, + &msic->fifo_phys, GFP_KERNEL); + if (!msic->fifo_virt) { + printk(KERN_ERR "axon_msi: couldn't allocate fifo for %s\n", + dn->full_name); + goto out_free_msic; + } + + virq = irq_of_parse_and_map(dn, 0); + if (virq == NO_IRQ) { + printk(KERN_ERR "axon_msi: irq parse and map failed for %s\n", + dn->full_name); + goto out_free_fifo; + } + memset(msic->fifo_virt, 0xff, MSIC_FIFO_SIZE_BYTES); + + /* We rely on being able to stash a virq in a u16, so limit irqs to < 65536 */ + msic->irq_domain = irq_domain_add_nomap(dn, 65536, &msic_host_ops, msic); + if (!msic->irq_domain) { + printk(KERN_ERR "axon_msi: couldn't allocate irq_domain for %s\n", + dn->full_name); + goto out_free_fifo; + } + + irq_set_handler_data(virq, msic); + irq_set_chained_handler(virq, axon_msi_cascade); + pr_devel("axon_msi: irq 0x%x setup for axon_msi\n", virq); + + /* Enable the MSIC hardware */ + msic_dcr_write(msic, MSIC_BASE_ADDR_HI_REG, msic->fifo_phys >> 32); + msic_dcr_write(msic, MSIC_BASE_ADDR_LO_REG, + msic->fifo_phys & 0xFFFFFFFF); + msic_dcr_write(msic, MSIC_CTRL_REG, + MSIC_CTRL_IRQ_ENABLE | MSIC_CTRL_ENABLE | + MSIC_CTRL_FIFO_SIZE); + + msic->read_offset = dcr_read(msic->dcr_host, MSIC_WRITE_OFFSET_REG) + & MSIC_FIFO_SIZE_MASK; + + dev_set_drvdata(&device->dev, msic); + + ppc_md.setup_msi_irqs = axon_msi_setup_msi_irqs; + ppc_md.teardown_msi_irqs = axon_msi_teardown_msi_irqs; + ppc_md.msi_check_device = axon_msi_check_device; + + axon_msi_debug_setup(dn, msic); + + printk(KERN_DEBUG "axon_msi: setup MSIC on %s\n", dn->full_name); + + return 0; + +out_free_fifo: + dma_free_coherent(&device->dev, MSIC_FIFO_SIZE_BYTES, 
msic->fifo_virt, + msic->fifo_phys); +out_free_msic: + kfree(msic); +out: + + return -1; +} + +static const struct of_device_id axon_msi_device_id[] = { + { + .compatible = "ibm,axon-msic" + }, + {} +}; + +static struct platform_driver axon_msi_driver = { + .probe = axon_msi_probe, + .shutdown = axon_msi_shutdown, + .driver = { + .name = "axon-msi", + .owner = THIS_MODULE, + .of_match_table = axon_msi_device_id, + }, +}; + +static int __init axon_msi_init(void) +{ + return platform_driver_register(&axon_msi_driver); +} +subsys_initcall(axon_msi_init); + + +#ifdef DEBUG +static int msic_set(void *data, u64 val) +{ + struct axon_msic *msic = data; + out_le32(msic->trigger, val); + return 0; +} + +static int msic_get(void *data, u64 *val) +{ + *val = 0; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_msic, msic_get, msic_set, "%llu\n"); + +void axon_msi_debug_setup(struct device_node *dn, struct axon_msic *msic) +{ + char name[8]; + u64 addr; + + addr = of_translate_address(dn, of_get_property(dn, "reg", NULL)); + if (addr == OF_BAD_ADDR) { + pr_devel("axon_msi: couldn't translate reg property\n"); + return; + } + + msic->trigger = ioremap(addr, 0x4); + if (!msic->trigger) { + pr_devel("axon_msi: ioremap failed\n"); + return; + } + + snprintf(name, sizeof(name), "msic_%d", of_node_to_nid(dn)); + + if (!debugfs_create_file(name, 0600, powerpc_debugfs_root, + msic, &fops_msic)) { + pr_devel("axon_msi: debugfs_create_file failed!\n"); + return; + } +} +#endif /* DEBUG */ diff --git a/arch/powerpc/platforms/cell/beat.c b/arch/powerpc/platforms/cell/beat.c new file mode 100644 index 00000000000..affcf566d46 --- /dev/null +++ b/arch/powerpc/platforms/cell/beat.c @@ -0,0 +1,264 @@ +/* + * Simple routines for Celleb/Beat + * + * (C) Copyright 2006-2007 TOSHIBA CORPORATION + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#include <linux/export.h> +#include <linux/init.h> +#include <linux/err.h> +#include <linux/rtc.h> +#include <linux/interrupt.h> +#include <linux/irqreturn.h> +#include <linux/reboot.h> + +#include <asm/hvconsole.h> +#include <asm/time.h> +#include <asm/machdep.h> +#include <asm/firmware.h> + +#include "beat_wrapper.h" +#include "beat.h" +#include "beat_interrupt.h" + +static int beat_pm_poweroff_flag; + +void beat_restart(char *cmd) +{ + beat_shutdown_logical_partition(!beat_pm_poweroff_flag); +} + +void beat_power_off(void) +{ + beat_shutdown_logical_partition(0); +} + +u64 beat_halt_code = 0x1000000000000000UL; +EXPORT_SYMBOL(beat_halt_code); + +void beat_halt(void) +{ + beat_shutdown_logical_partition(beat_halt_code); +} + +int beat_set_rtc_time(struct rtc_time *rtc_time) +{ + u64 tim; + tim = mktime(rtc_time->tm_year+1900, + rtc_time->tm_mon+1, rtc_time->tm_mday, + rtc_time->tm_hour, rtc_time->tm_min, rtc_time->tm_sec); + if (beat_rtc_write(tim)) + return -1; + return 0; +} + +void beat_get_rtc_time(struct rtc_time *rtc_time) +{ + u64 tim; + + if (beat_rtc_read(&tim)) + tim = 0; + to_tm(tim, rtc_time); + rtc_time->tm_year -= 1900; + rtc_time->tm_mon -= 1; +} + +#define BEAT_NVRAM_SIZE 4096 + +ssize_t beat_nvram_read(char *buf, size_t count, loff_t *index) +{ + unsigned int i; + unsigned long len; + char *p = buf; + + if (*index >= BEAT_NVRAM_SIZE) + return -ENODEV; + i = *index; + if (i + count > BEAT_NVRAM_SIZE) + count = BEAT_NVRAM_SIZE - i; + + for (; count != 0; count -= len) { + len = count; + if (len > BEAT_NVRW_CNT) + len = BEAT_NVRW_CNT; + if (beat_eeprom_read(i, len, p)) + return -EIO; + + p += len; + i += len; + } + *index = i; + return p - buf; +} + +ssize_t beat_nvram_write(char *buf, size_t count, loff_t *index) +{ + unsigned int i; + unsigned long len; + char *p = buf; + + if (*index >= BEAT_NVRAM_SIZE) + return -ENODEV; + i = *index; + if (i + count > BEAT_NVRAM_SIZE) + count = BEAT_NVRAM_SIZE - i; + + for (; count != 0; count -= len) { + len = count; + if (len > BEAT_NVRW_CNT) + len = BEAT_NVRW_CNT; + if (beat_eeprom_write(i, len, p)) + return -EIO; + + p += len; + i += len; + } + *index = i; + return p - buf; +} + +ssize_t beat_nvram_get_size(void) +{ + return BEAT_NVRAM_SIZE; +} + +int beat_set_xdabr(unsigned long dabr, unsigned long dabrx) +{ + if (beat_set_dabr(dabr, dabrx)) + return -1; + return 0; +} + +int64_t beat_get_term_char(u64 vterm, u64 *len, u64 *t1, u64 *t2) +{ + u64 db[2]; + s64 ret; + + ret = beat_get_characters_from_console(vterm, len, (u8 *)db); + if (ret == 0) { + *t1 = db[0]; + *t2 = db[1]; + } + return ret; +} +EXPORT_SYMBOL(beat_get_term_char); + +int64_t beat_put_term_char(u64 vterm, u64 len, u64 t1, u64 t2) +{ + u64 db[2]; + + db[0] = t1; + db[1] = t2; + return beat_put_characters_to_console(vterm, len, (u8 *)db); +} +EXPORT_SYMBOL(beat_put_term_char); + +void beat_power_save(void) +{ + beat_pause(0); +} + +#ifdef CONFIG_KEXEC +void beat_kexec_cpu_down(int crash, int secondary) +{ + beatic_deinit_IRQ(); +} +#endif + +static irqreturn_t beat_power_event(int virq, void *arg) +{ + printk(KERN_DEBUG "Beat: power button pressed\n"); + beat_pm_poweroff_flag = 1; + ctrl_alt_del(); + return IRQ_HANDLED; +} + +static irqreturn_t beat_reset_event(int virq, void *arg) +{ + printk(KERN_DEBUG "Beat: reset button pressed\n"); + beat_pm_poweroff_flag = 0; + ctrl_alt_del(); + return IRQ_HANDLED; +} + +static struct beat_event_list { + const char *typecode; + irq_handler_t handler; + unsigned int virq; +} beat_event_list[] = { + { "power", 
beat_power_event, 0 }, + { "reset", beat_reset_event, 0 }, +}; + +static int __init beat_register_event(void) +{ + u64 path[4], data[2]; + int rc, i; + unsigned int virq; + + for (i = 0; i < ARRAY_SIZE(beat_event_list); i++) { + struct beat_event_list *ev = &beat_event_list[i]; + + if (beat_construct_event_receive_port(data) != 0) { + printk(KERN_ERR "Beat: " + "cannot construct event receive port for %s\n", + ev->typecode); + return -EINVAL; + } + + virq = irq_create_mapping(NULL, data[0]); + if (virq == NO_IRQ) { + printk(KERN_ERR "Beat: failed to get virtual IRQ" + " for event receive port for %s\n", + ev->typecode); + beat_destruct_event_receive_port(data[0]); + return -EIO; + } + ev->virq = virq; + + rc = request_irq(virq, ev->handler, 0, + ev->typecode, NULL); + if (rc != 0) { + printk(KERN_ERR "Beat: failed to request virtual IRQ" + " for event receive port for %s\n", + ev->typecode); + beat_destruct_event_receive_port(data[0]); + return rc; + } + + path[0] = 0x1000000065780000ul; /* 1,ex */ + path[1] = 0x627574746f6e0000ul; /* button */ + path[2] = 0; + strncpy((char *)&path[2], ev->typecode, 8); + path[3] = 0; + data[1] = 0; + + beat_create_repository_node(path, data); + } + return 0; +} + +static int __init beat_event_init(void) +{ + if (!firmware_has_feature(FW_FEATURE_BEAT)) + return -EINVAL; + + beat_pm_poweroff_flag = 0; + return beat_register_event(); +} + +device_initcall(beat_event_init); diff --git a/arch/powerpc/platforms/cell/beat.h b/arch/powerpc/platforms/cell/beat.h new file mode 100644 index 00000000000..bfcb8e351ae --- /dev/null +++ b/arch/powerpc/platforms/cell/beat.h @@ -0,0 +1,39 @@ +/* + * Guest OS Interfaces. + * + * (C) Copyright 2006 TOSHIBA CORPORATION + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#ifndef _CELLEB_BEAT_H +#define _CELLEB_BEAT_H + +int64_t beat_get_term_char(uint64_t, uint64_t *, uint64_t *, uint64_t *); +int64_t beat_put_term_char(uint64_t, uint64_t, uint64_t, uint64_t); +int64_t beat_repository_encode(int, const char *, uint64_t[4]); +void beat_restart(char *); +void beat_power_off(void); +void beat_halt(void); +int beat_set_rtc_time(struct rtc_time *); +void beat_get_rtc_time(struct rtc_time *); +ssize_t beat_nvram_get_size(void); +ssize_t beat_nvram_read(char *, size_t, loff_t *); +ssize_t beat_nvram_write(char *, size_t, loff_t *); +int beat_set_xdabr(unsigned long, unsigned long); +void beat_power_save(void); +void beat_kexec_cpu_down(int, int); + +#endif /* _CELLEB_BEAT_H */ diff --git a/arch/powerpc/platforms/cell/beat_htab.c b/arch/powerpc/platforms/cell/beat_htab.c new file mode 100644 index 00000000000..d4d245c0d78 --- /dev/null +++ b/arch/powerpc/platforms/cell/beat_htab.c @@ -0,0 +1,445 @@ +/* + * "Cell Reference Set" HTAB support. 
+ * + * (C) Copyright 2006-2007 TOSHIBA CORPORATION + * + * This code is based on arch/powerpc/platforms/pseries/lpar.c: + * Copyright (C) 2001 Todd Inglett, IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#undef DEBUG_LOW + +#include <linux/kernel.h> +#include <linux/spinlock.h> + +#include <asm/mmu.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/machdep.h> +#include <asm/udbg.h> + +#include "beat_wrapper.h" + +#ifdef DEBUG_LOW +#define DBG_LOW(fmt...) do { udbg_printf(fmt); } while (0) +#else +#define DBG_LOW(fmt...) do { } while (0) +#endif + +static DEFINE_RAW_SPINLOCK(beat_htab_lock); + +static inline unsigned int beat_read_mask(unsigned hpte_group) +{ + unsigned long rmask = 0; + u64 hpte_v[5]; + + beat_read_htab_entries(0, hpte_group + 0, hpte_v); + if (!(hpte_v[0] & HPTE_V_BOLTED)) + rmask |= 0x8000; + if (!(hpte_v[1] & HPTE_V_BOLTED)) + rmask |= 0x4000; + if (!(hpte_v[2] & HPTE_V_BOLTED)) + rmask |= 0x2000; + if (!(hpte_v[3] & HPTE_V_BOLTED)) + rmask |= 0x1000; + beat_read_htab_entries(0, hpte_group + 4, hpte_v); + if (!(hpte_v[0] & HPTE_V_BOLTED)) + rmask |= 0x0800; + if (!(hpte_v[1] & HPTE_V_BOLTED)) + rmask |= 0x0400; + if (!(hpte_v[2] & HPTE_V_BOLTED)) + rmask |= 0x0200; + if (!(hpte_v[3] & HPTE_V_BOLTED)) + rmask |= 0x0100; + hpte_group = ~hpte_group & (htab_hash_mask * HPTES_PER_GROUP); + beat_read_htab_entries(0, hpte_group + 0, hpte_v); + if (!(hpte_v[0] & HPTE_V_BOLTED)) + rmask |= 0x80; + if (!(hpte_v[1] & HPTE_V_BOLTED)) + rmask |= 0x40; + if (!(hpte_v[2] & HPTE_V_BOLTED)) + rmask |= 0x20; + if (!(hpte_v[3] & HPTE_V_BOLTED)) + rmask |= 0x10; + beat_read_htab_entries(0, hpte_group + 4, hpte_v); + if (!(hpte_v[0] & HPTE_V_BOLTED)) + rmask |= 0x08; + if (!(hpte_v[1] & HPTE_V_BOLTED)) + rmask |= 0x04; + if (!(hpte_v[2] & HPTE_V_BOLTED)) + rmask |= 0x02; + if (!(hpte_v[3] & HPTE_V_BOLTED)) + rmask |= 0x01; + return rmask; +} + +static long beat_lpar_hpte_insert(unsigned long hpte_group, + unsigned long vpn, unsigned long pa, + unsigned long rflags, unsigned long vflags, + int psize, int apsize, int ssize) +{ + unsigned long lpar_rc; + u64 hpte_v, hpte_r, slot; + + if (vflags & HPTE_V_SECONDARY) + return -1; + + if (!(vflags & HPTE_V_BOLTED)) + DBG_LOW("hpte_insert(group=%lx, va=%016lx, pa=%016lx, " + "rflags=%lx, vflags=%lx, psize=%d)\n", + hpte_group, va, pa, rflags, vflags, psize); + + hpte_v = hpte_encode_v(vpn, psize, apsize, MMU_SEGSIZE_256M) | + vflags | HPTE_V_VALID; + hpte_r = hpte_encode_r(pa, psize, apsize) | rflags; + + if (!(vflags & HPTE_V_BOLTED)) + DBG_LOW(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r); + + if (rflags & _PAGE_NO_CACHE) + hpte_r &= ~HPTE_R_M; + + raw_spin_lock(&beat_htab_lock); + lpar_rc = beat_read_mask(hpte_group); + if (lpar_rc == 0) { + if (!(vflags & HPTE_V_BOLTED)) + DBG_LOW(" full\n"); + 
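A small userspace sketch of the 16-bit free-slot mask that beat_read_mask() above computes; the two example buckets are made up, and in the real code a set HPTE_V_BOLTED bit marks a slot that must not be reused. Bits 15..8 cover the primary bucket and bits 7..0 the secondary one, which is the mask beat_lpar_hpte_insert() hands to the hypervisor (shifted into the top 16 bits as lpar_rc << 48):

#include <stdio.h>

int main(void)
{
	/* made-up example data: which of the 8 slots in each bucket are bolted */
	int primary_bolted[8]   = { 1, 0, 0, 1, 0, 0, 0, 0 };
	int secondary_bolted[8] = { 0, 0, 0, 0, 0, 0, 0, 1 };
	unsigned int rmask = 0;
	int i;

	for (i = 0; i < 8; i++) {
		if (!primary_bolted[i])
			rmask |= 0x8000 >> i;	/* primary bucket: bits 15..8 */
		if (!secondary_bolted[i])
			rmask |= 0x80 >> i;	/* secondary bucket: bits 7..0 */
	}
	printf("free-slot mask = 0x%04x\n", rmask);	/* 0x6ffe */
	return 0;
}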
raw_spin_unlock(&beat_htab_lock); + return -1; + } + + lpar_rc = beat_insert_htab_entry(0, hpte_group, lpar_rc << 48, + hpte_v, hpte_r, &slot); + raw_spin_unlock(&beat_htab_lock); + + /* + * Since we try and ioremap PHBs we don't own, the pte insert + * will fail. However we must catch the failure in hash_page + * or we will loop forever, so return -2 in this case. + */ + if (unlikely(lpar_rc != 0)) { + if (!(vflags & HPTE_V_BOLTED)) + DBG_LOW(" lpar err %lx\n", lpar_rc); + return -2; + } + if (!(vflags & HPTE_V_BOLTED)) + DBG_LOW(" -> slot: %lx\n", slot); + + /* We have to pass down the secondary bucket bit here as well */ + return (slot ^ hpte_group) & 15; +} + +static long beat_lpar_hpte_remove(unsigned long hpte_group) +{ + DBG_LOW("hpte_remove(group=%lx)\n", hpte_group); + return -1; +} + +static unsigned long beat_lpar_hpte_getword0(unsigned long slot) +{ + unsigned long dword0; + unsigned long lpar_rc; + u64 dword[5]; + + lpar_rc = beat_read_htab_entries(0, slot & ~3UL, dword); + + dword0 = dword[slot&3]; + + BUG_ON(lpar_rc != 0); + + return dword0; +} + +static void beat_lpar_hptab_clear(void) +{ + unsigned long size_bytes = 1UL << ppc64_pft_size; + unsigned long hpte_count = size_bytes >> 4; + int i; + u64 dummy0, dummy1; + + /* TODO: Use bulk call */ + for (i = 0; i < hpte_count; i++) + beat_write_htab_entry(0, i, 0, 0, -1UL, -1UL, &dummy0, &dummy1); +} + +/* + * NOTE: for updatepp ops we are fortunate that the linux "newpp" bits and + * the low 3 bits of flags happen to line up. So no transform is needed. + * We can probably optimize here and assume the high bits of newpp are + * already zero. For now I am paranoid. + */ +static long beat_lpar_hpte_updatepp(unsigned long slot, + unsigned long newpp, + unsigned long vpn, + int psize, int apsize, + int ssize, int local) +{ + unsigned long lpar_rc; + u64 dummy0, dummy1; + unsigned long want_v; + + want_v = hpte_encode_avpn(vpn, psize, MMU_SEGSIZE_256M); + + DBG_LOW(" update: " + "avpnv=%016lx, slot=%016lx, psize: %d, newpp %016lx ... 
", + want_v & HPTE_V_AVPN, slot, psize, newpp); + + raw_spin_lock(&beat_htab_lock); + dummy0 = beat_lpar_hpte_getword0(slot); + if ((dummy0 & ~0x7FUL) != (want_v & ~0x7FUL)) { + DBG_LOW("not found !\n"); + raw_spin_unlock(&beat_htab_lock); + return -1; + } + + lpar_rc = beat_write_htab_entry(0, slot, 0, newpp, 0, 7, &dummy0, + &dummy1); + raw_spin_unlock(&beat_htab_lock); + if (lpar_rc != 0 || dummy0 == 0) { + DBG_LOW("not found !\n"); + return -1; + } + + DBG_LOW("ok %lx %lx\n", dummy0, dummy1); + + BUG_ON(lpar_rc != 0); + + return 0; +} + +static long beat_lpar_hpte_find(unsigned long vpn, int psize) +{ + unsigned long hash; + unsigned long i, j; + long slot; + unsigned long want_v, hpte_v; + + hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, MMU_SEGSIZE_256M); + want_v = hpte_encode_avpn(vpn, psize, MMU_SEGSIZE_256M); + + for (j = 0; j < 2; j++) { + slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; + for (i = 0; i < HPTES_PER_GROUP; i++) { + hpte_v = beat_lpar_hpte_getword0(slot); + + if (HPTE_V_COMPARE(hpte_v, want_v) + && (hpte_v & HPTE_V_VALID) + && (!!(hpte_v & HPTE_V_SECONDARY) == j)) { + /* HPTE matches */ + if (j) + slot = -slot; + return slot; + } + ++slot; + } + hash = ~hash; + } + + return -1; +} + +static void beat_lpar_hpte_updateboltedpp(unsigned long newpp, + unsigned long ea, + int psize, int ssize) +{ + unsigned long vpn; + unsigned long lpar_rc, slot, vsid; + u64 dummy0, dummy1; + + vsid = get_kernel_vsid(ea, MMU_SEGSIZE_256M); + vpn = hpt_vpn(ea, vsid, MMU_SEGSIZE_256M); + + raw_spin_lock(&beat_htab_lock); + slot = beat_lpar_hpte_find(vpn, psize); + BUG_ON(slot == -1); + + lpar_rc = beat_write_htab_entry(0, slot, 0, newpp, 0, 7, + &dummy0, &dummy1); + raw_spin_unlock(&beat_htab_lock); + + BUG_ON(lpar_rc != 0); +} + +static void beat_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn, + int psize, int apsize, + int ssize, int local) +{ + unsigned long want_v; + unsigned long lpar_rc; + u64 dummy1, dummy2; + unsigned long flags; + + DBG_LOW(" inval : slot=%lx, va=%016lx, psize: %d, local: %d\n", + slot, va, psize, local); + want_v = hpte_encode_avpn(vpn, psize, MMU_SEGSIZE_256M); + + raw_spin_lock_irqsave(&beat_htab_lock, flags); + dummy1 = beat_lpar_hpte_getword0(slot); + + if ((dummy1 & ~0x7FUL) != (want_v & ~0x7FUL)) { + DBG_LOW("not found !\n"); + raw_spin_unlock_irqrestore(&beat_htab_lock, flags); + return; + } + + lpar_rc = beat_write_htab_entry(0, slot, 0, 0, HPTE_V_VALID, 0, + &dummy1, &dummy2); + raw_spin_unlock_irqrestore(&beat_htab_lock, flags); + + BUG_ON(lpar_rc != 0); +} + +void __init hpte_init_beat(void) +{ + ppc_md.hpte_invalidate = beat_lpar_hpte_invalidate; + ppc_md.hpte_updatepp = beat_lpar_hpte_updatepp; + ppc_md.hpte_updateboltedpp = beat_lpar_hpte_updateboltedpp; + ppc_md.hpte_insert = beat_lpar_hpte_insert; + ppc_md.hpte_remove = beat_lpar_hpte_remove; + ppc_md.hpte_clear_all = beat_lpar_hptab_clear; +} + +static long beat_lpar_hpte_insert_v3(unsigned long hpte_group, + unsigned long vpn, unsigned long pa, + unsigned long rflags, unsigned long vflags, + int psize, int apsize, int ssize) +{ + unsigned long lpar_rc; + u64 hpte_v, hpte_r, slot; + + if (vflags & HPTE_V_SECONDARY) + return -1; + + if (!(vflags & HPTE_V_BOLTED)) + DBG_LOW("hpte_insert(group=%lx, vpn=%016lx, pa=%016lx, " + "rflags=%lx, vflags=%lx, psize=%d)\n", + hpte_group, vpn, pa, rflags, vflags, psize); + + hpte_v = hpte_encode_v(vpn, psize, apsize, MMU_SEGSIZE_256M) | + vflags | HPTE_V_VALID; + hpte_r = hpte_encode_r(pa, psize, apsize) | rflags; + + if (!(vflags & 
HPTE_V_BOLTED)) + DBG_LOW(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r); + + if (rflags & _PAGE_NO_CACHE) + hpte_r &= ~HPTE_R_M; + + /* insert into not-volted entry */ + lpar_rc = beat_insert_htab_entry3(0, hpte_group, hpte_v, hpte_r, + HPTE_V_BOLTED, 0, &slot); + /* + * Since we try and ioremap PHBs we don't own, the pte insert + * will fail. However we must catch the failure in hash_page + * or we will loop forever, so return -2 in this case. + */ + if (unlikely(lpar_rc != 0)) { + if (!(vflags & HPTE_V_BOLTED)) + DBG_LOW(" lpar err %lx\n", lpar_rc); + return -2; + } + if (!(vflags & HPTE_V_BOLTED)) + DBG_LOW(" -> slot: %lx\n", slot); + + /* We have to pass down the secondary bucket bit here as well */ + return (slot ^ hpte_group) & 15; +} + +/* + * NOTE: for updatepp ops we are fortunate that the linux "newpp" bits and + * the low 3 bits of flags happen to line up. So no transform is needed. + * We can probably optimize here and assume the high bits of newpp are + * already zero. For now I am paranoid. + */ +static long beat_lpar_hpte_updatepp_v3(unsigned long slot, + unsigned long newpp, + unsigned long vpn, + int psize, int apsize, + int ssize, int local) +{ + unsigned long lpar_rc; + unsigned long want_v; + unsigned long pss; + + want_v = hpte_encode_avpn(vpn, psize, MMU_SEGSIZE_256M); + pss = (psize == MMU_PAGE_4K) ? -1UL : mmu_psize_defs[psize].penc[psize]; + + DBG_LOW(" update: " + "avpnv=%016lx, slot=%016lx, psize: %d, newpp %016lx ... ", + want_v & HPTE_V_AVPN, slot, psize, newpp); + + lpar_rc = beat_update_htab_permission3(0, slot, want_v, pss, 7, newpp); + + if (lpar_rc == 0xfffffff7) { + DBG_LOW("not found !\n"); + return -1; + } + + DBG_LOW("ok\n"); + + BUG_ON(lpar_rc != 0); + + return 0; +} + +static void beat_lpar_hpte_invalidate_v3(unsigned long slot, unsigned long vpn, + int psize, int apsize, + int ssize, int local) +{ + unsigned long want_v; + unsigned long lpar_rc; + unsigned long pss; + + DBG_LOW(" inval : slot=%lx, vpn=%016lx, psize: %d, local: %d\n", + slot, vpn, psize, local); + want_v = hpte_encode_avpn(vpn, psize, MMU_SEGSIZE_256M); + pss = (psize == MMU_PAGE_4K) ? 
-1UL : mmu_psize_defs[psize].penc[psize]; + + lpar_rc = beat_invalidate_htab_entry3(0, slot, want_v, pss); + + /* E_busy can be valid output: page may be already replaced */ + BUG_ON(lpar_rc != 0 && lpar_rc != 0xfffffff7); +} + +static int64_t _beat_lpar_hptab_clear_v3(void) +{ + return beat_clear_htab3(0); +} + +static void beat_lpar_hptab_clear_v3(void) +{ + _beat_lpar_hptab_clear_v3(); +} + +void __init hpte_init_beat_v3(void) +{ + if (_beat_lpar_hptab_clear_v3() == 0) { + ppc_md.hpte_invalidate = beat_lpar_hpte_invalidate_v3; + ppc_md.hpte_updatepp = beat_lpar_hpte_updatepp_v3; + ppc_md.hpte_updateboltedpp = beat_lpar_hpte_updateboltedpp; + ppc_md.hpte_insert = beat_lpar_hpte_insert_v3; + ppc_md.hpte_remove = beat_lpar_hpte_remove; + ppc_md.hpte_clear_all = beat_lpar_hptab_clear_v3; + } else { + ppc_md.hpte_invalidate = beat_lpar_hpte_invalidate; + ppc_md.hpte_updatepp = beat_lpar_hpte_updatepp; + ppc_md.hpte_updateboltedpp = beat_lpar_hpte_updateboltedpp; + ppc_md.hpte_insert = beat_lpar_hpte_insert; + ppc_md.hpte_remove = beat_lpar_hpte_remove; + ppc_md.hpte_clear_all = beat_lpar_hptab_clear; + } +} diff --git a/arch/powerpc/platforms/cell/beat_hvCall.S b/arch/powerpc/platforms/cell/beat_hvCall.S new file mode 100644 index 00000000000..96c80190712 --- /dev/null +++ b/arch/powerpc/platforms/cell/beat_hvCall.S @@ -0,0 +1,285 @@ +/* + * Beat hypervisor call I/F + * + * (C) Copyright 2007 TOSHIBA CORPORATION + * + * This code is based on arch/powerpc/platforms/pseries/hvCall.S. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#include <asm/ppc_asm.h> + +/* Not implemented on Beat, now */ +#define HCALL_INST_PRECALL +#define HCALL_INST_POSTCALL + + .text + +#define HVSC .long 0x44000022 + +/* Note: takes only 7 input parameters at maximum */ +_GLOBAL(beat_hcall_norets) + HMT_MEDIUM + + mfcr r0 + stw r0,8(r1) + + HCALL_INST_PRECALL + + mr r11,r3 + mr r3,r4 + mr r4,r5 + mr r5,r6 + mr r6,r7 + mr r7,r8 + mr r8,r9 + + HVSC /* invoke the hypervisor */ + + HCALL_INST_POSTCALL + + lwz r0,8(r1) + mtcrf 0xff,r0 + + blr /* return r3 = status */ + +/* Note: takes 8 input parameters at maximum */ +_GLOBAL(beat_hcall_norets8) + HMT_MEDIUM + + mfcr r0 + stw r0,8(r1) + + HCALL_INST_PRECALL + + mr r11,r3 + mr r3,r4 + mr r4,r5 + mr r5,r6 + mr r6,r7 + mr r7,r8 + mr r8,r9 + ld r10,STK_PARAM(R10)(r1) + + HVSC /* invoke the hypervisor */ + + HCALL_INST_POSTCALL + + lwz r0,8(r1) + mtcrf 0xff,r0 + + blr /* return r3 = status */ + +/* Note: takes only 6 input parameters, 1 output parameters at maximum */ +_GLOBAL(beat_hcall1) + HMT_MEDIUM + + mfcr r0 + stw r0,8(r1) + + HCALL_INST_PRECALL + + std r4,STK_PARAM(R4)(r1) /* save ret buffer */ + + mr r11,r3 + mr r3,r5 + mr r4,r6 + mr r5,r7 + mr r6,r8 + mr r7,r9 + mr r8,r10 + + HVSC /* invoke the hypervisor */ + + HCALL_INST_POSTCALL + + ld r12,STK_PARAM(R4)(r1) + std r4, 0(r12) + + lwz r0,8(r1) + mtcrf 0xff,r0 + + blr /* return r3 = status */ + +/* Note: takes only 6 input parameters, 2 output parameters at maximum */ +_GLOBAL(beat_hcall2) + HMT_MEDIUM + + mfcr r0 + stw r0,8(r1) + + HCALL_INST_PRECALL + + std r4,STK_PARAM(R4)(r1) /* save ret buffer */ + + mr r11,r3 + mr r3,r5 + mr r4,r6 + mr r5,r7 + mr r6,r8 + mr r7,r9 + mr r8,r10 + + HVSC /* invoke the hypervisor */ + + HCALL_INST_POSTCALL + + ld r12,STK_PARAM(R4)(r1) + std r4, 0(r12) + std r5, 8(r12) + + lwz r0,8(r1) + mtcrf 0xff,r0 + + blr /* return r3 = status */ + +/* Note: takes only 6 input parameters, 3 output parameters at maximum */ +_GLOBAL(beat_hcall3) + HMT_MEDIUM + + mfcr r0 + stw r0,8(r1) + + HCALL_INST_PRECALL + + std r4,STK_PARAM(R4)(r1) /* save ret buffer */ + + mr r11,r3 + mr r3,r5 + mr r4,r6 + mr r5,r7 + mr r6,r8 + mr r7,r9 + mr r8,r10 + + HVSC /* invoke the hypervisor */ + + HCALL_INST_POSTCALL + + ld r12,STK_PARAM(R4)(r1) + std r4, 0(r12) + std r5, 8(r12) + std r6, 16(r12) + + lwz r0,8(r1) + mtcrf 0xff,r0 + + blr /* return r3 = status */ + +/* Note: takes only 6 input parameters, 4 output parameters at maximum */ +_GLOBAL(beat_hcall4) + HMT_MEDIUM + + mfcr r0 + stw r0,8(r1) + + HCALL_INST_PRECALL + + std r4,STK_PARAM(R4)(r1) /* save ret buffer */ + + mr r11,r3 + mr r3,r5 + mr r4,r6 + mr r5,r7 + mr r6,r8 + mr r7,r9 + mr r8,r10 + + HVSC /* invoke the hypervisor */ + + HCALL_INST_POSTCALL + + ld r12,STK_PARAM(R4)(r1) + std r4, 0(r12) + std r5, 8(r12) + std r6, 16(r12) + std r7, 24(r12) + + lwz r0,8(r1) + mtcrf 0xff,r0 + + blr /* return r3 = status */ + +/* Note: takes only 6 input parameters, 5 output parameters at maximum */ +_GLOBAL(beat_hcall5) + HMT_MEDIUM + + mfcr r0 + stw r0,8(r1) + + HCALL_INST_PRECALL + + std r4,STK_PARAM(R4)(r1) /* save ret buffer */ + + mr r11,r3 + mr r3,r5 + mr r4,r6 + mr r5,r7 + mr r6,r8 + mr r7,r9 + mr r8,r10 + + HVSC /* invoke the hypervisor */ + + HCALL_INST_POSTCALL + + ld r12,STK_PARAM(R4)(r1) + std r4, 0(r12) + std r5, 8(r12) + std r6, 16(r12) + std r7, 24(r12) + std r8, 32(r12) + + lwz r0,8(r1) + mtcrf 0xff,r0 + + blr /* return r3 = status */ + +/* Note: takes only 6 input parameters, 6 output parameters at maximum */ +_GLOBAL(beat_hcall6) + HMT_MEDIUM + + mfcr r0 + stw r0,8(r1) + 
+ HCALL_INST_PRECALL + + std r4,STK_PARAM(R4)(r1) /* save ret buffer */ + + mr r11,r3 + mr r3,r5 + mr r4,r6 + mr r5,r7 + mr r6,r8 + mr r7,r9 + mr r8,r10 + + HVSC /* invoke the hypervisor */ + + HCALL_INST_POSTCALL + + ld r12,STK_PARAM(R4)(r1) + std r4, 0(r12) + std r5, 8(r12) + std r6, 16(r12) + std r7, 24(r12) + std r8, 32(r12) + std r9, 40(r12) + + lwz r0,8(r1) + mtcrf 0xff,r0 + + blr /* return r3 = status */ diff --git a/arch/powerpc/platforms/cell/beat_interrupt.c b/arch/powerpc/platforms/cell/beat_interrupt.c new file mode 100644 index 00000000000..9e5dfbcc00a --- /dev/null +++ b/arch/powerpc/platforms/cell/beat_interrupt.c @@ -0,0 +1,253 @@ +/* + * Celleb/Beat Interrupt controller + * + * (C) Copyright 2006-2007 TOSHIBA CORPORATION + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/percpu.h> +#include <linux/types.h> + +#include <asm/machdep.h> + +#include "beat_interrupt.h" +#include "beat_wrapper.h" + +#define MAX_IRQS NR_IRQS +static DEFINE_RAW_SPINLOCK(beatic_irq_mask_lock); +static uint64_t beatic_irq_mask_enable[(MAX_IRQS+255)/64]; +static uint64_t beatic_irq_mask_ack[(MAX_IRQS+255)/64]; + +static struct irq_domain *beatic_host; + +/* + * In this implementation, "virq" == "IRQ plug number", + * "(irq_hw_number_t)hwirq" == "IRQ outlet number". 
+ */ + +/* assumption: locked */ +static inline void beatic_update_irq_mask(unsigned int irq_plug) +{ + int off; + unsigned long masks[4]; + + off = (irq_plug / 256) * 4; + masks[0] = beatic_irq_mask_enable[off + 0] + & beatic_irq_mask_ack[off + 0]; + masks[1] = beatic_irq_mask_enable[off + 1] + & beatic_irq_mask_ack[off + 1]; + masks[2] = beatic_irq_mask_enable[off + 2] + & beatic_irq_mask_ack[off + 2]; + masks[3] = beatic_irq_mask_enable[off + 3] + & beatic_irq_mask_ack[off + 3]; + if (beat_set_interrupt_mask(irq_plug&~255UL, + masks[0], masks[1], masks[2], masks[3]) != 0) + panic("Failed to set mask IRQ!"); +} + +static void beatic_mask_irq(struct irq_data *d) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&beatic_irq_mask_lock, flags); + beatic_irq_mask_enable[d->irq/64] &= ~(1UL << (63 - (d->irq%64))); + beatic_update_irq_mask(d->irq); + raw_spin_unlock_irqrestore(&beatic_irq_mask_lock, flags); +} + +static void beatic_unmask_irq(struct irq_data *d) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&beatic_irq_mask_lock, flags); + beatic_irq_mask_enable[d->irq/64] |= 1UL << (63 - (d->irq%64)); + beatic_update_irq_mask(d->irq); + raw_spin_unlock_irqrestore(&beatic_irq_mask_lock, flags); +} + +static void beatic_ack_irq(struct irq_data *d) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&beatic_irq_mask_lock, flags); + beatic_irq_mask_ack[d->irq/64] &= ~(1UL << (63 - (d->irq%64))); + beatic_update_irq_mask(d->irq); + raw_spin_unlock_irqrestore(&beatic_irq_mask_lock, flags); +} + +static void beatic_end_irq(struct irq_data *d) +{ + s64 err; + unsigned long flags; + + err = beat_downcount_of_interrupt(d->irq); + if (err != 0) { + if ((err & 0xFFFFFFFF) != 0xFFFFFFF5) /* -11: wrong state */ + panic("Failed to downcount IRQ! Error = %16llx", err); + + printk(KERN_ERR "IRQ over-downcounted, plug %d\n", d->irq); + } + raw_spin_lock_irqsave(&beatic_irq_mask_lock, flags); + beatic_irq_mask_ack[d->irq/64] |= 1UL << (63 - (d->irq%64)); + beatic_update_irq_mask(d->irq); + raw_spin_unlock_irqrestore(&beatic_irq_mask_lock, flags); +} + +static struct irq_chip beatic_pic = { + .name = "CELL-BEAT", + .irq_unmask = beatic_unmask_irq, + .irq_mask = beatic_mask_irq, + .irq_eoi = beatic_end_irq, +}; + +/* + * Dispose binding hardware IRQ number (hw) and Virtuql IRQ number (virq), + * update flags. + * + * Note that the number (virq) is already assigned at upper layer. + */ +static void beatic_pic_host_unmap(struct irq_domain *h, unsigned int virq) +{ + beat_destruct_irq_plug(virq); +} + +/* + * Create or update binding hardware IRQ number (hw) and Virtuql + * IRQ number (virq). This is called only once for a given mapping. + * + * Note that the number (virq) is already assigned at upper layer. + */ +static int beatic_pic_host_map(struct irq_domain *h, unsigned int virq, + irq_hw_number_t hw) +{ + int64_t err; + + err = beat_construct_and_connect_irq_plug(virq, hw); + if (err < 0) + return -EIO; + + irq_set_status_flags(virq, IRQ_LEVEL); + irq_set_chip_and_handler(virq, &beatic_pic, handle_fasteoi_irq); + return 0; +} + +/* + * Translate device-tree interrupt spec to irq_hw_number_t style (ulong), + * to pass away to irq_create_mapping(). + * + * Called from irq_create_of_mapping() only. + * Note: We have only 1 entry to translate. 
+ */ +static int beatic_pic_host_xlate(struct irq_domain *h, struct device_node *ct, + const u32 *intspec, unsigned int intsize, + irq_hw_number_t *out_hwirq, + unsigned int *out_flags) +{ + const u64 *intspec2 = (const u64 *)intspec; + + *out_hwirq = *intspec2; + *out_flags |= IRQ_TYPE_LEVEL_LOW; + return 0; +} + +static int beatic_pic_host_match(struct irq_domain *h, struct device_node *np) +{ + /* Match all */ + return 1; +} + +static const struct irq_domain_ops beatic_pic_host_ops = { + .map = beatic_pic_host_map, + .unmap = beatic_pic_host_unmap, + .xlate = beatic_pic_host_xlate, + .match = beatic_pic_host_match, +}; + +/* + * Get an IRQ number + * Note: returns VIRQ + */ +static inline unsigned int beatic_get_irq_plug(void) +{ + int i; + uint64_t pending[4], ub; + + for (i = 0; i < MAX_IRQS; i += 256) { + beat_detect_pending_interrupts(i, pending); + __asm__ ("cntlzd %0,%1":"=r"(ub): + "r"(pending[0] & beatic_irq_mask_enable[i/64+0] + & beatic_irq_mask_ack[i/64+0])); + if (ub != 64) + return i + ub + 0; + __asm__ ("cntlzd %0,%1":"=r"(ub): + "r"(pending[1] & beatic_irq_mask_enable[i/64+1] + & beatic_irq_mask_ack[i/64+1])); + if (ub != 64) + return i + ub + 64; + __asm__ ("cntlzd %0,%1":"=r"(ub): + "r"(pending[2] & beatic_irq_mask_enable[i/64+2] + & beatic_irq_mask_ack[i/64+2])); + if (ub != 64) + return i + ub + 128; + __asm__ ("cntlzd %0,%1":"=r"(ub): + "r"(pending[3] & beatic_irq_mask_enable[i/64+3] + & beatic_irq_mask_ack[i/64+3])); + if (ub != 64) + return i + ub + 192; + } + + return NO_IRQ; +} +unsigned int beatic_get_irq(void) +{ + unsigned int ret; + + ret = beatic_get_irq_plug(); + if (ret != NO_IRQ) + beatic_ack_irq(irq_get_irq_data(ret)); + return ret; +} + +/* + */ +void __init beatic_init_IRQ(void) +{ + int i; + + memset(beatic_irq_mask_enable, 0, sizeof(beatic_irq_mask_enable)); + memset(beatic_irq_mask_ack, 255, sizeof(beatic_irq_mask_ack)); + for (i = 0; i < MAX_IRQS; i += 256) + beat_set_interrupt_mask(i, 0L, 0L, 0L, 0L); + + /* Set out get_irq function */ + ppc_md.get_irq = beatic_get_irq; + + /* Allocate an irq host */ + beatic_host = irq_domain_add_nomap(NULL, ~0, &beatic_pic_host_ops, NULL); + BUG_ON(beatic_host == NULL); + irq_set_default_host(beatic_host); +} + +void beatic_deinit_IRQ(void) +{ + int i; + + for (i = 1; i < nr_irqs; i++) + beat_destruct_irq_plug(i); +} diff --git a/arch/powerpc/platforms/cell/beat_interrupt.h b/arch/powerpc/platforms/cell/beat_interrupt.h new file mode 100644 index 00000000000..a7e52f91a07 --- /dev/null +++ b/arch/powerpc/platforms/cell/beat_interrupt.h @@ -0,0 +1,30 @@ +/* + * Celleb/Beat Interrupt controller + * + * (C) Copyright 2006 TOSHIBA CORPORATION + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#ifndef ASM_BEAT_PIC_H +#define ASM_BEAT_PIC_H +#ifdef __KERNEL__ + +extern void beatic_init_IRQ(void); +extern unsigned int beatic_get_irq(void); +extern void beatic_deinit_IRQ(void); + +#endif +#endif /* ASM_BEAT_PIC_H */ diff --git a/arch/powerpc/platforms/cell/beat_iommu.c b/arch/powerpc/platforms/cell/beat_iommu.c new file mode 100644 index 00000000000..3ce68556893 --- /dev/null +++ b/arch/powerpc/platforms/cell/beat_iommu.c @@ -0,0 +1,115 @@ +/* + * Support for IOMMU on Celleb platform. + * + * (C) Copyright 2006-2007 TOSHIBA CORPORATION + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/dma-mapping.h> +#include <linux/pci.h> +#include <linux/of_platform.h> + +#include <asm/machdep.h> + +#include "beat_wrapper.h" + +#define DMA_FLAGS 0xf800000000000000UL /* r/w permitted, coherency required, + strongest order */ + +static int __init find_dma_window(u64 *io_space_id, u64 *ioid, + u64 *base, u64 *size, u64 *io_page_size) +{ + struct device_node *dn; + const unsigned long *dma_window; + + for_each_node_by_type(dn, "ioif") { + dma_window = of_get_property(dn, "toshiba,dma-window", NULL); + if (dma_window) { + *io_space_id = (dma_window[0] >> 32) & 0xffffffffUL; + *ioid = dma_window[0] & 0x7ffUL; + *base = dma_window[1]; + *size = dma_window[2]; + *io_page_size = 1 << dma_window[3]; + of_node_put(dn); + return 1; + } + } + return 0; +} + +static unsigned long celleb_dma_direct_offset; + +static void __init celleb_init_direct_mapping(void) +{ + u64 lpar_addr, io_addr; + u64 io_space_id, ioid, dma_base, dma_size, io_page_size; + + if (!find_dma_window(&io_space_id, &ioid, &dma_base, &dma_size, + &io_page_size)) { + pr_info("No dma window found !\n"); + return; + } + + for (lpar_addr = 0; lpar_addr < dma_size; lpar_addr += io_page_size) { + io_addr = lpar_addr + dma_base; + (void)beat_put_iopte(io_space_id, io_addr, lpar_addr, + ioid, DMA_FLAGS); + } + + celleb_dma_direct_offset = dma_base; +} + +static void celleb_dma_dev_setup(struct device *dev) +{ + set_dma_ops(dev, &dma_direct_ops); + set_dma_offset(dev, celleb_dma_direct_offset); +} + +static void celleb_pci_dma_dev_setup(struct pci_dev *pdev) +{ + celleb_dma_dev_setup(&pdev->dev); +} + +static int celleb_of_bus_notify(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct device *dev = data; + + /* We are only intereted in device addition */ + if (action != BUS_NOTIFY_ADD_DEVICE) + return 0; + + celleb_dma_dev_setup(dev); + + return 0; +} + +static struct notifier_block celleb_of_bus_notifier = { + .notifier_call = celleb_of_bus_notify +}; + +static int __init celleb_init_iommu(void) +{ + celleb_init_direct_mapping(); + ppc_md.pci_dma_dev_setup = celleb_pci_dma_dev_setup; + bus_register_notifier(&platform_bus_type, &celleb_of_bus_notifier); + + return 0; +} + 
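An illustrative decode of a "toshiba,dma-window" property value, mirroring what find_dma_window() above extracts (io-space id and ioid from the first cell, base and size from the next two, and an IO page size of 1 << cell3); the four cells below are invented example data, not values from real firmware:

#include <stdio.h>

int main(void)
{
	unsigned long long dma_window[4] = {
		0x0000000200000123ULL,	/* io-space id (high 32 bits) | ioid (low 11 bits) */
		0x0000000080000000ULL,	/* DMA window base */
		0x0000000020000000ULL,	/* DMA window size */
		12			/* log2 of IO page size */
	};

	printf("io_space_id  = %llu\n", (dma_window[0] >> 32) & 0xffffffffULL);	/* 2 */
	printf("ioid         = 0x%llx\n", dma_window[0] & 0x7ffULL);			/* 0x123 */
	printf("base         = 0x%llx\n", dma_window[1]);
	printf("size         = 0x%llx\n", dma_window[2]);
	printf("io_page_size = %llu\n", 1ULL << dma_window[3]);				/* 4096 */
	return 0;
}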
+machine_arch_initcall(celleb_beat, celleb_init_iommu); diff --git a/arch/powerpc/platforms/cell/beat_spu_priv1.c b/arch/powerpc/platforms/cell/beat_spu_priv1.c new file mode 100644 index 00000000000..13f52589d3a --- /dev/null +++ b/arch/powerpc/platforms/cell/beat_spu_priv1.c @@ -0,0 +1,205 @@ +/* + * spu hypervisor abstraction for Beat + * + * (C) Copyright 2006-2007 TOSHIBA CORPORATION + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <asm/types.h> +#include <asm/spu.h> +#include <asm/spu_priv1.h> + +#include "beat_wrapper.h" + +static inline void _int_mask_set(struct spu *spu, int class, u64 mask) +{ + spu->shadow_int_mask_RW[class] = mask; + beat_set_irq_mask_for_spe(spu->spe_id, class, mask); +} + +static inline u64 _int_mask_get(struct spu *spu, int class) +{ + return spu->shadow_int_mask_RW[class]; +} + +static void int_mask_set(struct spu *spu, int class, u64 mask) +{ + _int_mask_set(spu, class, mask); +} + +static u64 int_mask_get(struct spu *spu, int class) +{ + return _int_mask_get(spu, class); +} + +static void int_mask_and(struct spu *spu, int class, u64 mask) +{ + u64 old_mask; + old_mask = _int_mask_get(spu, class); + _int_mask_set(spu, class, old_mask & mask); +} + +static void int_mask_or(struct spu *spu, int class, u64 mask) +{ + u64 old_mask; + old_mask = _int_mask_get(spu, class); + _int_mask_set(spu, class, old_mask | mask); +} + +static void int_stat_clear(struct spu *spu, int class, u64 stat) +{ + beat_clear_interrupt_status_of_spe(spu->spe_id, class, stat); +} + +static u64 int_stat_get(struct spu *spu, int class) +{ + u64 int_stat; + beat_get_interrupt_status_of_spe(spu->spe_id, class, &int_stat); + return int_stat; +} + +static void cpu_affinity_set(struct spu *spu, int cpu) +{ + return; +} + +static u64 mfc_dar_get(struct spu *spu) +{ + u64 dar; + beat_get_spe_privileged_state_1_registers( + spu->spe_id, + offsetof(struct spu_priv1, mfc_dar_RW), &dar); + return dar; +} + +static u64 mfc_dsisr_get(struct spu *spu) +{ + u64 dsisr; + beat_get_spe_privileged_state_1_registers( + spu->spe_id, + offsetof(struct spu_priv1, mfc_dsisr_RW), &dsisr); + return dsisr; +} + +static void mfc_dsisr_set(struct spu *spu, u64 dsisr) +{ + beat_set_spe_privileged_state_1_registers( + spu->spe_id, + offsetof(struct spu_priv1, mfc_dsisr_RW), dsisr); +} + +static void mfc_sdr_setup(struct spu *spu) +{ + return; +} + +static void mfc_sr1_set(struct spu *spu, u64 sr1) +{ + beat_set_spe_privileged_state_1_registers( + spu->spe_id, + offsetof(struct spu_priv1, mfc_sr1_RW), sr1); +} + +static u64 mfc_sr1_get(struct spu *spu) +{ + u64 sr1; + beat_get_spe_privileged_state_1_registers( + spu->spe_id, + offsetof(struct spu_priv1, mfc_sr1_RW), &sr1); + return sr1; +} + +static void mfc_tclass_id_set(struct spu *spu, u64 tclass_id) +{ + beat_set_spe_privileged_state_1_registers( + spu->spe_id, + 
offsetof(struct spu_priv1, mfc_tclass_id_RW), tclass_id); +} + +static u64 mfc_tclass_id_get(struct spu *spu) +{ + u64 tclass_id; + beat_get_spe_privileged_state_1_registers( + spu->spe_id, + offsetof(struct spu_priv1, mfc_tclass_id_RW), &tclass_id); + return tclass_id; +} + +static void tlb_invalidate(struct spu *spu) +{ + beat_set_spe_privileged_state_1_registers( + spu->spe_id, + offsetof(struct spu_priv1, tlb_invalidate_entry_W), 0ul); +} + +static void resource_allocation_groupID_set(struct spu *spu, u64 id) +{ + beat_set_spe_privileged_state_1_registers( + spu->spe_id, + offsetof(struct spu_priv1, resource_allocation_groupID_RW), + id); +} + +static u64 resource_allocation_groupID_get(struct spu *spu) +{ + u64 id; + beat_get_spe_privileged_state_1_registers( + spu->spe_id, + offsetof(struct spu_priv1, resource_allocation_groupID_RW), + &id); + return id; +} + +static void resource_allocation_enable_set(struct spu *spu, u64 enable) +{ + beat_set_spe_privileged_state_1_registers( + spu->spe_id, + offsetof(struct spu_priv1, resource_allocation_enable_RW), + enable); +} + +static u64 resource_allocation_enable_get(struct spu *spu) +{ + u64 enable; + beat_get_spe_privileged_state_1_registers( + spu->spe_id, + offsetof(struct spu_priv1, resource_allocation_enable_RW), + &enable); + return enable; +} + +const struct spu_priv1_ops spu_priv1_beat_ops = { + .int_mask_and = int_mask_and, + .int_mask_or = int_mask_or, + .int_mask_set = int_mask_set, + .int_mask_get = int_mask_get, + .int_stat_clear = int_stat_clear, + .int_stat_get = int_stat_get, + .cpu_affinity_set = cpu_affinity_set, + .mfc_dar_get = mfc_dar_get, + .mfc_dsisr_get = mfc_dsisr_get, + .mfc_dsisr_set = mfc_dsisr_set, + .mfc_sdr_setup = mfc_sdr_setup, + .mfc_sr1_set = mfc_sr1_set, + .mfc_sr1_get = mfc_sr1_get, + .mfc_tclass_id_set = mfc_tclass_id_set, + .mfc_tclass_id_get = mfc_tclass_id_get, + .tlb_invalidate = tlb_invalidate, + .resource_allocation_groupID_set = resource_allocation_groupID_set, + .resource_allocation_groupID_get = resource_allocation_groupID_get, + .resource_allocation_enable_set = resource_allocation_enable_set, + .resource_allocation_enable_get = resource_allocation_enable_get, +}; diff --git a/arch/powerpc/platforms/cell/beat_syscall.h b/arch/powerpc/platforms/cell/beat_syscall.h new file mode 100644 index 00000000000..8580dc7e179 --- /dev/null +++ b/arch/powerpc/platforms/cell/beat_syscall.h @@ -0,0 +1,164 @@ +/* + * Beat hypervisor call numbers + * + * (C) Copyright 2004-2007 TOSHIBA CORPORATION + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#ifndef BEAT_BEAT_syscall_H +#define BEAT_BEAT_syscall_H + +#ifdef __ASSEMBLY__ +#define __BEAT_ADD_VENDOR_ID(__x, __v) ((__v)<<60|(__x)) +#else +#define __BEAT_ADD_VENDOR_ID(__x, __v) ((u64)(__v)<<60|(__x)) +#endif +#define HV_allocate_memory __BEAT_ADD_VENDOR_ID(0, 0) +#define HV_construct_virtual_address_space __BEAT_ADD_VENDOR_ID(2, 0) +#define HV_destruct_virtual_address_space __BEAT_ADD_VENDOR_ID(10, 0) +#define HV_get_virtual_address_space_id_of_ppe __BEAT_ADD_VENDOR_ID(4, 0) +#define HV_query_logical_partition_address_region_info \ + __BEAT_ADD_VENDOR_ID(6, 0) +#define HV_release_memory __BEAT_ADD_VENDOR_ID(13, 0) +#define HV_select_virtual_address_space __BEAT_ADD_VENDOR_ID(7, 0) +#define HV_load_range_registers __BEAT_ADD_VENDOR_ID(68, 0) +#define HV_set_ppe_l2cache_rmt_entry __BEAT_ADD_VENDOR_ID(70, 0) +#define HV_set_ppe_tlb_rmt_entry __BEAT_ADD_VENDOR_ID(71, 0) +#define HV_set_spe_tlb_rmt_entry __BEAT_ADD_VENDOR_ID(72, 0) +#define HV_get_io_address_translation_fault_info __BEAT_ADD_VENDOR_ID(14, 0) +#define HV_get_iopte __BEAT_ADD_VENDOR_ID(16, 0) +#define HV_preload_iopt_cache __BEAT_ADD_VENDOR_ID(17, 0) +#define HV_put_iopte __BEAT_ADD_VENDOR_ID(15, 0) +#define HV_connect_event_ports __BEAT_ADD_VENDOR_ID(21, 0) +#define HV_construct_event_receive_port __BEAT_ADD_VENDOR_ID(18, 0) +#define HV_destruct_event_receive_port __BEAT_ADD_VENDOR_ID(19, 0) +#define HV_destruct_event_send_port __BEAT_ADD_VENDOR_ID(22, 0) +#define HV_get_state_of_event_send_port __BEAT_ADD_VENDOR_ID(25, 0) +#define HV_request_to_connect_event_ports __BEAT_ADD_VENDOR_ID(20, 0) +#define HV_send_event_externally __BEAT_ADD_VENDOR_ID(23, 0) +#define HV_send_event_locally __BEAT_ADD_VENDOR_ID(24, 0) +#define HV_construct_and_connect_irq_plug __BEAT_ADD_VENDOR_ID(28, 0) +#define HV_destruct_irq_plug __BEAT_ADD_VENDOR_ID(29, 0) +#define HV_detect_pending_interrupts __BEAT_ADD_VENDOR_ID(26, 0) +#define HV_end_of_interrupt __BEAT_ADD_VENDOR_ID(27, 0) +#define HV_assign_control_signal_notification_port __BEAT_ADD_VENDOR_ID(45, 0) +#define HV_end_of_control_signal_processing __BEAT_ADD_VENDOR_ID(48, 0) +#define HV_get_control_signal __BEAT_ADD_VENDOR_ID(46, 0) +#define HV_set_irq_mask_for_spe __BEAT_ADD_VENDOR_ID(61, 0) +#define HV_shutdown_logical_partition __BEAT_ADD_VENDOR_ID(44, 0) +#define HV_connect_message_ports __BEAT_ADD_VENDOR_ID(35, 0) +#define HV_destruct_message_port __BEAT_ADD_VENDOR_ID(36, 0) +#define HV_receive_message __BEAT_ADD_VENDOR_ID(37, 0) +#define HV_get_message_port_info __BEAT_ADD_VENDOR_ID(34, 0) +#define HV_request_to_connect_message_ports __BEAT_ADD_VENDOR_ID(33, 0) +#define HV_send_message __BEAT_ADD_VENDOR_ID(32, 0) +#define HV_get_logical_ppe_id __BEAT_ADD_VENDOR_ID(69, 0) +#define HV_pause __BEAT_ADD_VENDOR_ID(9, 0) +#define HV_destruct_shared_memory_handle __BEAT_ADD_VENDOR_ID(51, 0) +#define HV_get_shared_memory_info __BEAT_ADD_VENDOR_ID(52, 0) +#define HV_permit_sharing_memory __BEAT_ADD_VENDOR_ID(50, 0) +#define HV_request_to_attach_shared_memory __BEAT_ADD_VENDOR_ID(49, 0) +#define HV_enable_logical_spe_execution __BEAT_ADD_VENDOR_ID(55, 0) +#define HV_construct_logical_spe __BEAT_ADD_VENDOR_ID(53, 0) +#define HV_disable_logical_spe_execution __BEAT_ADD_VENDOR_ID(56, 0) +#define HV_destruct_logical_spe __BEAT_ADD_VENDOR_ID(54, 0) +#define HV_sense_spe_execution_status __BEAT_ADD_VENDOR_ID(58, 0) +#define HV_insert_htab_entry __BEAT_ADD_VENDOR_ID(101, 0) +#define HV_read_htab_entries __BEAT_ADD_VENDOR_ID(95, 0) +#define HV_write_htab_entry __BEAT_ADD_VENDOR_ID(94, 0) 
+#define HV_assign_io_address_translation_fault_port \ + __BEAT_ADD_VENDOR_ID(100, 0) +#define HV_set_interrupt_mask __BEAT_ADD_VENDOR_ID(73, 0) +#define HV_get_logical_partition_id __BEAT_ADD_VENDOR_ID(74, 0) +#define HV_create_repository_node2 __BEAT_ADD_VENDOR_ID(90, 0) +#define HV_create_repository_node __BEAT_ADD_VENDOR_ID(90, 0) /* alias */ +#define HV_get_repository_node_value2 __BEAT_ADD_VENDOR_ID(91, 0) +#define HV_get_repository_node_value __BEAT_ADD_VENDOR_ID(91, 0) /* alias */ +#define HV_modify_repository_node_value2 __BEAT_ADD_VENDOR_ID(92, 0) +#define HV_modify_repository_node_value __BEAT_ADD_VENDOR_ID(92, 0) /* alias */ +#define HV_remove_repository_node2 __BEAT_ADD_VENDOR_ID(93, 0) +#define HV_remove_repository_node __BEAT_ADD_VENDOR_ID(93, 0) /* alias */ +#define HV_cancel_shared_memory __BEAT_ADD_VENDOR_ID(104, 0) +#define HV_clear_interrupt_status_of_spe __BEAT_ADD_VENDOR_ID(206, 0) +#define HV_construct_spe_irq_outlet __BEAT_ADD_VENDOR_ID(80, 0) +#define HV_destruct_spe_irq_outlet __BEAT_ADD_VENDOR_ID(81, 0) +#define HV_disconnect_ipspc_service __BEAT_ADD_VENDOR_ID(88, 0) +#define HV_execute_ipspc_command __BEAT_ADD_VENDOR_ID(86, 0) +#define HV_get_interrupt_status_of_spe __BEAT_ADD_VENDOR_ID(205, 0) +#define HV_get_spe_privileged_state_1_registers __BEAT_ADD_VENDOR_ID(208, 0) +#define HV_permit_use_of_ipspc_service __BEAT_ADD_VENDOR_ID(85, 0) +#define HV_reinitialize_logical_spe __BEAT_ADD_VENDOR_ID(82, 0) +#define HV_request_ipspc_service __BEAT_ADD_VENDOR_ID(84, 0) +#define HV_stop_ipspc_command __BEAT_ADD_VENDOR_ID(87, 0) +#define HV_set_spe_privileged_state_1_registers __BEAT_ADD_VENDOR_ID(204, 0) +#define HV_get_status_of_ipspc_service __BEAT_ADD_VENDOR_ID(203, 0) +#define HV_put_characters_to_console __BEAT_ADD_VENDOR_ID(0x101, 1) +#define HV_get_characters_from_console __BEAT_ADD_VENDOR_ID(0x102, 1) +#define HV_get_base_clock __BEAT_ADD_VENDOR_ID(0x111, 1) +#define HV_set_base_clock __BEAT_ADD_VENDOR_ID(0x112, 1) +#define HV_get_frame_cycle __BEAT_ADD_VENDOR_ID(0x114, 1) +#define HV_disable_console __BEAT_ADD_VENDOR_ID(0x115, 1) +#define HV_disable_all_console __BEAT_ADD_VENDOR_ID(0x116, 1) +#define HV_oneshot_timer __BEAT_ADD_VENDOR_ID(0x117, 1) +#define HV_set_dabr __BEAT_ADD_VENDOR_ID(0x118, 1) +#define HV_get_dabr __BEAT_ADD_VENDOR_ID(0x119, 1) +#define HV_start_hv_stats __BEAT_ADD_VENDOR_ID(0x21c, 1) +#define HV_stop_hv_stats __BEAT_ADD_VENDOR_ID(0x21d, 1) +#define HV_get_hv_stats __BEAT_ADD_VENDOR_ID(0x21e, 1) +#define HV_get_hv_error_stats __BEAT_ADD_VENDOR_ID(0x221, 1) +#define HV_get_stats __BEAT_ADD_VENDOR_ID(0x224, 1) +#define HV_get_heap_stats __BEAT_ADD_VENDOR_ID(0x225, 1) +#define HV_get_memory_stats __BEAT_ADD_VENDOR_ID(0x227, 1) +#define HV_get_memory_detail __BEAT_ADD_VENDOR_ID(0x228, 1) +#define HV_set_priority_of_irq_outlet __BEAT_ADD_VENDOR_ID(0x122, 1) +#define HV_get_physical_spe_by_reservation_id __BEAT_ADD_VENDOR_ID(0x128, 1) +#define HV_get_spe_context __BEAT_ADD_VENDOR_ID(0x129, 1) +#define HV_set_spe_context __BEAT_ADD_VENDOR_ID(0x12a, 1) +#define HV_downcount_of_interrupt __BEAT_ADD_VENDOR_ID(0x12e, 1) +#define HV_peek_spe_context __BEAT_ADD_VENDOR_ID(0x12f, 1) +#define HV_read_bpa_register __BEAT_ADD_VENDOR_ID(0x131, 1) +#define HV_write_bpa_register __BEAT_ADD_VENDOR_ID(0x132, 1) +#define HV_map_context_table_of_spe __BEAT_ADD_VENDOR_ID(0x137, 1) +#define HV_get_slb_for_logical_spe __BEAT_ADD_VENDOR_ID(0x138, 1) +#define HV_set_slb_for_logical_spe __BEAT_ADD_VENDOR_ID(0x139, 1) +#define HV_init_pm __BEAT_ADD_VENDOR_ID(0x150, 1) 
+#define HV_set_pm_signal __BEAT_ADD_VENDOR_ID(0x151, 1) +#define HV_get_pm_signal __BEAT_ADD_VENDOR_ID(0x152, 1) +#define HV_set_pm_config __BEAT_ADD_VENDOR_ID(0x153, 1) +#define HV_get_pm_config __BEAT_ADD_VENDOR_ID(0x154, 1) +#define HV_get_inner_trace_data __BEAT_ADD_VENDOR_ID(0x155, 1) +#define HV_set_ext_trace_buffer __BEAT_ADD_VENDOR_ID(0x156, 1) +#define HV_get_ext_trace_buffer __BEAT_ADD_VENDOR_ID(0x157, 1) +#define HV_set_pm_interrupt __BEAT_ADD_VENDOR_ID(0x158, 1) +#define HV_get_pm_interrupt __BEAT_ADD_VENDOR_ID(0x159, 1) +#define HV_kick_pm __BEAT_ADD_VENDOR_ID(0x160, 1) +#define HV_construct_pm_context __BEAT_ADD_VENDOR_ID(0x164, 1) +#define HV_destruct_pm_context __BEAT_ADD_VENDOR_ID(0x165, 1) +#define HV_be_slow __BEAT_ADD_VENDOR_ID(0x170, 1) +#define HV_assign_ipspc_server_connection_status_notification_port \ + __BEAT_ADD_VENDOR_ID(0x173, 1) +#define HV_get_raid_of_physical_spe __BEAT_ADD_VENDOR_ID(0x174, 1) +#define HV_set_physical_spe_to_rag __BEAT_ADD_VENDOR_ID(0x175, 1) +#define HV_release_physical_spe_from_rag __BEAT_ADD_VENDOR_ID(0x176, 1) +#define HV_rtc_read __BEAT_ADD_VENDOR_ID(0x190, 1) +#define HV_rtc_write __BEAT_ADD_VENDOR_ID(0x191, 1) +#define HV_eeprom_read __BEAT_ADD_VENDOR_ID(0x192, 1) +#define HV_eeprom_write __BEAT_ADD_VENDOR_ID(0x193, 1) +#define HV_insert_htab_entry3 __BEAT_ADD_VENDOR_ID(0x104, 1) +#define HV_invalidate_htab_entry3 __BEAT_ADD_VENDOR_ID(0x105, 1) +#define HV_update_htab_permission3 __BEAT_ADD_VENDOR_ID(0x106, 1) +#define HV_clear_htab3 __BEAT_ADD_VENDOR_ID(0x107, 1) +#endif diff --git a/arch/powerpc/platforms/cell/beat_udbg.c b/arch/powerpc/platforms/cell/beat_udbg.c new file mode 100644 index 00000000000..350735bc888 --- /dev/null +++ b/arch/powerpc/platforms/cell/beat_udbg.c @@ -0,0 +1,98 @@ +/* + * udbg function for Beat + * + * (C) Copyright 2006 TOSHIBA CORPORATION + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <linux/kernel.h> +#include <linux/console.h> + +#include <asm/machdep.h> +#include <asm/prom.h> +#include <asm/udbg.h> + +#include "beat.h" + +#define celleb_vtermno 0 + +static void udbg_putc_beat(char c) +{ + unsigned long rc; + + if (c == '\n') + udbg_putc_beat('\r'); + + rc = beat_put_term_char(celleb_vtermno, 1, (uint64_t)c << 56, 0); +} + +/* Buffered chars getc */ +static u64 inbuflen; +static u64 inbuf[2]; /* must be 2 u64s */ + +static int udbg_getc_poll_beat(void) +{ + /* The interface is tricky because it may return up to 16 chars. + * We save them statically for future calls to udbg_getc(). + */ + char ch, *buf = (char *)inbuf; + int i; + long rc; + if (inbuflen == 0) { + /* get some more chars. 
*/ + inbuflen = 0; + rc = beat_get_term_char(celleb_vtermno, &inbuflen, + inbuf+0, inbuf+1); + if (rc != 0) + inbuflen = 0; /* otherwise inbuflen is garbage */ + } + if (inbuflen <= 0 || inbuflen > 16) { + /* Catch error case as well as other oddities (corruption) */ + inbuflen = 0; + return -1; + } + ch = buf[0]; + for (i = 1; i < inbuflen; i++) /* shuffle them down. */ + buf[i-1] = buf[i]; + inbuflen--; + return ch; +} + +static int udbg_getc_beat(void) +{ + int ch; + for (;;) { + ch = udbg_getc_poll_beat(); + if (ch == -1) { + /* This shouldn't be needed...but... */ + volatile unsigned long delay; + for (delay = 0; delay < 2000000; delay++) + ; + } else { + return ch; + } + } +} + +/* call this from early_init() for a working debug console on + * vterm capable LPAR machines + */ +void __init udbg_init_debug_beat(void) +{ + udbg_putc = udbg_putc_beat; + udbg_getc = udbg_getc_beat; + udbg_getc_poll = udbg_getc_poll_beat; +} diff --git a/arch/powerpc/platforms/cell/beat_wrapper.h b/arch/powerpc/platforms/cell/beat_wrapper.h new file mode 100644 index 00000000000..c1109969f24 --- /dev/null +++ b/arch/powerpc/platforms/cell/beat_wrapper.h @@ -0,0 +1,290 @@ +/* + * Beat hypervisor call I/F + * + * (C) Copyright 2007 TOSHIBA CORPORATION + * + * This code is based on arch/powerpc/platforms/pseries/plpar_wrapper.h. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ +#ifndef BEAT_HCALL +#include <linux/string.h> +#include "beat_syscall.h" + +/* defined in hvCall.S */ +extern s64 beat_hcall_norets(u64 opcode, ...); +extern s64 beat_hcall_norets8(u64 opcode, u64 arg1, u64 arg2, u64 arg3, + u64 arg4, u64 arg5, u64 arg6, u64 arg7, u64 arg8); +extern s64 beat_hcall1(u64 opcode, u64 retbuf[1], ...); +extern s64 beat_hcall2(u64 opcode, u64 retbuf[2], ...); +extern s64 beat_hcall3(u64 opcode, u64 retbuf[3], ...); +extern s64 beat_hcall4(u64 opcode, u64 retbuf[4], ...); +extern s64 beat_hcall5(u64 opcode, u64 retbuf[5], ...); +extern s64 beat_hcall6(u64 opcode, u64 retbuf[6], ...); + +static inline s64 beat_downcount_of_interrupt(u64 plug_id) +{ + return beat_hcall_norets(HV_downcount_of_interrupt, plug_id); +} + +static inline s64 beat_set_interrupt_mask(u64 index, + u64 val0, u64 val1, u64 val2, u64 val3) +{ + return beat_hcall_norets(HV_set_interrupt_mask, index, + val0, val1, val2, val3); +} + +static inline s64 beat_destruct_irq_plug(u64 plug_id) +{ + return beat_hcall_norets(HV_destruct_irq_plug, plug_id); +} + +static inline s64 beat_construct_and_connect_irq_plug(u64 plug_id, + u64 outlet_id) +{ + return beat_hcall_norets(HV_construct_and_connect_irq_plug, plug_id, + outlet_id); +} + +static inline s64 beat_detect_pending_interrupts(u64 index, u64 *retbuf) +{ + return beat_hcall4(HV_detect_pending_interrupts, retbuf, index); +} + +static inline s64 beat_pause(u64 style) +{ + return beat_hcall_norets(HV_pause, style); +} + +static inline s64 beat_read_htab_entries(u64 htab_id, u64 index, u64 *retbuf) +{ + return beat_hcall5(HV_read_htab_entries, retbuf, htab_id, index); +} + +static inline s64 beat_insert_htab_entry(u64 htab_id, u64 group, + u64 bitmask, u64 hpte_v, u64 hpte_r, u64 *slot) +{ + u64 dummy[3]; + s64 ret; + + ret = beat_hcall3(HV_insert_htab_entry, dummy, htab_id, group, + bitmask, hpte_v, hpte_r); + *slot = dummy[0]; + return ret; +} + +static inline s64 beat_write_htab_entry(u64 htab_id, u64 slot, + u64 hpte_v, u64 hpte_r, u64 mask_v, u64 mask_r, + u64 *ret_v, u64 *ret_r) +{ + u64 dummy[2]; + s64 ret; + + ret = beat_hcall2(HV_write_htab_entry, dummy, htab_id, slot, + hpte_v, hpte_r, mask_v, mask_r); + *ret_v = dummy[0]; + *ret_r = dummy[1]; + return ret; +} + +static inline s64 beat_insert_htab_entry3(u64 htab_id, u64 group, + u64 hpte_v, u64 hpte_r, u64 mask_v, u64 value_v, u64 *slot) +{ + u64 dummy[1]; + s64 ret; + + ret = beat_hcall1(HV_insert_htab_entry3, dummy, htab_id, group, + hpte_v, hpte_r, mask_v, value_v); + *slot = dummy[0]; + return ret; +} + +static inline s64 beat_invalidate_htab_entry3(u64 htab_id, u64 group, + u64 va, u64 pss) +{ + return beat_hcall_norets(HV_invalidate_htab_entry3, + htab_id, group, va, pss); +} + +static inline s64 beat_update_htab_permission3(u64 htab_id, u64 group, + u64 va, u64 pss, u64 ptel_mask, u64 ptel_value) +{ + return beat_hcall_norets(HV_update_htab_permission3, + htab_id, group, va, pss, ptel_mask, ptel_value); +} + +static inline s64 beat_clear_htab3(u64 htab_id) +{ + return beat_hcall_norets(HV_clear_htab3, htab_id); +} + +static inline void beat_shutdown_logical_partition(u64 code) +{ + (void)beat_hcall_norets(HV_shutdown_logical_partition, code); +} + +static inline s64 beat_rtc_write(u64 time_from_epoch) +{ + return beat_hcall_norets(HV_rtc_write, time_from_epoch); +} + +static inline s64 beat_rtc_read(u64 *time_from_epoch) +{ + u64 dummy[1]; + s64 ret; + + ret = beat_hcall1(HV_rtc_read, dummy); + *time_from_epoch = dummy[0]; + return ret; +} + +#define BEAT_NVRW_CNT (sizeof(u64) * 
6) + +static inline s64 beat_eeprom_write(u64 index, u64 length, u8 *buffer) +{ + u64 b[6]; + + if (length > BEAT_NVRW_CNT) + return -1; + memcpy(b, buffer, sizeof(b)); + return beat_hcall_norets8(HV_eeprom_write, index, length, + b[0], b[1], b[2], b[3], b[4], b[5]); +} + +static inline s64 beat_eeprom_read(u64 index, u64 length, u8 *buffer) +{ + u64 b[6]; + s64 ret; + + if (length > BEAT_NVRW_CNT) + return -1; + ret = beat_hcall6(HV_eeprom_read, b, index, length); + memcpy(buffer, b, length); + return ret; +} + +static inline s64 beat_set_dabr(u64 value, u64 style) +{ + return beat_hcall_norets(HV_set_dabr, value, style); +} + +static inline s64 beat_get_characters_from_console(u64 termno, u64 *len, + u8 *buffer) +{ + u64 dummy[3]; + s64 ret; + + ret = beat_hcall3(HV_get_characters_from_console, dummy, termno, len); + *len = dummy[0]; + memcpy(buffer, dummy + 1, *len); + return ret; +} + +static inline s64 beat_put_characters_to_console(u64 termno, u64 len, + u8 *buffer) +{ + u64 b[2]; + + memcpy(b, buffer, len); + return beat_hcall_norets(HV_put_characters_to_console, termno, len, + b[0], b[1]); +} + +static inline s64 beat_get_spe_privileged_state_1_registers( + u64 id, u64 offsetof, u64 *value) +{ + u64 dummy[1]; + s64 ret; + + ret = beat_hcall1(HV_get_spe_privileged_state_1_registers, dummy, id, + offsetof); + *value = dummy[0]; + return ret; +} + +static inline s64 beat_set_irq_mask_for_spe(u64 id, u64 class, u64 mask) +{ + return beat_hcall_norets(HV_set_irq_mask_for_spe, id, class, mask); +} + +static inline s64 beat_clear_interrupt_status_of_spe(u64 id, u64 class, + u64 mask) +{ + return beat_hcall_norets(HV_clear_interrupt_status_of_spe, + id, class, mask); +} + +static inline s64 beat_set_spe_privileged_state_1_registers( + u64 id, u64 offsetof, u64 value) +{ + return beat_hcall_norets(HV_set_spe_privileged_state_1_registers, + id, offsetof, value); +} + +static inline s64 beat_get_interrupt_status_of_spe(u64 id, u64 class, u64 *val) +{ + u64 dummy[1]; + s64 ret; + + ret = beat_hcall1(HV_get_interrupt_status_of_spe, dummy, id, class); + *val = dummy[0]; + return ret; +} + +static inline s64 beat_put_iopte(u64 ioas_id, u64 io_addr, u64 real_addr, + u64 ioid, u64 flags) +{ + return beat_hcall_norets(HV_put_iopte, ioas_id, io_addr, real_addr, + ioid, flags); +} + +static inline s64 beat_construct_event_receive_port(u64 *port) +{ + u64 dummy[1]; + s64 ret; + + ret = beat_hcall1(HV_construct_event_receive_port, dummy); + *port = dummy[0]; + return ret; +} + +static inline s64 beat_destruct_event_receive_port(u64 port) +{ + s64 ret; + + ret = beat_hcall_norets(HV_destruct_event_receive_port, port); + return ret; +} + +static inline s64 beat_create_repository_node(u64 path[4], u64 data[2]) +{ + s64 ret; + + ret = beat_hcall_norets(HV_create_repository_node2, + path[0], path[1], path[2], path[3], data[0], data[1]); + return ret; +} + +static inline s64 beat_get_repository_node_value(u64 lpid, u64 path[4], + u64 data[2]) +{ + s64 ret; + + ret = beat_hcall2(HV_get_repository_node_value2, data, + lpid, path[0], path[1], path[2], path[3]); + return ret; +} + +#endif diff --git a/arch/powerpc/platforms/cell/cbe_powerbutton.c b/arch/powerpc/platforms/cell/cbe_powerbutton.c new file mode 100644 index 00000000000..2bb8031303f --- /dev/null +++ b/arch/powerpc/platforms/cell/cbe_powerbutton.c @@ -0,0 +1,118 @@ +/* + * driver for powerbutton on IBM cell blades + * + * (C) Copyright IBM Corp. 
2005-2008 + * + * Author: Christian Krafft <krafft@de.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <linux/input.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <asm/pmi.h> +#include <asm/prom.h> + +static struct input_dev *button_dev; +static struct platform_device *button_pdev; + +static void cbe_powerbutton_handle_pmi(pmi_message_t pmi_msg) +{ + BUG_ON(pmi_msg.type != PMI_TYPE_POWER_BUTTON); + + input_report_key(button_dev, KEY_POWER, 1); + input_sync(button_dev); + input_report_key(button_dev, KEY_POWER, 0); + input_sync(button_dev); +} + +static struct pmi_handler cbe_pmi_handler = { + .type = PMI_TYPE_POWER_BUTTON, + .handle_pmi_message = cbe_powerbutton_handle_pmi, +}; + +static int __init cbe_powerbutton_init(void) +{ + int ret = 0; + struct input_dev *dev; + + if (!of_machine_is_compatible("IBM,CBPLUS-1.0")) { + printk(KERN_ERR "%s: Not a cell blade.\n", __func__); + ret = -ENODEV; + goto out; + } + + dev = input_allocate_device(); + if (!dev) { + ret = -ENOMEM; + printk(KERN_ERR "%s: Not enough memory.\n", __func__); + goto out; + } + + set_bit(EV_KEY, dev->evbit); + set_bit(KEY_POWER, dev->keybit); + + dev->name = "Power Button"; + dev->id.bustype = BUS_HOST; + + /* this makes the button look like an acpi power button + * no clue whether anyone relies on that though */ + dev->id.product = 0x02; + dev->phys = "LNXPWRBN/button/input0"; + + button_pdev = platform_device_register_simple("power_button", 0, NULL, 0); + if (IS_ERR(button_pdev)) { + ret = PTR_ERR(button_pdev); + goto out_free_input; + } + + dev->dev.parent = &button_pdev->dev; + ret = input_register_device(dev); + if (ret) { + printk(KERN_ERR "%s: Failed to register device\n", __func__); + goto out_free_pdev; + } + + button_dev = dev; + + ret = pmi_register_handler(&cbe_pmi_handler); + if (ret) { + printk(KERN_ERR "%s: Failed to register with pmi.\n", __func__); + goto out_free_pdev; + } + + goto out; + +out_free_pdev: + platform_device_unregister(button_pdev); +out_free_input: + input_free_device(dev); +out: + return ret; +} + +static void __exit cbe_powerbutton_exit(void) +{ + pmi_unregister_handler(&cbe_pmi_handler); + platform_device_unregister(button_pdev); + input_free_device(button_dev); +} + +module_init(cbe_powerbutton_init); +module_exit(cbe_powerbutton_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Christian Krafft <krafft@de.ibm.com>"); diff --git a/arch/powerpc/platforms/cell/cbe_regs.c b/arch/powerpc/platforms/cell/cbe_regs.c new file mode 100644 index 00000000000..1428d583c23 --- /dev/null +++ b/arch/powerpc/platforms/cell/cbe_regs.c @@ -0,0 +1,281 @@ +/* + * cbe_regs.c + * + * Accessor routines for the various MMIO register blocks of the CBE + * + * (c) 2006 Benjamin Herrenschmidt <benh@kernel.crashing.org>, IBM Corp. 
+ */ + +#include <linux/percpu.h> +#include <linux/types.h> +#include <linux/export.h> +#include <linux/of_device.h> +#include <linux/of_platform.h> + +#include <asm/io.h> +#include <asm/pgtable.h> +#include <asm/prom.h> +#include <asm/ptrace.h> +#include <asm/cell-regs.h> + +/* + * Current implementation uses "cpu" nodes. We build our own mapping + * array of cpu numbers to cpu nodes locally for now to allow interrupt + * time code to have a fast path rather than call of_get_cpu_node(). If + * we implement cpu hotplug, we'll have to install an appropriate norifier + * in order to release references to the cpu going away + */ +static struct cbe_regs_map +{ + struct device_node *cpu_node; + struct device_node *be_node; + struct cbe_pmd_regs __iomem *pmd_regs; + struct cbe_iic_regs __iomem *iic_regs; + struct cbe_mic_tm_regs __iomem *mic_tm_regs; + struct cbe_pmd_shadow_regs pmd_shadow_regs; +} cbe_regs_maps[MAX_CBE]; +static int cbe_regs_map_count; + +static struct cbe_thread_map +{ + struct device_node *cpu_node; + struct device_node *be_node; + struct cbe_regs_map *regs; + unsigned int thread_id; + unsigned int cbe_id; +} cbe_thread_map[NR_CPUS]; + +static cpumask_t cbe_local_mask[MAX_CBE] = { [0 ... MAX_CBE-1] = {CPU_BITS_NONE} }; +static cpumask_t cbe_first_online_cpu = { CPU_BITS_NONE }; + +static struct cbe_regs_map *cbe_find_map(struct device_node *np) +{ + int i; + struct device_node *tmp_np; + + if (strcasecmp(np->type, "spe")) { + for (i = 0; i < cbe_regs_map_count; i++) + if (cbe_regs_maps[i].cpu_node == np || + cbe_regs_maps[i].be_node == np) + return &cbe_regs_maps[i]; + return NULL; + } + + if (np->data) + return np->data; + + /* walk up path until cpu or be node was found */ + tmp_np = np; + do { + tmp_np = tmp_np->parent; + /* on a correct devicetree we wont get up to root */ + BUG_ON(!tmp_np); + } while (strcasecmp(tmp_np->type, "cpu") && + strcasecmp(tmp_np->type, "be")); + + np->data = cbe_find_map(tmp_np); + + return np->data; +} + +struct cbe_pmd_regs __iomem *cbe_get_pmd_regs(struct device_node *np) +{ + struct cbe_regs_map *map = cbe_find_map(np); + if (map == NULL) + return NULL; + return map->pmd_regs; +} +EXPORT_SYMBOL_GPL(cbe_get_pmd_regs); + +struct cbe_pmd_regs __iomem *cbe_get_cpu_pmd_regs(int cpu) +{ + struct cbe_regs_map *map = cbe_thread_map[cpu].regs; + if (map == NULL) + return NULL; + return map->pmd_regs; +} +EXPORT_SYMBOL_GPL(cbe_get_cpu_pmd_regs); + +struct cbe_pmd_shadow_regs *cbe_get_pmd_shadow_regs(struct device_node *np) +{ + struct cbe_regs_map *map = cbe_find_map(np); + if (map == NULL) + return NULL; + return &map->pmd_shadow_regs; +} + +struct cbe_pmd_shadow_regs *cbe_get_cpu_pmd_shadow_regs(int cpu) +{ + struct cbe_regs_map *map = cbe_thread_map[cpu].regs; + if (map == NULL) + return NULL; + return &map->pmd_shadow_regs; +} + +struct cbe_iic_regs __iomem *cbe_get_iic_regs(struct device_node *np) +{ + struct cbe_regs_map *map = cbe_find_map(np); + if (map == NULL) + return NULL; + return map->iic_regs; +} + +struct cbe_iic_regs __iomem *cbe_get_cpu_iic_regs(int cpu) +{ + struct cbe_regs_map *map = cbe_thread_map[cpu].regs; + if (map == NULL) + return NULL; + return map->iic_regs; +} + +struct cbe_mic_tm_regs __iomem *cbe_get_mic_tm_regs(struct device_node *np) +{ + struct cbe_regs_map *map = cbe_find_map(np); + if (map == NULL) + return NULL; + return map->mic_tm_regs; +} + +struct cbe_mic_tm_regs __iomem *cbe_get_cpu_mic_tm_regs(int cpu) +{ + struct cbe_regs_map *map = cbe_thread_map[cpu].regs; + if (map == NULL) + return NULL; + return 
map->mic_tm_regs; +} +EXPORT_SYMBOL_GPL(cbe_get_cpu_mic_tm_regs); + +u32 cbe_get_hw_thread_id(int cpu) +{ + return cbe_thread_map[cpu].thread_id; +} +EXPORT_SYMBOL_GPL(cbe_get_hw_thread_id); + +u32 cbe_cpu_to_node(int cpu) +{ + return cbe_thread_map[cpu].cbe_id; +} +EXPORT_SYMBOL_GPL(cbe_cpu_to_node); + +u32 cbe_node_to_cpu(int node) +{ + return cpumask_first(&cbe_local_mask[node]); + +} +EXPORT_SYMBOL_GPL(cbe_node_to_cpu); + +static struct device_node *cbe_get_be_node(int cpu_id) +{ + struct device_node *np; + + for_each_node_by_type (np, "be") { + int len,i; + const phandle *cpu_handle; + + cpu_handle = of_get_property(np, "cpus", &len); + + /* + * the CAB SLOF tree is non compliant, so we just assume + * there is only one node + */ + if (WARN_ON_ONCE(!cpu_handle)) + return np; + + for (i=0; i<len; i++) + if (of_find_node_by_phandle(cpu_handle[i]) == of_get_cpu_node(cpu_id, NULL)) + return np; + } + + return NULL; +} + +void __init cbe_fill_regs_map(struct cbe_regs_map *map) +{ + if(map->be_node) { + struct device_node *be, *np; + + be = map->be_node; + + for_each_node_by_type(np, "pervasive") + if (of_get_parent(np) == be) + map->pmd_regs = of_iomap(np, 0); + + for_each_node_by_type(np, "CBEA-Internal-Interrupt-Controller") + if (of_get_parent(np) == be) + map->iic_regs = of_iomap(np, 2); + + for_each_node_by_type(np, "mic-tm") + if (of_get_parent(np) == be) + map->mic_tm_regs = of_iomap(np, 0); + } else { + struct device_node *cpu; + /* That hack must die die die ! */ + const struct address_prop { + unsigned long address; + unsigned int len; + } __attribute__((packed)) *prop; + + cpu = map->cpu_node; + + prop = of_get_property(cpu, "pervasive", NULL); + if (prop != NULL) + map->pmd_regs = ioremap(prop->address, prop->len); + + prop = of_get_property(cpu, "iic", NULL); + if (prop != NULL) + map->iic_regs = ioremap(prop->address, prop->len); + + prop = of_get_property(cpu, "mic-tm", NULL); + if (prop != NULL) + map->mic_tm_regs = ioremap(prop->address, prop->len); + } +} + + +void __init cbe_regs_init(void) +{ + int i; + unsigned int thread_id; + struct device_node *cpu; + + /* Build local fast map of CPUs */ + for_each_possible_cpu(i) { + cbe_thread_map[i].cpu_node = of_get_cpu_node(i, &thread_id); + cbe_thread_map[i].be_node = cbe_get_be_node(i); + cbe_thread_map[i].thread_id = thread_id; + } + + /* Find maps for each device tree CPU */ + for_each_node_by_type(cpu, "cpu") { + struct cbe_regs_map *map; + unsigned int cbe_id; + + cbe_id = cbe_regs_map_count++; + map = &cbe_regs_maps[cbe_id]; + + if (cbe_regs_map_count > MAX_CBE) { + printk(KERN_ERR "cbe_regs: More BE chips than supported" + "!\n"); + cbe_regs_map_count--; + of_node_put(cpu); + return; + } + map->cpu_node = cpu; + + for_each_possible_cpu(i) { + struct cbe_thread_map *thread = &cbe_thread_map[i]; + + if (thread->cpu_node == cpu) { + thread->regs = map; + thread->cbe_id = cbe_id; + map->be_node = thread->be_node; + cpumask_set_cpu(i, &cbe_local_mask[cbe_id]); + if(thread->thread_id == 0) + cpumask_set_cpu(i, &cbe_first_online_cpu); + } + } + + cbe_fill_regs_map(map); + } +} + diff --git a/arch/powerpc/platforms/cell/cbe_thermal.c b/arch/powerpc/platforms/cell/cbe_thermal.c new file mode 100644 index 00000000000..2c15ff09448 --- /dev/null +++ b/arch/powerpc/platforms/cell/cbe_thermal.c @@ -0,0 +1,399 @@ +/* + * thermal support for the cell processor + * + * This module adds some sysfs attributes to cpu and spu nodes. + * Base for measurements are the digital thermal sensors (DTS) + * located on the chip. 
+ * The accuracy is 2 degrees, starting from 65 up to 125 degrees Celsius.
+ * The attributes can be found under
+ * /sys/devices/system/cpu/cpuX/thermal
+ * /sys/devices/system/spu/spuX/thermal
+ *
+ * The following attributes are added for each node:
+ * temperature:
+ *	contains the current temperature measured by the DTS
+ * throttle_begin:
+ *	throttling begins when the temperature is greater than or equal to
+ *	throttle_begin. Setting this value to 125 prevents throttling.
+ * throttle_end:
+ *	throttling ceases once the temperature drops below throttle_end.
+ *	Because there is a delay between applying throttling and the
+ *	temperature actually falling, this value should be less than
+ *	throttle_begin. A value equal to throttle_begin provides very
+ *	little hysteresis.
+ * throttle_full_stop:
+ *	If the temperature is greater than or equal to throttle_full_stop,
+ *	full throttling is applied to the cpu or spu. This value should be
+ *	greater than throttle_begin and throttle_end. Setting this value to
+ *	65 prevents the unit from running code at all.
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Christian Krafft <krafft@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */ + +#include <linux/module.h> +#include <linux/device.h> +#include <linux/kernel.h> +#include <linux/cpu.h> +#include <asm/spu.h> +#include <asm/io.h> +#include <asm/prom.h> +#include <asm/cell-regs.h> + +#include "spu_priv1_mmio.h" + +#define TEMP_MIN 65 +#define TEMP_MAX 125 + +#define DEVICE_PREFIX_ATTR(_prefix,_name,_mode) \ +struct device_attribute attr_ ## _prefix ## _ ## _name = { \ + .attr = { .name = __stringify(_name), .mode = _mode }, \ + .show = _prefix ## _show_ ## _name, \ + .store = _prefix ## _store_ ## _name, \ +}; + +static inline u8 reg_to_temp(u8 reg_value) +{ + return ((reg_value & 0x3f) << 1) + TEMP_MIN; +} + +static inline u8 temp_to_reg(u8 temp) +{ + return ((temp - TEMP_MIN) >> 1) & 0x3f; +} + +static struct cbe_pmd_regs __iomem *get_pmd_regs(struct device *dev) +{ + struct spu *spu; + + spu = container_of(dev, struct spu, dev); + + return cbe_get_pmd_regs(spu_devnode(spu)); +} + +/* returns the value for a given spu in a given register */ +static u8 spu_read_register_value(struct device *dev, union spe_reg __iomem *reg) +{ + union spe_reg value; + struct spu *spu; + + spu = container_of(dev, struct spu, dev); + value.val = in_be64(®->val); + + return value.spe[spu->spe_id]; +} + +static ssize_t spu_show_temp(struct device *dev, struct device_attribute *attr, + char *buf) +{ + u8 value; + struct cbe_pmd_regs __iomem *pmd_regs; + + pmd_regs = get_pmd_regs(dev); + + value = spu_read_register_value(dev, &pmd_regs->ts_ctsr1); + + return sprintf(buf, "%d\n", reg_to_temp(value)); +} + +static ssize_t show_throttle(struct cbe_pmd_regs __iomem *pmd_regs, char *buf, int pos) +{ + u64 value; + + value = in_be64(&pmd_regs->tm_tpr.val); + /* access the corresponding byte */ + value >>= pos; + value &= 0x3F; + + return sprintf(buf, "%d\n", reg_to_temp(value)); +} + +static ssize_t store_throttle(struct cbe_pmd_regs __iomem *pmd_regs, const char *buf, size_t size, int pos) +{ + u64 reg_value; + unsigned int temp; + u64 new_value; + int ret; + + ret = sscanf(buf, "%u", &temp); + + if (ret != 1 || temp < TEMP_MIN || temp > TEMP_MAX) + return -EINVAL; + + new_value = temp_to_reg(temp); + + reg_value = in_be64(&pmd_regs->tm_tpr.val); + + /* zero out bits for new value */ + reg_value &= ~(0xffull << pos); + /* set bits to new value */ + reg_value |= new_value << pos; + + out_be64(&pmd_regs->tm_tpr.val, reg_value); + return size; +} + +static ssize_t spu_show_throttle_end(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return show_throttle(get_pmd_regs(dev), buf, 0); +} + +static ssize_t spu_show_throttle_begin(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return show_throttle(get_pmd_regs(dev), buf, 8); +} + +static ssize_t spu_show_throttle_full_stop(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return show_throttle(get_pmd_regs(dev), buf, 16); +} + +static ssize_t spu_store_throttle_end(struct device *dev, + struct device_attribute *attr, const char *buf, size_t size) +{ + return store_throttle(get_pmd_regs(dev), buf, size, 0); +} + +static ssize_t spu_store_throttle_begin(struct device *dev, + struct device_attribute *attr, const char *buf, size_t size) +{ + return store_throttle(get_pmd_regs(dev), buf, size, 8); +} + +static ssize_t spu_store_throttle_full_stop(struct device *dev, + struct device_attribute *attr, const char *buf, size_t size) +{ + return store_throttle(get_pmd_regs(dev), buf, size, 16); +} + +static ssize_t ppe_show_temp(struct device *dev, char *buf, int pos) +{ + struct cbe_pmd_regs __iomem 
*pmd_regs; + u64 value; + + pmd_regs = cbe_get_cpu_pmd_regs(dev->id); + value = in_be64(&pmd_regs->ts_ctsr2); + + value = (value >> pos) & 0x3f; + + return sprintf(buf, "%d\n", reg_to_temp(value)); +} + + +/* shows the temperature of the DTS on the PPE, + * located near the linear thermal sensor */ +static ssize_t ppe_show_temp0(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return ppe_show_temp(dev, buf, 32); +} + +/* shows the temperature of the second DTS on the PPE */ +static ssize_t ppe_show_temp1(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return ppe_show_temp(dev, buf, 0); +} + +static ssize_t ppe_show_throttle_end(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return show_throttle(cbe_get_cpu_pmd_regs(dev->id), buf, 32); +} + +static ssize_t ppe_show_throttle_begin(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return show_throttle(cbe_get_cpu_pmd_regs(dev->id), buf, 40); +} + +static ssize_t ppe_show_throttle_full_stop(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return show_throttle(cbe_get_cpu_pmd_regs(dev->id), buf, 48); +} + +static ssize_t ppe_store_throttle_end(struct device *dev, + struct device_attribute *attr, const char *buf, size_t size) +{ + return store_throttle(cbe_get_cpu_pmd_regs(dev->id), buf, size, 32); +} + +static ssize_t ppe_store_throttle_begin(struct device *dev, + struct device_attribute *attr, const char *buf, size_t size) +{ + return store_throttle(cbe_get_cpu_pmd_regs(dev->id), buf, size, 40); +} + +static ssize_t ppe_store_throttle_full_stop(struct device *dev, + struct device_attribute *attr, const char *buf, size_t size) +{ + return store_throttle(cbe_get_cpu_pmd_regs(dev->id), buf, size, 48); +} + + +static struct device_attribute attr_spu_temperature = { + .attr = {.name = "temperature", .mode = 0400 }, + .show = spu_show_temp, +}; + +static DEVICE_PREFIX_ATTR(spu, throttle_end, 0600); +static DEVICE_PREFIX_ATTR(spu, throttle_begin, 0600); +static DEVICE_PREFIX_ATTR(spu, throttle_full_stop, 0600); + + +static struct attribute *spu_attributes[] = { + &attr_spu_temperature.attr, + &attr_spu_throttle_end.attr, + &attr_spu_throttle_begin.attr, + &attr_spu_throttle_full_stop.attr, + NULL, +}; + +static struct attribute_group spu_attribute_group = { + .name = "thermal", + .attrs = spu_attributes, +}; + +static struct device_attribute attr_ppe_temperature0 = { + .attr = {.name = "temperature0", .mode = 0400 }, + .show = ppe_show_temp0, +}; + +static struct device_attribute attr_ppe_temperature1 = { + .attr = {.name = "temperature1", .mode = 0400 }, + .show = ppe_show_temp1, +}; + +static DEVICE_PREFIX_ATTR(ppe, throttle_end, 0600); +static DEVICE_PREFIX_ATTR(ppe, throttle_begin, 0600); +static DEVICE_PREFIX_ATTR(ppe, throttle_full_stop, 0600); + +static struct attribute *ppe_attributes[] = { + &attr_ppe_temperature0.attr, + &attr_ppe_temperature1.attr, + &attr_ppe_throttle_end.attr, + &attr_ppe_throttle_begin.attr, + &attr_ppe_throttle_full_stop.attr, + NULL, +}; + +static struct attribute_group ppe_attribute_group = { + .name = "thermal", + .attrs = ppe_attributes, +}; + +/* + * initialize throttling with default values + */ +static int __init init_default_values(void) +{ + int cpu; + struct cbe_pmd_regs __iomem *pmd_regs; + struct device *dev; + union ppe_spe_reg tpr; + union spe_reg str1; + u64 str2; + union spe_reg cr1; + u64 cr2; + + /* TPR defaults */ + /* ppe + * 1F - no full stop + * 08 - dynamic throttling starts if over 80 degrees + 
* 03 - dynamic throttling ceases if below 70 degrees */ + tpr.ppe = 0x1F0803; + /* spe + * 10 - full stopped when over 96 degrees + * 08 - dynamic throttling starts if over 80 degrees + * 03 - dynamic throttling ceases if below 70 degrees + */ + tpr.spe = 0x100803; + + /* STR defaults */ + /* str1 + * 10 - stop 16 of 32 cycles + */ + str1.val = 0x1010101010101010ull; + /* str2 + * 10 - stop 16 of 32 cycles + */ + str2 = 0x10; + + /* CR defaults */ + /* cr1 + * 4 - normal operation + */ + cr1.val = 0x0404040404040404ull; + /* cr2 + * 4 - normal operation + */ + cr2 = 0x04; + + for_each_possible_cpu (cpu) { + pr_debug("processing cpu %d\n", cpu); + dev = get_cpu_device(cpu); + + if (!dev) { + pr_info("invalid dev pointer for cbe_thermal\n"); + return -EINVAL; + } + + pmd_regs = cbe_get_cpu_pmd_regs(dev->id); + + if (!pmd_regs) { + pr_info("invalid CBE regs pointer for cbe_thermal\n"); + return -EINVAL; + } + + out_be64(&pmd_regs->tm_str2, str2); + out_be64(&pmd_regs->tm_str1.val, str1.val); + out_be64(&pmd_regs->tm_tpr.val, tpr.val); + out_be64(&pmd_regs->tm_cr1.val, cr1.val); + out_be64(&pmd_regs->tm_cr2, cr2); + } + + return 0; +} + + +static int __init thermal_init(void) +{ + int rc = init_default_values(); + + if (rc == 0) { + spu_add_dev_attr_group(&spu_attribute_group); + cpu_add_dev_attr_group(&ppe_attribute_group); + } + + return rc; +} +module_init(thermal_init); + +static void __exit thermal_exit(void) +{ + spu_remove_dev_attr_group(&spu_attribute_group); + cpu_remove_dev_attr_group(&ppe_attribute_group); +} +module_exit(thermal_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Christian Krafft <krafft@de.ibm.com>"); + diff --git a/arch/powerpc/platforms/cell/celleb_pci.c b/arch/powerpc/platforms/cell/celleb_pci.c new file mode 100644 index 00000000000..173568140a3 --- /dev/null +++ b/arch/powerpc/platforms/cell/celleb_pci.c @@ -0,0 +1,500 @@ +/* + * Support for PCI on Celleb platform. + * + * (C) Copyright 2006-2007 TOSHIBA CORPORATION + * + * This code is based on arch/powerpc/kernel/rtas_pci.c: + * Copyright (C) 2001 Dave Engebretsen, IBM Corporation + * Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#undef DEBUG + +#include <linux/kernel.h> +#include <linux/threads.h> +#include <linux/pci.h> +#include <linux/string.h> +#include <linux/init.h> +#include <linux/bootmem.h> +#include <linux/pci_regs.h> +#include <linux/of.h> +#include <linux/of_device.h> +#include <linux/slab.h> + +#include <asm/io.h> +#include <asm/irq.h> +#include <asm/prom.h> +#include <asm/pci-bridge.h> +#include <asm/ppc-pci.h> + +#include "celleb_pci.h" + +#define MAX_PCI_DEVICES 32 +#define MAX_PCI_FUNCTIONS 8 +#define MAX_PCI_BASE_ADDRS 3 /* use 64 bit address */ + +/* definition for fake pci configuration area for GbE, .... ,and etc. 
*/ + +struct celleb_pci_resource { + struct resource r[MAX_PCI_BASE_ADDRS]; +}; + +struct celleb_pci_private { + unsigned char *fake_config[MAX_PCI_DEVICES][MAX_PCI_FUNCTIONS]; + struct celleb_pci_resource *res[MAX_PCI_DEVICES][MAX_PCI_FUNCTIONS]; +}; + +static inline u8 celleb_fake_config_readb(void *addr) +{ + u8 *p = addr; + return *p; +} + +static inline u16 celleb_fake_config_readw(void *addr) +{ + __le16 *p = addr; + return le16_to_cpu(*p); +} + +static inline u32 celleb_fake_config_readl(void *addr) +{ + __le32 *p = addr; + return le32_to_cpu(*p); +} + +static inline void celleb_fake_config_writeb(u32 val, void *addr) +{ + u8 *p = addr; + *p = val; +} + +static inline void celleb_fake_config_writew(u32 val, void *addr) +{ + __le16 val16; + __le16 *p = addr; + val16 = cpu_to_le16(val); + *p = val16; +} + +static inline void celleb_fake_config_writel(u32 val, void *addr) +{ + __le32 val32; + __le32 *p = addr; + val32 = cpu_to_le32(val); + *p = val32; +} + +static unsigned char *get_fake_config_start(struct pci_controller *hose, + int devno, int fn) +{ + struct celleb_pci_private *private = hose->private_data; + + if (private == NULL) + return NULL; + + return private->fake_config[devno][fn]; +} + +static struct celleb_pci_resource *get_resource_start( + struct pci_controller *hose, + int devno, int fn) +{ + struct celleb_pci_private *private = hose->private_data; + + if (private == NULL) + return NULL; + + return private->res[devno][fn]; +} + + +static void celleb_config_read_fake(unsigned char *config, int where, + int size, u32 *val) +{ + char *p = config + where; + + switch (size) { + case 1: + *val = celleb_fake_config_readb(p); + break; + case 2: + *val = celleb_fake_config_readw(p); + break; + case 4: + *val = celleb_fake_config_readl(p); + break; + } +} + +static void celleb_config_write_fake(unsigned char *config, int where, + int size, u32 val) +{ + char *p = config + where; + + switch (size) { + case 1: + celleb_fake_config_writeb(val, p); + break; + case 2: + celleb_fake_config_writew(val, p); + break; + case 4: + celleb_fake_config_writel(val, p); + break; + } +} + +static int celleb_fake_pci_read_config(struct pci_bus *bus, + unsigned int devfn, int where, int size, u32 *val) +{ + char *config; + struct pci_controller *hose = pci_bus_to_host(bus); + unsigned int devno = devfn >> 3; + unsigned int fn = devfn & 0x7; + + /* allignment check */ + BUG_ON(where % size); + + pr_debug(" fake read: bus=0x%x, ", bus->number); + config = get_fake_config_start(hose, devno, fn); + + pr_debug("devno=0x%x, where=0x%x, size=0x%x, ", devno, where, size); + if (!config) { + pr_debug("failed\n"); + return PCIBIOS_DEVICE_NOT_FOUND; + } + + celleb_config_read_fake(config, where, size, val); + pr_debug("val=0x%x\n", *val); + + return PCIBIOS_SUCCESSFUL; +} + + +static int celleb_fake_pci_write_config(struct pci_bus *bus, + unsigned int devfn, int where, int size, u32 val) +{ + char *config; + struct pci_controller *hose = pci_bus_to_host(bus); + struct celleb_pci_resource *res; + unsigned int devno = devfn >> 3; + unsigned int fn = devfn & 0x7; + + /* allignment check */ + BUG_ON(where % size); + + config = get_fake_config_start(hose, devno, fn); + + if (!config) + return PCIBIOS_DEVICE_NOT_FOUND; + + if (val == ~0) { + int i = (where - PCI_BASE_ADDRESS_0) >> 3; + + switch (where) { + case PCI_BASE_ADDRESS_0: + case PCI_BASE_ADDRESS_2: + if (size != 4) + return PCIBIOS_DEVICE_NOT_FOUND; + res = get_resource_start(hose, devno, fn); + if (!res) + return PCIBIOS_DEVICE_NOT_FOUND; + 
celleb_config_write_fake(config, where, size, + (res->r[i].end - res->r[i].start)); + return PCIBIOS_SUCCESSFUL; + case PCI_BASE_ADDRESS_1: + case PCI_BASE_ADDRESS_3: + case PCI_BASE_ADDRESS_4: + case PCI_BASE_ADDRESS_5: + break; + default: + break; + } + } + + celleb_config_write_fake(config, where, size, val); + pr_debug(" fake write: where=%x, size=%d, val=%x\n", + where, size, val); + + return PCIBIOS_SUCCESSFUL; +} + +static struct pci_ops celleb_fake_pci_ops = { + .read = celleb_fake_pci_read_config, + .write = celleb_fake_pci_write_config, +}; + +static inline void celleb_setup_pci_base_addrs(struct pci_controller *hose, + unsigned int devno, unsigned int fn, + unsigned int num_base_addr) +{ + u32 val; + unsigned char *config; + struct celleb_pci_resource *res; + + config = get_fake_config_start(hose, devno, fn); + res = get_resource_start(hose, devno, fn); + + if (!config || !res) + return; + + switch (num_base_addr) { + case 3: + val = (res->r[2].start & 0xfffffff0) + | PCI_BASE_ADDRESS_MEM_TYPE_64; + celleb_config_write_fake(config, PCI_BASE_ADDRESS_4, 4, val); + val = res->r[2].start >> 32; + celleb_config_write_fake(config, PCI_BASE_ADDRESS_5, 4, val); + /* FALLTHROUGH */ + case 2: + val = (res->r[1].start & 0xfffffff0) + | PCI_BASE_ADDRESS_MEM_TYPE_64; + celleb_config_write_fake(config, PCI_BASE_ADDRESS_2, 4, val); + val = res->r[1].start >> 32; + celleb_config_write_fake(config, PCI_BASE_ADDRESS_3, 4, val); + /* FALLTHROUGH */ + case 1: + val = (res->r[0].start & 0xfffffff0) + | PCI_BASE_ADDRESS_MEM_TYPE_64; + celleb_config_write_fake(config, PCI_BASE_ADDRESS_0, 4, val); + val = res->r[0].start >> 32; + celleb_config_write_fake(config, PCI_BASE_ADDRESS_1, 4, val); + break; + } + + val = PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER; + celleb_config_write_fake(config, PCI_COMMAND, 2, val); +} + +static int __init celleb_setup_fake_pci_device(struct device_node *node, + struct pci_controller *hose) +{ + unsigned int rlen; + int num_base_addr = 0; + u32 val; + const u32 *wi0, *wi1, *wi2, *wi3, *wi4; + unsigned int devno, fn; + struct celleb_pci_private *private = hose->private_data; + unsigned char **config = NULL; + struct celleb_pci_resource **res = NULL; + const char *name; + const unsigned long *li; + int size, result; + + if (private == NULL) { + printk(KERN_ERR "PCI: " + "memory space for pci controller is not assigned\n"); + goto error; + } + + name = of_get_property(node, "model", &rlen); + if (!name) { + printk(KERN_ERR "PCI: model property not found.\n"); + goto error; + } + + wi4 = of_get_property(node, "reg", &rlen); + if (wi4 == NULL) + goto error; + + devno = ((wi4[0] >> 8) & 0xff) >> 3; + fn = (wi4[0] >> 8) & 0x7; + + pr_debug("PCI: celleb_setup_fake_pci() %s devno=%x fn=%x\n", name, + devno, fn); + + size = 256; + config = &private->fake_config[devno][fn]; + *config = zalloc_maybe_bootmem(size, GFP_KERNEL); + if (*config == NULL) { + printk(KERN_ERR "PCI: " + "not enough memory for fake configuration space\n"); + goto error; + } + pr_debug("PCI: fake config area assigned 0x%016lx\n", + (unsigned long)*config); + + size = sizeof(struct celleb_pci_resource); + res = &private->res[devno][fn]; + *res = zalloc_maybe_bootmem(size, GFP_KERNEL); + if (*res == NULL) { + printk(KERN_ERR + "PCI: not enough memory for resource data space\n"); + goto error; + } + pr_debug("PCI: res assigned 0x%016lx\n", (unsigned long)*res); + + wi0 = of_get_property(node, "device-id", NULL); + wi1 = of_get_property(node, "vendor-id", NULL); + wi2 = of_get_property(node, 
"class-code", NULL); + wi3 = of_get_property(node, "revision-id", NULL); + if (!wi0 || !wi1 || !wi2 || !wi3) { + printk(KERN_ERR "PCI: Missing device tree properties.\n"); + goto error; + } + + celleb_config_write_fake(*config, PCI_DEVICE_ID, 2, wi0[0] & 0xffff); + celleb_config_write_fake(*config, PCI_VENDOR_ID, 2, wi1[0] & 0xffff); + pr_debug("class-code = 0x%08x\n", wi2[0]); + + celleb_config_write_fake(*config, PCI_CLASS_PROG, 1, wi2[0] & 0xff); + celleb_config_write_fake(*config, PCI_CLASS_DEVICE, 2, + (wi2[0] >> 8) & 0xffff); + celleb_config_write_fake(*config, PCI_REVISION_ID, 1, wi3[0]); + + while (num_base_addr < MAX_PCI_BASE_ADDRS) { + result = of_address_to_resource(node, + num_base_addr, &(*res)->r[num_base_addr]); + if (result) + break; + num_base_addr++; + } + + celleb_setup_pci_base_addrs(hose, devno, fn, num_base_addr); + + li = of_get_property(node, "interrupts", &rlen); + if (!li) { + printk(KERN_ERR "PCI: interrupts not found.\n"); + goto error; + } + val = li[0]; + celleb_config_write_fake(*config, PCI_INTERRUPT_PIN, 1, 1); + celleb_config_write_fake(*config, PCI_INTERRUPT_LINE, 1, val); + +#ifdef DEBUG + pr_debug("PCI: %s irq=%ld\n", name, li[0]); + for (i = 0; i < 6; i++) { + celleb_config_read_fake(*config, + PCI_BASE_ADDRESS_0 + 0x4 * i, 4, + &val); + pr_debug("PCI: %s fn=%d base_address_%d=0x%x\n", + name, fn, i, val); + } +#endif + + celleb_config_write_fake(*config, PCI_HEADER_TYPE, 1, + PCI_HEADER_TYPE_NORMAL); + + return 0; + +error: + if (mem_init_done) { + if (config && *config) + kfree(*config); + if (res && *res) + kfree(*res); + + } else { + if (config && *config) { + size = 256; + free_bootmem(__pa(*config), size); + } + if (res && *res) { + size = sizeof(struct celleb_pci_resource); + free_bootmem(__pa(*res), size); + } + } + + return 1; +} + +static int __init phb_set_bus_ranges(struct device_node *dev, + struct pci_controller *phb) +{ + const int *bus_range; + unsigned int len; + + bus_range = of_get_property(dev, "bus-range", &len); + if (bus_range == NULL || len < 2 * sizeof(int)) + return 1; + + phb->first_busno = bus_range[0]; + phb->last_busno = bus_range[1]; + + return 0; +} + +static void __init celleb_alloc_private_mem(struct pci_controller *hose) +{ + hose->private_data = + zalloc_maybe_bootmem(sizeof(struct celleb_pci_private), + GFP_KERNEL); +} + +static int __init celleb_setup_fake_pci(struct device_node *dev, + struct pci_controller *phb) +{ + struct device_node *node; + + phb->ops = &celleb_fake_pci_ops; + celleb_alloc_private_mem(phb); + + for (node = of_get_next_child(dev, NULL); + node != NULL; node = of_get_next_child(dev, node)) + celleb_setup_fake_pci_device(node, phb); + + return 0; +} + +static struct celleb_phb_spec celleb_fake_pci_spec __initdata = { + .setup = celleb_setup_fake_pci, +}; + +static struct of_device_id celleb_phb_match[] __initdata = { + { + .name = "pci-pseudo", + .data = &celleb_fake_pci_spec, + }, { + .name = "epci", + .data = &celleb_epci_spec, + }, { + .name = "pcie", + .data = &celleb_pciex_spec, + }, { + }, +}; + +int __init celleb_setup_phb(struct pci_controller *phb) +{ + struct device_node *dev = phb->dn; + const struct of_device_id *match; + const struct celleb_phb_spec *phb_spec; + int rc; + + match = of_match_node(celleb_phb_match, dev); + if (!match) + return 1; + + phb_set_bus_ranges(dev, phb); + phb->buid = 1; + + phb_spec = match->data; + rc = (*phb_spec->setup)(dev, phb); + if (rc) + return 1; + + if (phb_spec->ops) + iowa_register_bus(phb, phb_spec->ops, + phb_spec->iowa_init, + 
phb_spec->iowa_data); + return 0; +} + +int celleb_pci_probe_mode(struct pci_bus *bus) +{ + return PCI_PROBE_DEVTREE; +} diff --git a/arch/powerpc/platforms/cell/celleb_pci.h b/arch/powerpc/platforms/cell/celleb_pci.h new file mode 100644 index 00000000000..a801fcc5f38 --- /dev/null +++ b/arch/powerpc/platforms/cell/celleb_pci.h @@ -0,0 +1,46 @@ +/* + * pci prototypes for Celleb platform + * + * (C) Copyright 2006-2007 TOSHIBA CORPORATION + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#ifndef _CELLEB_PCI_H +#define _CELLEB_PCI_H + +#include <linux/pci.h> + +#include <asm/pci-bridge.h> +#include <asm/prom.h> +#include <asm/ppc-pci.h> +#include <asm/io-workarounds.h> + +struct iowa_bus; + +struct celleb_phb_spec { + int (*setup)(struct device_node *, struct pci_controller *); + struct ppc_pci_io *ops; + int (*iowa_init)(struct iowa_bus *, void *); + void *iowa_data; +}; + +extern int celleb_setup_phb(struct pci_controller *); +extern int celleb_pci_probe_mode(struct pci_bus *); + +extern struct celleb_phb_spec celleb_epci_spec; +extern struct celleb_phb_spec celleb_pciex_spec; + +#endif /* _CELLEB_PCI_H */ diff --git a/arch/powerpc/platforms/cell/celleb_scc.h b/arch/powerpc/platforms/cell/celleb_scc.h new file mode 100644 index 00000000000..b596a711c34 --- /dev/null +++ b/arch/powerpc/platforms/cell/celleb_scc.h @@ -0,0 +1,232 @@ +/* + * SCC (Super Companion Chip) definitions + * + * (C) Copyright 2004-2006 TOSHIBA CORPORATION + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#ifndef _CELLEB_SCC_H +#define _CELLEB_SCC_H + +#define PCI_VENDOR_ID_TOSHIBA_2 0x102f +#define PCI_DEVICE_ID_TOSHIBA_SCC_PCIEXC_BRIDGE 0x01b0 +#define PCI_DEVICE_ID_TOSHIBA_SCC_EPCI_BRIDGE 0x01b1 +#define PCI_DEVICE_ID_TOSHIBA_SCC_BRIDGE 0x01b2 +#define PCI_DEVICE_ID_TOSHIBA_SCC_GBE 0x01b3 +#define PCI_DEVICE_ID_TOSHIBA_SCC_ATA 0x01b4 +#define PCI_DEVICE_ID_TOSHIBA_SCC_USB2 0x01b5 +#define PCI_DEVICE_ID_TOSHIBA_SCC_USB 0x01b6 +#define PCI_DEVICE_ID_TOSHIBA_SCC_ENCDEC 0x01b7 + +#define SCC_EPCI_REG 0x0000d000 + +/* EPCI registers */ +#define SCC_EPCI_CNF10_REG 0x010 +#define SCC_EPCI_CNF14_REG 0x014 +#define SCC_EPCI_CNF18_REG 0x018 +#define SCC_EPCI_PVBAT 0x100 +#define SCC_EPCI_VPMBAT 0x104 +#define SCC_EPCI_VPIBAT 0x108 +#define SCC_EPCI_VCSR 0x110 +#define SCC_EPCI_VIENAB 0x114 +#define SCC_EPCI_VISTAT 0x118 +#define SCC_EPCI_VRDCOUNT 0x124 +#define SCC_EPCI_BAM0 0x12c +#define SCC_EPCI_BAM1 0x134 +#define SCC_EPCI_BAM2 0x13c +#define SCC_EPCI_IADR 0x164 +#define SCC_EPCI_CLKRST 0x800 +#define SCC_EPCI_INTSET 0x804 +#define SCC_EPCI_STATUS 0x808 +#define SCC_EPCI_ABTSET 0x80c +#define SCC_EPCI_WATRP 0x810 +#define SCC_EPCI_DUMYRADR 0x814 +#define SCC_EPCI_SWRESP 0x818 +#define SCC_EPCI_CNTOPT 0x81c +#define SCC_EPCI_ECMODE 0xf00 +#define SCC_EPCI_IOM_AC_NUM 5 +#define SCC_EPCI_IOM_ACTE(n) (0xf10 + (n) * 4) +#define SCC_EPCI_IOT_AC_NUM 4 +#define SCC_EPCI_IOT_ACTE(n) (0xf30 + (n) * 4) +#define SCC_EPCI_MAEA 0xf50 +#define SCC_EPCI_MAEC 0xf54 +#define SCC_EPCI_CKCTRL 0xff0 + +/* bits for SCC_EPCI_VCSR */ +#define SCC_EPCI_VCSR_FRE 0x00020000 +#define SCC_EPCI_VCSR_FWE 0x00010000 +#define SCC_EPCI_VCSR_DR 0x00000400 +#define SCC_EPCI_VCSR_SR 0x00000008 +#define SCC_EPCI_VCSR_AT 0x00000004 + +/* bits for SCC_EPCI_VIENAB/SCC_EPCI_VISTAT */ +#define SCC_EPCI_VISTAT_PMPE 0x00000008 +#define SCC_EPCI_VISTAT_PMFE 0x00000004 +#define SCC_EPCI_VISTAT_PRA 0x00000002 +#define SCC_EPCI_VISTAT_PRD 0x00000001 +#define SCC_EPCI_VISTAT_ALL 0x0000000f + +#define SCC_EPCI_VIENAB_PMPEE 0x00000008 +#define SCC_EPCI_VIENAB_PMFEE 0x00000004 +#define SCC_EPCI_VIENAB_PRA 0x00000002 +#define SCC_EPCI_VIENAB_PRD 0x00000001 +#define SCC_EPCI_VIENAB_ALL 0x0000000f + +/* bits for SCC_EPCI_CLKRST */ +#define SCC_EPCI_CLKRST_CKS_MASK 0x00030000 +#define SCC_EPCI_CLKRST_CKS_2 0x00000000 +#define SCC_EPCI_CLKRST_CKS_4 0x00010000 +#define SCC_EPCI_CLKRST_CKS_8 0x00020000 +#define SCC_EPCI_CLKRST_PCICRST 0x00000400 +#define SCC_EPCI_CLKRST_BC 0x00000200 +#define SCC_EPCI_CLKRST_PCIRST 0x00000100 +#define SCC_EPCI_CLKRST_PCKEN 0x00000001 + +/* bits for SCC_EPCI_INTSET/SCC_EPCI_STATUS */ +#define SCC_EPCI_INT_2M 0x01000000 +#define SCC_EPCI_INT_RERR 0x00200000 +#define SCC_EPCI_INT_SERR 0x00100000 +#define SCC_EPCI_INT_PRTER 0x00080000 +#define SCC_EPCI_INT_SER 0x00040000 +#define SCC_EPCI_INT_PER 0x00020000 +#define SCC_EPCI_INT_PAI 0x00010000 +#define SCC_EPCI_INT_1M 0x00000100 +#define SCC_EPCI_INT_PME 0x00000010 +#define SCC_EPCI_INT_INTD 0x00000008 +#define SCC_EPCI_INT_INTC 0x00000004 +#define SCC_EPCI_INT_INTB 0x00000002 +#define SCC_EPCI_INT_INTA 0x00000001 +#define SCC_EPCI_INT_DEVINT 0x0000000f +#define SCC_EPCI_INT_ALL 0x003f001f +#define SCC_EPCI_INT_ALLERR 0x003f0000 + +/* bits for SCC_EPCI_CKCTRL */ +#define SCC_EPCI_CKCTRL_CRST0 0x00010000 +#define SCC_EPCI_CKCTRL_CRST1 0x00020000 +#define SCC_EPCI_CKCTRL_OCLKEN 0x00000100 +#define SCC_EPCI_CKCTRL_LCLKEN 0x00000001 + +#define SCC_EPCI_IDSEL_AD_TO_SLOT(ad) ((ad) - 10) +#define SCC_EPCI_MAX_DEVNU SCC_EPCI_IDSEL_AD_TO_SLOT(32) + +/* bits for 
SCC_EPCI_CNTOPT */ +#define SCC_EPCI_CNTOPT_O2PMB 0x00000002 + +/* SCC PCIEXC SMMIO registers */ +#define PEXCADRS 0x000 +#define PEXCWDATA 0x004 +#define PEXCRDATA 0x008 +#define PEXDADRS 0x010 +#define PEXDCMND 0x014 +#define PEXDWDATA 0x018 +#define PEXDRDATA 0x01c +#define PEXREQID 0x020 +#define PEXTIDMAP 0x024 +#define PEXINTMASK 0x028 +#define PEXINTSTS 0x02c +#define PEXAERRMASK 0x030 +#define PEXAERRSTS 0x034 +#define PEXPRERRMASK 0x040 +#define PEXPRERRSTS 0x044 +#define PEXPRERRID01 0x048 +#define PEXPRERRID23 0x04c +#define PEXVDMASK 0x050 +#define PEXVDSTS 0x054 +#define PEXRCVCPLIDA 0x060 +#define PEXLENERRIDA 0x068 +#define PEXPHYPLLST 0x070 +#define PEXDMRDEN0 0x100 +#define PEXDMRDADR0 0x104 +#define PEXDMRDENX 0x110 +#define PEXDMRDADRX 0x114 +#define PEXECMODE 0xf00 +#define PEXMAEA(n) (0xf50 + (8 * n)) +#define PEXMAEC(n) (0xf54 + (8 * n)) +#define PEXCCRCTRL 0xff0 + +/* SCC PCIEXC bits and shifts for PEXCADRS */ +#define PEXCADRS_BYTE_EN_SHIFT 20 +#define PEXCADRS_CMD_SHIFT 16 +#define PEXCADRS_CMD_READ (0xa << PEXCADRS_CMD_SHIFT) +#define PEXCADRS_CMD_WRITE (0xb << PEXCADRS_CMD_SHIFT) + +/* SCC PCIEXC shifts for PEXDADRS */ +#define PEXDADRS_BUSNO_SHIFT 20 +#define PEXDADRS_DEVNO_SHIFT 15 +#define PEXDADRS_FUNCNO_SHIFT 12 + +/* SCC PCIEXC bits and shifts for PEXDCMND */ +#define PEXDCMND_BYTE_EN_SHIFT 4 +#define PEXDCMND_IO_READ 0x2 +#define PEXDCMND_IO_WRITE 0x3 +#define PEXDCMND_CONFIG_READ 0xa +#define PEXDCMND_CONFIG_WRITE 0xb + +/* SCC PCIEXC bits for PEXPHYPLLST */ +#define PEXPHYPLLST_PEXPHYAPLLST 0x00000001 + +/* SCC PCIEXC bits for PEXECMODE */ +#define PEXECMODE_ALL_THROUGH 0x00000000 +#define PEXECMODE_ALL_8BIT 0x00550155 +#define PEXECMODE_ALL_16BIT 0x00aa02aa + +/* SCC PCIEXC bits for PEXCCRCTRL */ +#define PEXCCRCTRL_PEXIPCOREEN 0x00040000 +#define PEXCCRCTRL_PEXIPCONTEN 0x00020000 +#define PEXCCRCTRL_PEXPHYPLLEN 0x00010000 +#define PEXCCRCTRL_PCIEXCAOCKEN 0x00000100 + +/* SCC PCIEXC port configuration registers */ +#define PEXTCERRCHK 0x21c +#define PEXTAMAPB0 0x220 +#define PEXTAMAPL0 0x224 +#define PEXTAMAPB(n) (PEXTAMAPB0 + 8 * (n)) +#define PEXTAMAPL(n) (PEXTAMAPL0 + 8 * (n)) +#define PEXCHVC0P 0x500 +#define PEXCHVC0NP 0x504 +#define PEXCHVC0C 0x508 +#define PEXCDVC0P 0x50c +#define PEXCDVC0NP 0x510 +#define PEXCDVC0C 0x514 +#define PEXCHVCXP 0x518 +#define PEXCHVCXNP 0x51c +#define PEXCHVCXC 0x520 +#define PEXCDVCXP 0x524 +#define PEXCDVCXNP 0x528 +#define PEXCDVCXC 0x52c +#define PEXCTTRG 0x530 +#define PEXTSCTRL 0x700 +#define PEXTSSTS 0x704 +#define PEXSKPCTRL 0x708 + +/* UHC registers */ +#define SCC_UHC_CKRCTRL 0xff0 +#define SCC_UHC_ECMODE 0xf00 + +/* bits for SCC_UHC_CKRCTRL */ +#define SCC_UHC_F48MCKLEN 0x00000001 +#define SCC_UHC_P_SUSPEND 0x00000002 +#define SCC_UHC_PHY_SUSPEND_SEL 0x00000004 +#define SCC_UHC_HCLKEN 0x00000100 +#define SCC_UHC_USBEN 0x00010000 +#define SCC_UHC_USBCEN 0x00020000 +#define SCC_UHC_PHYEN 0x00040000 + +/* bits for SCC_UHC_ECMODE */ +#define SCC_UHC_ECMODE_BY_BYTE 0x00000555 +#define SCC_UHC_ECMODE_BY_WORD 0x00000aaa + +#endif /* _CELLEB_SCC_H */ diff --git a/arch/powerpc/platforms/cell/celleb_scc_epci.c b/arch/powerpc/platforms/cell/celleb_scc_epci.c new file mode 100644 index 00000000000..844c0facb4f --- /dev/null +++ b/arch/powerpc/platforms/cell/celleb_scc_epci.c @@ -0,0 +1,429 @@ +/* + * Support for SCC external PCI + * + * (C) Copyright 2004-2007 TOSHIBA CORPORATION + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as 
published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#undef DEBUG + +#include <linux/kernel.h> +#include <linux/threads.h> +#include <linux/pci.h> +#include <linux/init.h> +#include <linux/pci_regs.h> +#include <linux/bootmem.h> + +#include <asm/io.h> +#include <asm/irq.h> +#include <asm/prom.h> +#include <asm/pci-bridge.h> +#include <asm/ppc-pci.h> + +#include "celleb_scc.h" +#include "celleb_pci.h" + +#define MAX_PCI_DEVICES 32 +#define MAX_PCI_FUNCTIONS 8 + +#define iob() __asm__ __volatile__("eieio; sync":::"memory") + +static inline PCI_IO_ADDR celleb_epci_get_epci_base( + struct pci_controller *hose) +{ + /* + * Note: + * Celleb epci uses cfg_addr as a base address for + * epci control registers. + */ + + return hose->cfg_addr; +} + +static inline PCI_IO_ADDR celleb_epci_get_epci_cfg( + struct pci_controller *hose) +{ + /* + * Note: + * Celleb epci uses cfg_data as a base address for + * configuration area for epci devices. + */ + + return hose->cfg_data; +} + +static inline void clear_and_disable_master_abort_interrupt( + struct pci_controller *hose) +{ + PCI_IO_ADDR epci_base; + PCI_IO_ADDR reg; + epci_base = celleb_epci_get_epci_base(hose); + reg = epci_base + PCI_COMMAND; + out_be32(reg, in_be32(reg) | (PCI_STATUS_REC_MASTER_ABORT << 16)); +} + +static int celleb_epci_check_abort(struct pci_controller *hose, + PCI_IO_ADDR addr) +{ + PCI_IO_ADDR reg; + PCI_IO_ADDR epci_base; + u32 val; + + iob(); + epci_base = celleb_epci_get_epci_base(hose); + + reg = epci_base + PCI_COMMAND; + val = in_be32(reg); + + if (val & (PCI_STATUS_REC_MASTER_ABORT << 16)) { + out_be32(reg, + (val & 0xffff) | (PCI_STATUS_REC_MASTER_ABORT << 16)); + + /* clear PCI Controller error, FRE, PMFE */ + reg = epci_base + SCC_EPCI_STATUS; + out_be32(reg, SCC_EPCI_INT_PAI); + + reg = epci_base + SCC_EPCI_VCSR; + val = in_be32(reg) & 0xffff; + val |= SCC_EPCI_VCSR_FRE; + out_be32(reg, val); + + reg = epci_base + SCC_EPCI_VISTAT; + out_be32(reg, SCC_EPCI_VISTAT_PMFE); + return PCIBIOS_DEVICE_NOT_FOUND; + } + + return PCIBIOS_SUCCESSFUL; +} + +static PCI_IO_ADDR celleb_epci_make_config_addr(struct pci_bus *bus, + struct pci_controller *hose, unsigned int devfn, int where) +{ + PCI_IO_ADDR addr; + + if (bus != hose->bus) + addr = celleb_epci_get_epci_cfg(hose) + + (((bus->number & 0xff) << 16) + | ((devfn & 0xff) << 8) + | (where & 0xff) + | 0x01000000); + else + addr = celleb_epci_get_epci_cfg(hose) + + (((devfn & 0xff) << 8) | (where & 0xff)); + + pr_debug("EPCI: config_addr = 0x%p\n", addr); + + return addr; +} + +static int celleb_epci_read_config(struct pci_bus *bus, + unsigned int devfn, int where, int size, u32 *val) +{ + PCI_IO_ADDR epci_base; + PCI_IO_ADDR addr; + struct pci_controller *hose = pci_bus_to_host(bus); + + /* allignment check */ + BUG_ON(where % size); + + if (!celleb_epci_get_epci_cfg(hose)) + return PCIBIOS_DEVICE_NOT_FOUND; + + if (bus->number == hose->first_busno && devfn == 0) { + /* EPCI controller self */ + + epci_base = 
celleb_epci_get_epci_base(hose); + addr = epci_base + where; + + switch (size) { + case 1: + *val = in_8(addr); + break; + case 2: + *val = in_be16(addr); + break; + case 4: + *val = in_be32(addr); + break; + default: + return PCIBIOS_DEVICE_NOT_FOUND; + } + + } else { + + clear_and_disable_master_abort_interrupt(hose); + addr = celleb_epci_make_config_addr(bus, hose, devfn, where); + + switch (size) { + case 1: + *val = in_8(addr); + break; + case 2: + *val = in_le16(addr); + break; + case 4: + *val = in_le32(addr); + break; + default: + return PCIBIOS_DEVICE_NOT_FOUND; + } + } + + pr_debug("EPCI: " + "addr=0x%p, devfn=0x%x, where=0x%x, size=0x%x, val=0x%x\n", + addr, devfn, where, size, *val); + + return celleb_epci_check_abort(hose, NULL); +} + +static int celleb_epci_write_config(struct pci_bus *bus, + unsigned int devfn, int where, int size, u32 val) +{ + PCI_IO_ADDR epci_base; + PCI_IO_ADDR addr; + struct pci_controller *hose = pci_bus_to_host(bus); + + /* allignment check */ + BUG_ON(where % size); + + if (!celleb_epci_get_epci_cfg(hose)) + return PCIBIOS_DEVICE_NOT_FOUND; + + if (bus->number == hose->first_busno && devfn == 0) { + /* EPCI controller self */ + + epci_base = celleb_epci_get_epci_base(hose); + addr = epci_base + where; + + switch (size) { + case 1: + out_8(addr, val); + break; + case 2: + out_be16(addr, val); + break; + case 4: + out_be32(addr, val); + break; + default: + return PCIBIOS_DEVICE_NOT_FOUND; + } + + } else { + + clear_and_disable_master_abort_interrupt(hose); + addr = celleb_epci_make_config_addr(bus, hose, devfn, where); + + switch (size) { + case 1: + out_8(addr, val); + break; + case 2: + out_le16(addr, val); + break; + case 4: + out_le32(addr, val); + break; + default: + return PCIBIOS_DEVICE_NOT_FOUND; + } + } + + return celleb_epci_check_abort(hose, addr); +} + +struct pci_ops celleb_epci_ops = { + .read = celleb_epci_read_config, + .write = celleb_epci_write_config, +}; + +/* to be moved in FW */ +static int __init celleb_epci_init(struct pci_controller *hose) +{ + u32 val; + PCI_IO_ADDR reg; + PCI_IO_ADDR epci_base; + int hwres = 0; + + epci_base = celleb_epci_get_epci_base(hose); + + /* PCI core reset(Internal bus and PCI clock) */ + reg = epci_base + SCC_EPCI_CKCTRL; + val = in_be32(reg); + if (val == 0x00030101) + hwres = 1; + else { + val &= ~(SCC_EPCI_CKCTRL_CRST0 | SCC_EPCI_CKCTRL_CRST1); + out_be32(reg, val); + + /* set PCI core clock */ + val = in_be32(reg); + val |= (SCC_EPCI_CKCTRL_OCLKEN | SCC_EPCI_CKCTRL_LCLKEN); + out_be32(reg, val); + + /* release PCI core reset (internal bus) */ + val = in_be32(reg); + val |= SCC_EPCI_CKCTRL_CRST0; + out_be32(reg, val); + + /* set PCI clock select */ + reg = epci_base + SCC_EPCI_CLKRST; + val = in_be32(reg); + val &= ~SCC_EPCI_CLKRST_CKS_MASK; + val |= SCC_EPCI_CLKRST_CKS_2; + out_be32(reg, val); + + /* set arbiter */ + reg = epci_base + SCC_EPCI_ABTSET; + out_be32(reg, 0x0f1f001f); /* temporary value */ + + /* buffer on */ + reg = epci_base + SCC_EPCI_CLKRST; + val = in_be32(reg); + val |= SCC_EPCI_CLKRST_BC; + out_be32(reg, val); + + /* PCI clock enable */ + val = in_be32(reg); + val |= SCC_EPCI_CLKRST_PCKEN; + out_be32(reg, val); + + /* release PCI core reset (all) */ + reg = epci_base + SCC_EPCI_CKCTRL; + val = in_be32(reg); + val |= (SCC_EPCI_CKCTRL_CRST0 | SCC_EPCI_CKCTRL_CRST1); + out_be32(reg, val); + + /* set base translation registers. (already set by Beat) */ + + /* set base address masks. 
(already set by Beat) */ + } + + /* release interrupt masks and clear all interrupts */ + reg = epci_base + SCC_EPCI_INTSET; + out_be32(reg, 0x013f011f); /* all interrupts enable */ + reg = epci_base + SCC_EPCI_VIENAB; + val = SCC_EPCI_VIENAB_PMPEE | SCC_EPCI_VIENAB_PMFEE; + out_be32(reg, val); + reg = epci_base + SCC_EPCI_STATUS; + out_be32(reg, 0xffffffff); + reg = epci_base + SCC_EPCI_VISTAT; + out_be32(reg, 0xffffffff); + + /* disable PCI->IB address translation */ + reg = epci_base + SCC_EPCI_VCSR; + val = in_be32(reg); + val &= ~(SCC_EPCI_VCSR_DR | SCC_EPCI_VCSR_AT); + out_be32(reg, val); + + /* set base addresses. (no need to set?) */ + + /* memory space, bus master enable */ + reg = epci_base + PCI_COMMAND; + val = PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER; + out_be32(reg, val); + + /* endian mode setup */ + reg = epci_base + SCC_EPCI_ECMODE; + val = 0x00550155; + out_be32(reg, val); + + /* set control option */ + reg = epci_base + SCC_EPCI_CNTOPT; + val = in_be32(reg); + val |= SCC_EPCI_CNTOPT_O2PMB; + out_be32(reg, val); + + /* XXX: temporay: set registers for address conversion setup */ + reg = epci_base + SCC_EPCI_CNF10_REG; + out_be32(reg, 0x80000008); + reg = epci_base + SCC_EPCI_CNF14_REG; + out_be32(reg, 0x40000008); + + reg = epci_base + SCC_EPCI_BAM0; + out_be32(reg, 0x80000000); + reg = epci_base + SCC_EPCI_BAM1; + out_be32(reg, 0xe0000000); + + reg = epci_base + SCC_EPCI_PVBAT; + out_be32(reg, 0x80000000); + + if (!hwres) { + /* release external PCI reset */ + reg = epci_base + SCC_EPCI_CLKRST; + val = in_be32(reg); + val |= SCC_EPCI_CLKRST_PCIRST; + out_be32(reg, val); + } + + return 0; +} + +static int __init celleb_setup_epci(struct device_node *node, + struct pci_controller *hose) +{ + struct resource r; + + pr_debug("PCI: celleb_setup_epci()\n"); + + /* + * Note: + * Celleb epci uses cfg_addr and cfg_data member of + * pci_controller structure in irregular way. + * + * cfg_addr is used to map for control registers of + * celleb epci. + * + * cfg_data is used for configuration area of devices + * on Celleb epci buses. + */ + + if (of_address_to_resource(node, 0, &r)) + goto error; + hose->cfg_addr = ioremap(r.start, resource_size(&r)); + if (!hose->cfg_addr) + goto error; + pr_debug("EPCI: cfg_addr map 0x%016llx->0x%016lx + 0x%016llx\n", + r.start, (unsigned long)hose->cfg_addr, resource_size(&r)); + + if (of_address_to_resource(node, 2, &r)) + goto error; + hose->cfg_data = ioremap(r.start, resource_size(&r)); + if (!hose->cfg_data) + goto error; + pr_debug("EPCI: cfg_data map 0x%016llx->0x%016lx + 0x%016llx\n", + r.start, (unsigned long)hose->cfg_data, resource_size(&r)); + + hose->ops = &celleb_epci_ops; + celleb_epci_init(hose); + + return 0; + +error: + if (hose->cfg_addr) + iounmap(hose->cfg_addr); + + if (hose->cfg_data) + iounmap(hose->cfg_data); + return 1; +} + +struct celleb_phb_spec celleb_epci_spec __initdata = { + .setup = celleb_setup_epci, + .ops = &spiderpci_ops, + .iowa_init = &spiderpci_iowa_init, + .iowa_data = (void *)0, +}; diff --git a/arch/powerpc/platforms/cell/celleb_scc_pciex.c b/arch/powerpc/platforms/cell/celleb_scc_pciex.c new file mode 100644 index 00000000000..4278acfa2ed --- /dev/null +++ b/arch/powerpc/platforms/cell/celleb_scc_pciex.c @@ -0,0 +1,539 @@ +/* + * Support for Celleb PCI-Express. 
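+ * (Config cycles and port I/O are issued indirectly through the
+ * bridge's SMMIO registers; MMIO reads are followed by a dummy
+ * register read to flush them.)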
+ * + * (C) Copyright 2007-2008 TOSHIBA CORPORATION + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#undef DEBUG + +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/string.h> +#include <linux/slab.h> +#include <linux/init.h> +#include <linux/bootmem.h> +#include <linux/delay.h> +#include <linux/interrupt.h> + +#include <asm/io.h> +#include <asm/irq.h> +#include <asm/iommu.h> +#include <asm/byteorder.h> + +#include "celleb_scc.h" +#include "celleb_pci.h" + +#define PEX_IN(base, off) in_be32((void __iomem *)(base) + (off)) +#define PEX_OUT(base, off, data) out_be32((void __iomem *)(base) + (off), (data)) + +static void scc_pciex_io_flush(struct iowa_bus *bus) +{ + (void)PEX_IN(bus->phb->cfg_addr, PEXDMRDEN0); +} + +/* + * Memory space access to device on PCIEX + */ +#define PCIEX_MMIO_READ(name, ret) \ +static ret scc_pciex_##name(const PCI_IO_ADDR addr) \ +{ \ + ret val = __do_##name(addr); \ + scc_pciex_io_flush(iowa_mem_find_bus(addr)); \ + return val; \ +} + +#define PCIEX_MMIO_READ_STR(name) \ +static void scc_pciex_##name(const PCI_IO_ADDR addr, void *buf, \ + unsigned long count) \ +{ \ + __do_##name(addr, buf, count); \ + scc_pciex_io_flush(iowa_mem_find_bus(addr)); \ +} + +PCIEX_MMIO_READ(readb, u8) +PCIEX_MMIO_READ(readw, u16) +PCIEX_MMIO_READ(readl, u32) +PCIEX_MMIO_READ(readq, u64) +PCIEX_MMIO_READ(readw_be, u16) +PCIEX_MMIO_READ(readl_be, u32) +PCIEX_MMIO_READ(readq_be, u64) +PCIEX_MMIO_READ_STR(readsb) +PCIEX_MMIO_READ_STR(readsw) +PCIEX_MMIO_READ_STR(readsl) + +static void scc_pciex_memcpy_fromio(void *dest, const PCI_IO_ADDR src, + unsigned long n) +{ + __do_memcpy_fromio(dest, src, n); + scc_pciex_io_flush(iowa_mem_find_bus(src)); +} + +/* + * I/O port access to devices on PCIEX. 
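+ * Each access is turned into an indirect transaction: the bus address
+ * and byte enables are programmed into PEXDADRS/PEXDCMND, data moves
+ * through PEXDWDATA/PEXDRDATA, and accesses that would cross a 4-byte
+ * boundary are split into two transactions.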
+ */ + +static inline unsigned long get_bus_address(struct pci_controller *phb, + unsigned long port) +{ + return port - ((unsigned long)(phb->io_base_virt) - _IO_BASE); +} + +static u32 scc_pciex_read_port(struct pci_controller *phb, + unsigned long port, int size) +{ + unsigned int byte_enable; + unsigned int cmd, shift; + unsigned long addr; + u32 data, ret; + + BUG_ON(((port & 0x3ul) + size) > 4); + + addr = get_bus_address(phb, port); + shift = addr & 0x3ul; + byte_enable = ((1 << size) - 1) << shift; + cmd = PEXDCMND_IO_READ | (byte_enable << PEXDCMND_BYTE_EN_SHIFT); + PEX_OUT(phb->cfg_addr, PEXDADRS, (addr & ~0x3ul)); + PEX_OUT(phb->cfg_addr, PEXDCMND, cmd); + data = PEX_IN(phb->cfg_addr, PEXDRDATA); + ret = (data >> (shift * 8)) & (0xFFFFFFFF >> ((4 - size) * 8)); + + pr_debug("PCIEX:PIO READ:port=0x%lx, addr=0x%lx, size=%d, be=%x," + " cmd=%x, data=%x, ret=%x\n", port, addr, size, byte_enable, + cmd, data, ret); + + return ret; +} + +static void scc_pciex_write_port(struct pci_controller *phb, + unsigned long port, int size, u32 val) +{ + unsigned int byte_enable; + unsigned int cmd, shift; + unsigned long addr; + u32 data; + + BUG_ON(((port & 0x3ul) + size) > 4); + + addr = get_bus_address(phb, port); + shift = addr & 0x3ul; + byte_enable = ((1 << size) - 1) << shift; + cmd = PEXDCMND_IO_WRITE | (byte_enable << PEXDCMND_BYTE_EN_SHIFT); + data = (val & (0xFFFFFFFF >> (4 - size) * 8)) << (shift * 8); + PEX_OUT(phb->cfg_addr, PEXDADRS, (addr & ~0x3ul)); + PEX_OUT(phb->cfg_addr, PEXDCMND, cmd); + PEX_OUT(phb->cfg_addr, PEXDWDATA, data); + + pr_debug("PCIEX:PIO WRITE:port=0x%lx, addr=%lx, size=%d, val=%x," + " be=%x, cmd=%x, data=%x\n", port, addr, size, val, + byte_enable, cmd, data); +} + +static u8 __scc_pciex_inb(struct pci_controller *phb, unsigned long port) +{ + return (u8)scc_pciex_read_port(phb, port, 1); +} + +static u16 __scc_pciex_inw(struct pci_controller *phb, unsigned long port) +{ + u32 data; + if ((port & 0x3ul) < 3) + data = scc_pciex_read_port(phb, port, 2); + else { + u32 d1 = scc_pciex_read_port(phb, port, 1); + u32 d2 = scc_pciex_read_port(phb, port + 1, 1); + data = d1 | (d2 << 8); + } + return (u16)data; +} + +static u32 __scc_pciex_inl(struct pci_controller *phb, unsigned long port) +{ + unsigned int mod = port & 0x3ul; + u32 data; + if (mod == 0) + data = scc_pciex_read_port(phb, port, 4); + else { + u32 d1 = scc_pciex_read_port(phb, port, 4 - mod); + u32 d2 = scc_pciex_read_port(phb, port + 1, mod); + data = d1 | (d2 << (mod * 8)); + } + return data; +} + +static void __scc_pciex_outb(struct pci_controller *phb, + u8 val, unsigned long port) +{ + scc_pciex_write_port(phb, port, 1, (u32)val); +} + +static void __scc_pciex_outw(struct pci_controller *phb, + u16 val, unsigned long port) +{ + if ((port & 0x3ul) < 3) + scc_pciex_write_port(phb, port, 2, (u32)val); + else { + u32 d1 = val & 0x000000FF; + u32 d2 = (val & 0x0000FF00) >> 8; + scc_pciex_write_port(phb, port, 1, d1); + scc_pciex_write_port(phb, port + 1, 1, d2); + } +} + +static void __scc_pciex_outl(struct pci_controller *phb, + u32 val, unsigned long port) +{ + unsigned int mod = port & 0x3ul; + if (mod == 0) + scc_pciex_write_port(phb, port, 4, val); + else { + u32 d1 = val & (0xFFFFFFFFul >> (mod * 8)); + u32 d2 = val >> ((4 - mod) * 8); + scc_pciex_write_port(phb, port, 4 - mod, d1); + scc_pciex_write_port(phb, port + 1, mod, d2); + } +} + +#define PCIEX_PIO_FUNC(size, name) \ +static u##size scc_pciex_in##name(unsigned long port) \ +{ \ + struct iowa_bus *bus = iowa_pio_find_bus(port); \ + 
u##size data = __scc_pciex_in##name(bus->phb, port); \ + scc_pciex_io_flush(bus); \ + return data; \ +} \ +static void scc_pciex_ins##name(unsigned long p, void *b, unsigned long c) \ +{ \ + struct iowa_bus *bus = iowa_pio_find_bus(p); \ + __le##size *dst = b; \ + for (; c != 0; c--, dst++) \ + *dst = cpu_to_le##size(__scc_pciex_in##name(bus->phb, p)); \ + scc_pciex_io_flush(bus); \ +} \ +static void scc_pciex_out##name(u##size val, unsigned long port) \ +{ \ + struct iowa_bus *bus = iowa_pio_find_bus(port); \ + __scc_pciex_out##name(bus->phb, val, port); \ +} \ +static void scc_pciex_outs##name(unsigned long p, const void *b, \ + unsigned long c) \ +{ \ + struct iowa_bus *bus = iowa_pio_find_bus(p); \ + const __le##size *src = b; \ + for (; c != 0; c--, src++) \ + __scc_pciex_out##name(bus->phb, le##size##_to_cpu(*src), p); \ +} +#define __le8 u8 +#define cpu_to_le8(x) (x) +#define le8_to_cpu(x) (x) +PCIEX_PIO_FUNC(8, b) +PCIEX_PIO_FUNC(16, w) +PCIEX_PIO_FUNC(32, l) + +static struct ppc_pci_io scc_pciex_ops = { + .readb = scc_pciex_readb, + .readw = scc_pciex_readw, + .readl = scc_pciex_readl, + .readq = scc_pciex_readq, + .readw_be = scc_pciex_readw_be, + .readl_be = scc_pciex_readl_be, + .readq_be = scc_pciex_readq_be, + .readsb = scc_pciex_readsb, + .readsw = scc_pciex_readsw, + .readsl = scc_pciex_readsl, + .memcpy_fromio = scc_pciex_memcpy_fromio, + .inb = scc_pciex_inb, + .inw = scc_pciex_inw, + .inl = scc_pciex_inl, + .outb = scc_pciex_outb, + .outw = scc_pciex_outw, + .outl = scc_pciex_outl, + .insb = scc_pciex_insb, + .insw = scc_pciex_insw, + .insl = scc_pciex_insl, + .outsb = scc_pciex_outsb, + .outsw = scc_pciex_outsw, + .outsl = scc_pciex_outsl, +}; + +static int __init scc_pciex_iowa_init(struct iowa_bus *bus, void *data) +{ + dma_addr_t dummy_page_da; + void *dummy_page_va; + + dummy_page_va = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!dummy_page_va) { + pr_err("PCIEX:Alloc dummy_page_va failed\n"); + return -1; + } + + dummy_page_da = dma_map_single(bus->phb->parent, dummy_page_va, + PAGE_SIZE, DMA_FROM_DEVICE); + if (dma_mapping_error(bus->phb->parent, dummy_page_da)) { + pr_err("PCIEX:Map dummy page failed.\n"); + kfree(dummy_page_va); + return -1; + } + + PEX_OUT(bus->phb->cfg_addr, PEXDMRDADR0, dummy_page_da); + + return 0; +} + +/* + * config space access + */ +#define MK_PEXDADRS(bus_no, dev_no, func_no, addr) \ + ((uint32_t)(((addr) & ~0x3UL) | \ + ((bus_no) << PEXDADRS_BUSNO_SHIFT) | \ + ((dev_no) << PEXDADRS_DEVNO_SHIFT) | \ + ((func_no) << PEXDADRS_FUNCNO_SHIFT))) + +#define MK_PEXDCMND_BYTE_EN(addr, size) \ + ((((0x1 << (size))-1) << ((addr) & 0x3)) << PEXDCMND_BYTE_EN_SHIFT) +#define MK_PEXDCMND(cmd, addr, size) ((cmd) | MK_PEXDCMND_BYTE_EN(addr, size)) + +static uint32_t config_read_pciex_dev(unsigned int __iomem *base, + uint64_t bus_no, uint64_t dev_no, uint64_t func_no, + uint64_t off, uint64_t size) +{ + uint32_t ret; + uint32_t addr, cmd; + + addr = MK_PEXDADRS(bus_no, dev_no, func_no, off); + cmd = MK_PEXDCMND(PEXDCMND_CONFIG_READ, off, size); + PEX_OUT(base, PEXDADRS, addr); + PEX_OUT(base, PEXDCMND, cmd); + ret = (PEX_IN(base, PEXDRDATA) + >> ((off & (4-size)) * 8)) & ((0x1 << (size * 8)) - 1); + return ret; +} + +static void config_write_pciex_dev(unsigned int __iomem *base, uint64_t bus_no, + uint64_t dev_no, uint64_t func_no, uint64_t off, uint64_t size, + uint32_t data) +{ + uint32_t addr, cmd; + + addr = MK_PEXDADRS(bus_no, dev_no, func_no, off); + cmd = MK_PEXDCMND(PEXDCMND_CONFIG_WRITE, off, size); + PEX_OUT(base, PEXDADRS, addr); + PEX_OUT(base, 
PEXDCMND, cmd); + PEX_OUT(base, PEXDWDATA, + (data & ((0x1 << (size * 8)) - 1)) << ((off & (4-size)) * 8)); +} + +#define MK_PEXCADRS_BYTE_EN(off, len) \ + ((((0x1 << (len)) - 1) << ((off) & 0x3)) << PEXCADRS_BYTE_EN_SHIFT) +#define MK_PEXCADRS(cmd, addr, size) \ + ((cmd) | MK_PEXCADRS_BYTE_EN(addr, size) | ((addr) & ~0x3)) +static uint32_t config_read_pciex_rc(unsigned int __iomem *base, + uint32_t where, uint32_t size) +{ + PEX_OUT(base, PEXCADRS, MK_PEXCADRS(PEXCADRS_CMD_READ, where, size)); + return (PEX_IN(base, PEXCRDATA) + >> ((where & (4 - size)) * 8)) & ((0x1 << (size * 8)) - 1); +} + +static void config_write_pciex_rc(unsigned int __iomem *base, uint32_t where, + uint32_t size, uint32_t val) +{ + uint32_t data; + + data = (val & ((0x1 << (size * 8)) - 1)) << ((where & (4 - size)) * 8); + PEX_OUT(base, PEXCADRS, MK_PEXCADRS(PEXCADRS_CMD_WRITE, where, size)); + PEX_OUT(base, PEXCWDATA, data); +} + +/* Interfaces */ +/* Note: Work-around + * On SCC PCIEXC, one device is seen on all 32 dev_no. + * As SCC PCIEXC can have only one device on the bus, we look only one dev_no. + * (dev_no = 1) + */ +static int scc_pciex_read_config(struct pci_bus *bus, unsigned int devfn, + int where, int size, unsigned int *val) +{ + struct pci_controller *phb = pci_bus_to_host(bus); + + if (bus->number == phb->first_busno && PCI_SLOT(devfn) != 1) { + *val = ~0; + return PCIBIOS_DEVICE_NOT_FOUND; + } + + if (bus->number == 0 && PCI_SLOT(devfn) == 0) + *val = config_read_pciex_rc(phb->cfg_addr, where, size); + else + *val = config_read_pciex_dev(phb->cfg_addr, bus->number, + PCI_SLOT(devfn), PCI_FUNC(devfn), where, size); + + return PCIBIOS_SUCCESSFUL; +} + +static int scc_pciex_write_config(struct pci_bus *bus, unsigned int devfn, + int where, int size, unsigned int val) +{ + struct pci_controller *phb = pci_bus_to_host(bus); + + if (bus->number == phb->first_busno && PCI_SLOT(devfn) != 1) + return PCIBIOS_DEVICE_NOT_FOUND; + + if (bus->number == 0 && PCI_SLOT(devfn) == 0) + config_write_pciex_rc(phb->cfg_addr, where, size, val); + else + config_write_pciex_dev(phb->cfg_addr, bus->number, + PCI_SLOT(devfn), PCI_FUNC(devfn), where, size, val); + return PCIBIOS_SUCCESSFUL; +} + +static struct pci_ops scc_pciex_pci_ops = { + scc_pciex_read_config, + scc_pciex_write_config, +}; + +static void pciex_clear_intr_all(unsigned int __iomem *base) +{ + PEX_OUT(base, PEXAERRSTS, 0xffffffff); + PEX_OUT(base, PEXPRERRSTS, 0xffffffff); + PEX_OUT(base, PEXINTSTS, 0xffffffff); +} + +#if 0 +static void pciex_disable_intr_all(unsigned int *base) +{ + PEX_OUT(base, PEXINTMASK, 0x0); + PEX_OUT(base, PEXAERRMASK, 0x0); + PEX_OUT(base, PEXPRERRMASK, 0x0); + PEX_OUT(base, PEXVDMASK, 0x0); +} +#endif + +static void pciex_enable_intr_all(unsigned int __iomem *base) +{ + PEX_OUT(base, PEXINTMASK, 0x0000e7f1); + PEX_OUT(base, PEXAERRMASK, 0x03ff01ff); + PEX_OUT(base, PEXPRERRMASK, 0x0001010f); + PEX_OUT(base, PEXVDMASK, 0x00000001); +} + +static void pciex_check_status(unsigned int __iomem *base) +{ + uint32_t err = 0; + uint32_t intsts, aerr, prerr, rcvcp, lenerr; + uint32_t maea, maec; + + intsts = PEX_IN(base, PEXINTSTS); + aerr = PEX_IN(base, PEXAERRSTS); + prerr = PEX_IN(base, PEXPRERRSTS); + rcvcp = PEX_IN(base, PEXRCVCPLIDA); + lenerr = PEX_IN(base, PEXLENERRIDA); + + if (intsts || aerr || prerr || rcvcp || lenerr) + err = 1; + + pr_info("PCEXC interrupt!!\n"); + pr_info("PEXINTSTS :0x%08x\n", intsts); + pr_info("PEXAERRSTS :0x%08x\n", aerr); + pr_info("PEXPRERRSTS :0x%08x\n", prerr); + pr_info("PEXRCVCPLIDA :0x%08x\n", 
rcvcp); + pr_info("PEXLENERRIDA :0x%08x\n", lenerr); + + /* print detail of Protection Error */ + if (intsts & 0x00004000) { + uint32_t i, n; + for (i = 0; i < 4; i++) { + n = 1 << i; + if (prerr & n) { + maea = PEX_IN(base, PEXMAEA(i)); + maec = PEX_IN(base, PEXMAEC(i)); + pr_info("PEXMAEC%d :0x%08x\n", i, maec); + pr_info("PEXMAEA%d :0x%08x\n", i, maea); + } + } + } + + if (err) + pciex_clear_intr_all(base); +} + +static irqreturn_t pciex_handle_internal_irq(int irq, void *dev_id) +{ + struct pci_controller *phb = dev_id; + + pr_debug("PCIEX:pciex_handle_internal_irq(irq=%d)\n", irq); + + BUG_ON(phb->cfg_addr == NULL); + + pciex_check_status(phb->cfg_addr); + + return IRQ_HANDLED; +} + +static __init int celleb_setup_pciex(struct device_node *node, + struct pci_controller *phb) +{ + struct resource r; + int virq; + + /* SMMIO registers; used inside this file */ + if (of_address_to_resource(node, 0, &r)) { + pr_err("PCIEXC:Failed to get config resource.\n"); + return 1; + } + phb->cfg_addr = ioremap(r.start, resource_size(&r)); + if (!phb->cfg_addr) { + pr_err("PCIEXC:Failed to remap SMMIO region.\n"); + return 1; + } + + /* Not use cfg_data, cmd and data regs are near address reg */ + phb->cfg_data = NULL; + + /* set pci_ops */ + phb->ops = &scc_pciex_pci_ops; + + /* internal interrupt handler */ + virq = irq_of_parse_and_map(node, 1); + if (!virq) { + pr_err("PCIEXC:Failed to map irq\n"); + goto error; + } + if (request_irq(virq, pciex_handle_internal_irq, + 0, "pciex", (void *)phb)) { + pr_err("PCIEXC:Failed to request irq\n"); + goto error; + } + + /* enable all interrupts */ + pciex_clear_intr_all(phb->cfg_addr); + pciex_enable_intr_all(phb->cfg_addr); + /* MSI: TBD */ + + return 0; + +error: + phb->cfg_data = NULL; + if (phb->cfg_addr) + iounmap(phb->cfg_addr); + phb->cfg_addr = NULL; + return 1; +} + +struct celleb_phb_spec celleb_pciex_spec __initdata = { + .setup = celleb_setup_pciex, + .ops = &scc_pciex_ops, + .iowa_init = &scc_pciex_iowa_init, +}; diff --git a/arch/powerpc/platforms/cell/celleb_scc_sio.c b/arch/powerpc/platforms/cell/celleb_scc_sio.c new file mode 100644 index 00000000000..c8eb5719382 --- /dev/null +++ b/arch/powerpc/platforms/cell/celleb_scc_sio.c @@ -0,0 +1,99 @@ +/* + * setup serial port in SCC + * + * (C) Copyright 2006-2007 TOSHIBA CORPORATION + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#include <linux/tty.h> +#include <linux/serial.h> +#include <linux/serial_core.h> +#include <linux/console.h> + +#include <asm/io.h> +#include <asm/prom.h> + +/* sio irq0=0xb00010022 irq0=0xb00010023 irq2=0xb00010024 + mmio=0xfff000-0x1000,0xff2000-0x1000 */ +static int txx9_serial_bitmap __initdata; + +static struct { + uint32_t offset; + uint32_t index; +} txx9_scc_tab[3] __initdata = { + { 0x300, 0 }, /* 0xFFF300 */ + { 0x400, 0 }, /* 0xFFF400 */ + { 0x800, 1 } /* 0xFF2800 */ +}; + +static int __init txx9_serial_init(void) +{ + extern int early_serial_txx9_setup(struct uart_port *port); + struct device_node *node; + int i; + struct uart_port req; + struct of_phandle_args irq; + struct resource res; + + for_each_compatible_node(node, "serial", "toshiba,sio-scc") { + for (i = 0; i < ARRAY_SIZE(txx9_scc_tab); i++) { + if (!(txx9_serial_bitmap & (1<<i))) + continue; + + if (of_irq_parse_one(node, i, &irq)) + continue; + if (of_address_to_resource(node, + txx9_scc_tab[i].index, &res)) + continue; + + memset(&req, 0, sizeof(req)); + req.line = i; + req.iotype = UPIO_MEM; + req.mapbase = res.start + txx9_scc_tab[i].offset; +#ifdef CONFIG_SERIAL_TXX9_CONSOLE + req.membase = ioremap(req.mapbase, 0x24); +#endif + req.irq = irq_create_of_mapping(&irq); + req.flags |= UPF_IOREMAP | UPF_BUGGY_UART + /*HAVE_CTS_LINE*/; + req.uartclk = 83300000; + early_serial_txx9_setup(&req); + } + } + + return 0; +} + +static int __init txx9_serial_config(char *ptr) +{ + int i; + + for (;;) { + switch (get_option(&ptr, &i)) { + default: + return 0; + case 2: + txx9_serial_bitmap |= 1 << i; + break; + case 1: + txx9_serial_bitmap |= 1 << i; + return 0; + } + } +} +__setup("txx9_serial=", txx9_serial_config); + +console_initcall(txx9_serial_init); diff --git a/arch/powerpc/platforms/cell/celleb_scc_uhc.c b/arch/powerpc/platforms/cell/celleb_scc_uhc.c new file mode 100644 index 00000000000..d63b720bfe3 --- /dev/null +++ b/arch/powerpc/platforms/cell/celleb_scc_uhc.c @@ -0,0 +1,95 @@ +/* + * SCC (Super Companion Chip) UHC setup + * + * (C) Copyright 2006-2007 TOSHIBA CORPORATION + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <linux/kernel.h> +#include <linux/pci.h> + +#include <asm/delay.h> +#include <asm/io.h> +#include <asm/machdep.h> + +#include "celleb_scc.h" + +#define UHC_RESET_WAIT_MAX 10000 + +static inline int uhc_clkctrl_ready(u32 val) +{ + const u32 mask = SCC_UHC_USBCEN | SCC_UHC_USBCEN; + return((val & mask) == mask); +} + +/* + * UHC(usb host controller) enable function. + * affect to both of OHCI and EHCI core module. 
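+ * The controller is brought up by ungating its clocks and PHY in
+ * SCC_UHC_CKRCTRL, polling until the USB clocks report ready, and
+ * then selecting byte-wise endian conversion in SCC_UHC_ECMODE.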
+ */ +static void enable_scc_uhc(struct pci_dev *dev) +{ + void __iomem *uhc_base; + u32 __iomem *uhc_clkctrl; + u32 __iomem *uhc_ecmode; + u32 val = 0; + int i; + + if (!machine_is(celleb_beat) && + !machine_is(celleb_native)) + return; + + uhc_base = ioremap(pci_resource_start(dev, 0), + pci_resource_len(dev, 0)); + if (!uhc_base) { + printk(KERN_ERR "failed to map UHC register base.\n"); + return; + } + uhc_clkctrl = uhc_base + SCC_UHC_CKRCTRL; + uhc_ecmode = uhc_base + SCC_UHC_ECMODE; + + /* setup for normal mode */ + val |= SCC_UHC_F48MCKLEN; + out_be32(uhc_clkctrl, val); + val |= SCC_UHC_PHY_SUSPEND_SEL; + out_be32(uhc_clkctrl, val); + udelay(10); + val |= SCC_UHC_PHYEN; + out_be32(uhc_clkctrl, val); + udelay(50); + + /* disable reset */ + val |= SCC_UHC_HCLKEN; + out_be32(uhc_clkctrl, val); + val |= (SCC_UHC_USBCEN | SCC_UHC_USBEN); + out_be32(uhc_clkctrl, val); + i = 0; + while (!uhc_clkctrl_ready(in_be32(uhc_clkctrl))) { + udelay(10); + if (i++ > UHC_RESET_WAIT_MAX) { + printk(KERN_ERR "Failed to disable UHC reset %x\n", + in_be32(uhc_clkctrl)); + break; + } + } + + /* Endian Conversion Mode for Master ALL area */ + out_be32(uhc_ecmode, SCC_UHC_ECMODE_BY_BYTE); + + iounmap(uhc_base); +} + +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TOSHIBA_2, + PCI_DEVICE_ID_TOSHIBA_SCC_USB, enable_scc_uhc); diff --git a/arch/powerpc/platforms/cell/celleb_setup.c b/arch/powerpc/platforms/cell/celleb_setup.c new file mode 100644 index 00000000000..1d5a4d8ddad --- /dev/null +++ b/arch/powerpc/platforms/cell/celleb_setup.c @@ -0,0 +1,243 @@ +/* + * Celleb setup code + * + * (C) Copyright 2006-2007 TOSHIBA CORPORATION + * + * This code is based on arch/powerpc/platforms/cell/setup.c: + * Copyright (C) 1995 Linus Torvalds + * Adapted from 'alpha' version by Gary Thomas + * Modified by Cort Dougan (cort@cs.nmt.edu) + * Modified by PPC64 Team, IBM Corp + * Modified by Cell Team, IBM Deutschland Entwicklung GmbH + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#undef DEBUG + +#include <linux/cpu.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/export.h> +#include <linux/mm.h> +#include <linux/stddef.h> +#include <linux/unistd.h> +#include <linux/reboot.h> +#include <linux/init.h> +#include <linux/delay.h> +#include <linux/irq.h> +#include <linux/seq_file.h> +#include <linux/root_dev.h> +#include <linux/console.h> +#include <linux/of_platform.h> + +#include <asm/mmu.h> +#include <asm/processor.h> +#include <asm/io.h> +#include <asm/prom.h> +#include <asm/machdep.h> +#include <asm/cputable.h> +#include <asm/irq.h> +#include <asm/time.h> +#include <asm/spu_priv1.h> +#include <asm/firmware.h> +#include <asm/rtas.h> +#include <asm/cell-regs.h> + +#include "beat_interrupt.h" +#include "beat_wrapper.h" +#include "beat.h" +#include "celleb_pci.h" +#include "interrupt.h" +#include "pervasive.h" +#include "ras.h" + +static char celleb_machine_type[128] = "Celleb"; + +static void celleb_show_cpuinfo(struct seq_file *m) +{ + struct device_node *root; + const char *model = ""; + + root = of_find_node_by_path("/"); + if (root) + model = of_get_property(root, "model", NULL); + /* using "CHRP" is to trick anaconda into installing FCx into Celleb */ + seq_printf(m, "machine\t\t: %s %s\n", celleb_machine_type, model); + of_node_put(root); +} + +static int __init celleb_machine_type_hack(char *ptr) +{ + strlcpy(celleb_machine_type, ptr, sizeof(celleb_machine_type)); + return 0; +} + +__setup("celleb_machine_type_hack=", celleb_machine_type_hack); + +static void celleb_progress(char *s, unsigned short hex) +{ + printk("*** %04x : %s\n", hex, s ? s : ""); +} + +static void __init celleb_setup_arch_common(void) +{ + /* init to some ~sane value until calibrate_delay() runs */ + loops_per_jiffy = 50000000; + +#ifdef CONFIG_DUMMY_CONSOLE + conswitchp = &dummy_con; +#endif +} + +static struct of_device_id celleb_bus_ids[] __initdata = { + { .type = "scc", }, + { .type = "ioif", }, /* old style */ + {}, +}; + +static int __init celleb_publish_devices(void) +{ + /* Publish OF platform devices for southbridge IOs */ + of_platform_bus_probe(NULL, celleb_bus_ids, NULL); + + return 0; +} +machine_device_initcall(celleb_beat, celleb_publish_devices); +machine_device_initcall(celleb_native, celleb_publish_devices); + + +/* + * functions for Celleb-Beat + */ +static void __init celleb_setup_arch_beat(void) +{ +#ifdef CONFIG_SPU_BASE + spu_priv1_ops = &spu_priv1_beat_ops; + spu_management_ops = &spu_management_of_ops; +#endif + + celleb_setup_arch_common(); +} + +static int __init celleb_probe_beat(void) +{ + unsigned long root = of_get_flat_dt_root(); + + if (!of_flat_dt_is_compatible(root, "Beat")) + return 0; + + powerpc_firmware_features |= FW_FEATURE_CELLEB_ALWAYS + | FW_FEATURE_BEAT | FW_FEATURE_LPAR; + hpte_init_beat_v3(); + + return 1; +} + + +/* + * functions for Celleb-native + */ +static void __init celleb_init_IRQ_native(void) +{ + iic_init_IRQ(); + spider_init_IRQ(); +} + +static void __init celleb_setup_arch_native(void) +{ +#ifdef CONFIG_SPU_BASE + spu_priv1_ops = &spu_priv1_mmio_ops; + spu_management_ops = &spu_management_of_ops; +#endif + + cbe_regs_init(); + +#ifdef CONFIG_CBE_RAS + cbe_ras_init(); +#endif + +#ifdef CONFIG_SMP + smp_init_cell(); +#endif + + cbe_pervasive_init(); + + /* XXX: nvram initialization should be added */ + + celleb_setup_arch_common(); +} + +static int __init celleb_probe_native(void) +{ + unsigned long root = of_get_flat_dt_root(); + + if (of_flat_dt_is_compatible(root, "Beat") || + 
!of_flat_dt_is_compatible(root, "TOSHIBA,Celleb")) + return 0; + + powerpc_firmware_features |= FW_FEATURE_CELLEB_ALWAYS; + hpte_init_native(); + + return 1; +} + + +/* + * machine definitions + */ +define_machine(celleb_beat) { + .name = "Cell Reference Set (Beat)", + .probe = celleb_probe_beat, + .setup_arch = celleb_setup_arch_beat, + .show_cpuinfo = celleb_show_cpuinfo, + .restart = beat_restart, + .power_off = beat_power_off, + .halt = beat_halt, + .get_rtc_time = beat_get_rtc_time, + .set_rtc_time = beat_set_rtc_time, + .calibrate_decr = generic_calibrate_decr, + .progress = celleb_progress, + .power_save = beat_power_save, + .nvram_size = beat_nvram_get_size, + .nvram_read = beat_nvram_read, + .nvram_write = beat_nvram_write, + .set_dabr = beat_set_xdabr, + .init_IRQ = beatic_init_IRQ, + .get_irq = beatic_get_irq, + .pci_probe_mode = celleb_pci_probe_mode, + .pci_setup_phb = celleb_setup_phb, +#ifdef CONFIG_KEXEC + .kexec_cpu_down = beat_kexec_cpu_down, +#endif +}; + +define_machine(celleb_native) { + .name = "Cell Reference Set (native)", + .probe = celleb_probe_native, + .setup_arch = celleb_setup_arch_native, + .show_cpuinfo = celleb_show_cpuinfo, + .restart = rtas_restart, + .power_off = rtas_power_off, + .halt = rtas_halt, + .get_boot_time = rtas_get_boot_time, + .get_rtc_time = rtas_get_rtc_time, + .set_rtc_time = rtas_set_rtc_time, + .calibrate_decr = generic_calibrate_decr, + .progress = celleb_progress, + .pci_probe_mode = celleb_pci_probe_mode, + .pci_setup_phb = celleb_setup_phb, + .init_IRQ = celleb_init_IRQ_native, +}; diff --git a/arch/powerpc/platforms/cell/cpufreq_spudemand.c b/arch/powerpc/platforms/cell/cpufreq_spudemand.c new file mode 100644 index 00000000000..82607d621ac --- /dev/null +++ b/arch/powerpc/platforms/cell/cpufreq_spudemand.c @@ -0,0 +1,171 @@ +/* + * spu aware cpufreq governor for the cell processor + * + * © Copyright IBM Corporation 2006-2008 + * + * Author: Christian Krafft <krafft@de.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#include <linux/cpufreq.h> +#include <linux/sched.h> +#include <linux/module.h> +#include <linux/timer.h> +#include <linux/workqueue.h> +#include <linux/atomic.h> +#include <asm/machdep.h> +#include <asm/spu.h> + +#define POLL_TIME 100000 /* in µs */ +#define EXP 753 /* exp(-1) in fixed-point */ + +struct spu_gov_info_struct { + unsigned long busy_spus; /* fixed-point */ + struct cpufreq_policy *policy; + struct delayed_work work; + unsigned int poll_int; /* µs */ +}; +static DEFINE_PER_CPU(struct spu_gov_info_struct, spu_gov_info); + +static int calc_freq(struct spu_gov_info_struct *info) +{ + int cpu; + int busy_spus; + + cpu = info->policy->cpu; + busy_spus = atomic_read(&cbe_spu_info[cpu_to_node(cpu)].busy_spus); + + CALC_LOAD(info->busy_spus, EXP, busy_spus * FIXED_1); + pr_debug("cpu %d: busy_spus=%d, info->busy_spus=%ld\n", + cpu, busy_spus, info->busy_spus); + + return info->policy->max * info->busy_spus / FIXED_1; +} + +static void spu_gov_work(struct work_struct *work) +{ + struct spu_gov_info_struct *info; + int delay; + unsigned long target_freq; + + info = container_of(work, struct spu_gov_info_struct, work.work); + + /* after cancel_delayed_work_sync we unset info->policy */ + BUG_ON(info->policy == NULL); + + target_freq = calc_freq(info); + __cpufreq_driver_target(info->policy, target_freq, CPUFREQ_RELATION_H); + + delay = usecs_to_jiffies(info->poll_int); + schedule_delayed_work_on(info->policy->cpu, &info->work, delay); +} + +static void spu_gov_init_work(struct spu_gov_info_struct *info) +{ + int delay = usecs_to_jiffies(info->poll_int); + INIT_DEFERRABLE_WORK(&info->work, spu_gov_work); + schedule_delayed_work_on(info->policy->cpu, &info->work, delay); +} + +static void spu_gov_cancel_work(struct spu_gov_info_struct *info) +{ + cancel_delayed_work_sync(&info->work); +} + +static int spu_gov_govern(struct cpufreq_policy *policy, unsigned int event) +{ + unsigned int cpu = policy->cpu; + struct spu_gov_info_struct *info, *affected_info; + int i; + int ret = 0; + + info = &per_cpu(spu_gov_info, cpu); + + switch (event) { + case CPUFREQ_GOV_START: + if (!cpu_online(cpu)) { + printk(KERN_ERR "cpu %d is not online\n", cpu); + ret = -EINVAL; + break; + } + + if (!policy->cur) { + printk(KERN_ERR "no cpu specified in policy\n"); + ret = -EINVAL; + break; + } + + /* initialize spu_gov_info for all affected cpus */ + for_each_cpu(i, policy->cpus) { + affected_info = &per_cpu(spu_gov_info, i); + affected_info->policy = policy; + } + + info->poll_int = POLL_TIME; + + /* setup timer */ + spu_gov_init_work(info); + + break; + + case CPUFREQ_GOV_STOP: + /* cancel timer */ + spu_gov_cancel_work(info); + + /* clean spu_gov_info for all affected cpus */ + for_each_cpu (i, policy->cpus) { + info = &per_cpu(spu_gov_info, i); + info->policy = NULL; + } + + break; + } + + return ret; +} + +static struct cpufreq_governor spu_governor = { + .name = "spudemand", + .governor = spu_gov_govern, + .owner = THIS_MODULE, +}; + +/* + * module init and destoy + */ + +static int __init spu_gov_init(void) +{ + int ret; + + ret = cpufreq_register_governor(&spu_governor); + if (ret) + printk(KERN_ERR "registration of governor failed\n"); + return ret; +} + +static void __exit spu_gov_exit(void) +{ + cpufreq_unregister_governor(&spu_governor); +} + + +module_init(spu_gov_init); +module_exit(spu_gov_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Christian Krafft <krafft@de.ibm.com>"); + diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c index 
63aa52acf44..8a106b4172e 100644 --- a/arch/powerpc/platforms/cell/interrupt.c +++ b/arch/powerpc/platforms/cell/interrupt.c @@ -1,6 +1,9 @@ /* * Cell Internal Interrupt Controller * + * Copyright (C) 2006 Benjamin Herrenschmidt (benh@kernel.crashing.org) + * IBM, Corp. + * * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 * * Author: Arnd Bergmann <arndb@de.ibm.com> @@ -18,277 +21,391 @@ * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * TODO: + * - Fix various assumptions related to HW CPU numbers vs. linux CPU numbers + * vs node numbers in the setup code + * - Implement proper handling of maxcpus=1/2 (that is, routing of irqs from + * a non-active node to the active node) */ -#include <linux/config.h> #include <linux/interrupt.h> #include <linux/irq.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/percpu.h> #include <linux/types.h> +#include <linux/ioport.h> +#include <linux/kernel_stat.h> #include <asm/io.h> #include <asm/pgtable.h> #include <asm/prom.h> #include <asm/ptrace.h> +#include <asm/machdep.h> +#include <asm/cell-regs.h> #include "interrupt.h" -struct iic_pending_bits { - u32 data; - u8 flags; - u8 class; - u8 source; - u8 prio; -}; - -enum iic_pending_flags { - IIC_VALID = 0x80, - IIC_IPI = 0x40, -}; - -struct iic_regs { - struct iic_pending_bits pending; - struct iic_pending_bits pending_destr; - u64 generate; - u64 prio; -}; - struct iic { - struct iic_regs __iomem *regs; + struct cbe_iic_thread_regs __iomem *regs; u8 target_id; + u8 eoi_stack[16]; + int eoi_ptr; + struct device_node *node; }; -static DEFINE_PER_CPU(struct iic, iic); +static DEFINE_PER_CPU(struct iic, cpu_iic); +#define IIC_NODE_COUNT 2 +static struct irq_domain *iic_host; -void iic_local_enable(void) +/* Convert between "pending" bits and hw irq number */ +static irq_hw_number_t iic_pending_to_hwnum(struct cbe_iic_pending_bits bits) { - out_be64(&__get_cpu_var(iic).regs->prio, 0xff); + unsigned char unit = bits.source & 0xf; + unsigned char node = bits.source >> 4; + unsigned char class = bits.class & 3; + + /* Decode IPIs */ + if (bits.flags & CBE_IIC_IRQ_IPI) + return IIC_IRQ_TYPE_IPI | (bits.prio >> 4); + else + return (node << IIC_IRQ_NODE_SHIFT) | (class << 4) | unit; } -void iic_local_disable(void) +static void iic_mask(struct irq_data *d) { - out_be64(&__get_cpu_var(iic).regs->prio, 0x0); } -static unsigned int iic_startup(unsigned int irq) +static void iic_unmask(struct irq_data *d) { - return 0; } -static void iic_enable(unsigned int irq) +static void iic_eoi(struct irq_data *d) { - iic_local_enable(); + struct iic *iic = &__get_cpu_var(cpu_iic); + out_be64(&iic->regs->prio, iic->eoi_stack[--iic->eoi_ptr]); + BUG_ON(iic->eoi_ptr < 0); } -static void iic_disable(unsigned int irq) -{ -} +static struct irq_chip iic_chip = { + .name = "CELL-IIC", + .irq_mask = iic_mask, + .irq_unmask = iic_unmask, + .irq_eoi = iic_eoi, +}; + -static void iic_end(unsigned int irq) +static void iic_ioexc_eoi(struct irq_data *d) { - iic_local_enable(); } -static struct hw_interrupt_type iic_pic = { - .typename = " CELL-IIC ", - .startup = iic_startup, - .enable = iic_enable, - .disable = iic_disable, - .end = iic_end, -}; - -static int iic_external_get_irq(struct iic_pending_bits pending) +static void iic_ioexc_cascade(unsigned int irq, struct irq_desc *desc) { - int irq; - unsigned char node, unit; - - node = pending.source >> 4; - unit = 
pending.source & 0xf; - irq = -1; - - /* - * This mapping is specific to the Cell Broadband - * Engine. We might need to get the numbers - * from the device tree to support future CPUs. - */ - switch (unit) { - case 0x00: - case 0x0b: - /* - * One of these units can be connected - * to an external interrupt controller. - */ - if (pending.prio > 0x3f || - pending.class != 2) - break; - irq = IIC_EXT_OFFSET - + spider_get_irq(pending.prio + node * IIC_NODE_STRIDE) - + node * IIC_NODE_STRIDE; - break; - case 0x01 ... 0x04: - case 0x07 ... 0x0a: - /* - * These units are connected to the SPEs - */ - if (pending.class > 2) + struct irq_chip *chip = irq_desc_get_chip(desc); + struct cbe_iic_regs __iomem *node_iic = + (void __iomem *)irq_desc_get_handler_data(desc); + unsigned int base = (irq & 0xffffff00) | IIC_IRQ_TYPE_IOEXC; + unsigned long bits, ack; + int cascade; + + for (;;) { + bits = in_be64(&node_iic->iic_is); + if (bits == 0) break; - irq = IIC_SPE_OFFSET - + pending.class * IIC_CLASS_STRIDE - + node * IIC_NODE_STRIDE - + unit; - break; + /* pre-ack edge interrupts */ + ack = bits & IIC_ISR_EDGE_MASK; + if (ack) + out_be64(&node_iic->iic_is, ack); + /* handle them */ + for (cascade = 63; cascade >= 0; cascade--) + if (bits & (0x8000000000000000UL >> cascade)) { + unsigned int cirq = + irq_linear_revmap(iic_host, + base | cascade); + if (cirq != NO_IRQ) + generic_handle_irq(cirq); + } + /* post-ack level interrupts */ + ack = bits & ~IIC_ISR_EDGE_MASK; + if (ack) + out_be64(&node_iic->iic_is, ack); } - if (irq == -1) - printk(KERN_WARNING "Unexpected interrupt class %02x, " - "source %02x, prio %02x, cpu %02x\n", pending.class, - pending.source, pending.prio, smp_processor_id()); - return irq; + chip->irq_eoi(&desc->irq_data); } + +static struct irq_chip iic_ioexc_chip = { + .name = "CELL-IOEX", + .irq_mask = iic_mask, + .irq_unmask = iic_unmask, + .irq_eoi = iic_ioexc_eoi, +}; + /* Get an IRQ number from the pending state register of the IIC */ -int iic_get_irq(struct pt_regs *regs) +static unsigned int iic_get_irq(void) { + struct cbe_iic_pending_bits pending; struct iic *iic; - int irq; - struct iic_pending_bits pending; - - iic = &__get_cpu_var(iic); - *(unsigned long *) &pending = - in_be64((unsigned long __iomem *) &iic->regs->pending_destr); - - irq = -1; - if (pending.flags & IIC_VALID) { - if (pending.flags & IIC_IPI) { - irq = IIC_IPI_OFFSET + (pending.prio >> 4); -/* - if (irq > 0x80) - printk(KERN_WARNING "Unexpected IPI prio %02x" - "on CPU %02x\n", pending.prio, - smp_processor_id()); -*/ - } else { - irq = iic_external_get_irq(pending); - } - } - return irq; + unsigned int virq; + + iic = &__get_cpu_var(cpu_iic); + *(unsigned long *) &pending = + in_be64((u64 __iomem *) &iic->regs->pending_destr); + if (!(pending.flags & CBE_IIC_IRQ_VALID)) + return NO_IRQ; + virq = irq_linear_revmap(iic_host, iic_pending_to_hwnum(pending)); + if (virq == NO_IRQ) + return NO_IRQ; + iic->eoi_stack[++iic->eoi_ptr] = pending.prio; + BUG_ON(iic->eoi_ptr > 15); + return virq; } -static int setup_iic(int cpu, struct iic *iic) +void iic_setup_cpu(void) { - struct device_node *np; - int nodeid = cpu / 2; - unsigned long regs; - - for (np = of_find_node_by_type(NULL, "cpu"); - np; - np = of_find_node_by_type(np, "cpu")) { - if (nodeid == *(int *)get_property(np, "node-id", NULL)) - break; - } - - if (!np) { - printk(KERN_WARNING "IIC: CPU %d not found\n", cpu); - iic->regs = NULL; - iic->target_id = 0xff; - return -ENODEV; - } - - regs = *(long *)get_property(np, "iic", NULL); - - /* hack until we 
have decided on the devtree info */ - regs += 0x400; - if (cpu & 1) - regs += 0x20; + out_be64(&__get_cpu_var(cpu_iic).regs->prio, 0xff); +} - printk(KERN_DEBUG "IIC for CPU %d at %lx\n", cpu, regs); - iic->regs = __ioremap(regs, sizeof(struct iic_regs), - _PAGE_NO_CACHE); - iic->target_id = (nodeid << 4) + ((cpu & 1) ? 0xf : 0xe); - return 0; +u8 iic_get_target_id(int cpu) +{ + return per_cpu(cpu_iic, cpu).target_id; } +EXPORT_SYMBOL_GPL(iic_get_target_id); + #ifdef CONFIG_SMP /* Use the highest interrupt priorities for IPI */ -static inline int iic_ipi_to_irq(int ipi) +static inline int iic_msg_to_irq(int msg) { - return IIC_IPI_OFFSET + IIC_NUM_IPIS - 1 - ipi; + return IIC_IRQ_TYPE_IPI + 0xf - msg; } -static inline int iic_irq_to_ipi(int irq) +void iic_message_pass(int cpu, int msg) { - return IIC_NUM_IPIS - 1 - (irq - IIC_IPI_OFFSET); + out_be64(&per_cpu(cpu_iic, cpu).regs->generate, (0xf - msg) << 4); } -void iic_setup_cpu(void) +struct irq_domain *iic_get_irq_host(int node) { - out_be64(&__get_cpu_var(iic).regs->prio, 0xff); + return iic_host; } +EXPORT_SYMBOL_GPL(iic_get_irq_host); -void iic_cause_IPI(int cpu, int mesg) +static void iic_request_ipi(int msg) { - out_be64(&per_cpu(iic, cpu).regs->generate, (IIC_NUM_IPIS - 1 - mesg) << 4); + int virq; + + virq = irq_create_mapping(iic_host, iic_msg_to_irq(msg)); + if (virq == NO_IRQ) { + printk(KERN_ERR + "iic: failed to map IPI %s\n", smp_ipi_name[msg]); + return; + } + + /* + * If smp_request_message_ipi encounters an error it will notify + * the error. If a message is not needed it will return non-zero. + */ + if (smp_request_message_ipi(virq, msg)) + irq_dispose_mapping(virq); } -u8 iic_get_target_id(int cpu) +void iic_request_IPIs(void) { - return per_cpu(iic, cpu).target_id; + iic_request_ipi(PPC_MSG_CALL_FUNCTION); + iic_request_ipi(PPC_MSG_RESCHEDULE); + iic_request_ipi(PPC_MSG_TICK_BROADCAST); + iic_request_ipi(PPC_MSG_DEBUGGER_BREAK); } -EXPORT_SYMBOL_GPL(iic_get_target_id); -static irqreturn_t iic_ipi_action(int irq, void *dev_id, struct pt_regs *regs) +#endif /* CONFIG_SMP */ + + +static int iic_host_match(struct irq_domain *h, struct device_node *node) { - smp_message_recv(iic_irq_to_ipi(irq), regs); - return IRQ_HANDLED; + return of_device_is_compatible(node, + "IBM,CBEA-Internal-Interrupt-Controller"); } -static void iic_request_ipi(int ipi, const char *name) +static int iic_host_map(struct irq_domain *h, unsigned int virq, + irq_hw_number_t hw) { - int irq; - - irq = iic_ipi_to_irq(ipi); - /* IPIs are marked SA_INTERRUPT as they must run with irqs - * disabled */ - get_irq_desc(irq)->handler = &iic_pic; - get_irq_desc(irq)->status |= IRQ_PER_CPU; - request_irq(irq, iic_ipi_action, SA_INTERRUPT, name, NULL); + switch (hw & IIC_IRQ_TYPE_MASK) { + case IIC_IRQ_TYPE_IPI: + irq_set_chip_and_handler(virq, &iic_chip, handle_percpu_irq); + break; + case IIC_IRQ_TYPE_IOEXC: + irq_set_chip_and_handler(virq, &iic_ioexc_chip, + handle_edge_eoi_irq); + break; + default: + irq_set_chip_and_handler(virq, &iic_chip, handle_edge_eoi_irq); + } + return 0; } -void iic_request_IPIs(void) +static int iic_host_xlate(struct irq_domain *h, struct device_node *ct, + const u32 *intspec, unsigned int intsize, + irq_hw_number_t *out_hwirq, unsigned int *out_flags) + { - iic_request_ipi(PPC_MSG_CALL_FUNCTION, "IPI-call"); - iic_request_ipi(PPC_MSG_RESCHEDULE, "IPI-resched"); -#ifdef CONFIG_DEBUGGER - iic_request_ipi(PPC_MSG_DEBUGGER_BREAK, "IPI-debug"); -#endif /* CONFIG_DEBUGGER */ + unsigned int node, ext, unit, class; + const u32 *val; + + if 
(!of_device_is_compatible(ct, + "IBM,CBEA-Internal-Interrupt-Controller")) + return -ENODEV; + if (intsize != 1) + return -ENODEV; + val = of_get_property(ct, "#interrupt-cells", NULL); + if (val == NULL || *val != 1) + return -ENODEV; + + node = intspec[0] >> 24; + ext = (intspec[0] >> 16) & 0xff; + class = (intspec[0] >> 8) & 0xff; + unit = intspec[0] & 0xff; + + /* Check if node is in supported range */ + if (node > 1) + return -EINVAL; + + /* Build up interrupt number, special case for IO exceptions */ + *out_hwirq = (node << IIC_IRQ_NODE_SHIFT); + if (unit == IIC_UNIT_IIC && class == 1) + *out_hwirq |= IIC_IRQ_TYPE_IOEXC | ext; + else + *out_hwirq |= IIC_IRQ_TYPE_NORMAL | + (class << IIC_IRQ_CLASS_SHIFT) | unit; + + /* Dummy flags, ignored by iic code */ + *out_flags = IRQ_TYPE_EDGE_RISING; + + return 0; } -#endif /* CONFIG_SMP */ -static void iic_setup_spe_handlers(void) +static const struct irq_domain_ops iic_host_ops = { + .match = iic_host_match, + .map = iic_host_map, + .xlate = iic_host_xlate, +}; + +static void __init init_one_iic(unsigned int hw_cpu, unsigned long addr, + struct device_node *node) { - int be, isrc; + /* XXX FIXME: should locate the linux CPU number from the HW cpu + * number properly. We are lucky for now + */ + struct iic *iic = &per_cpu(cpu_iic, hw_cpu); + + iic->regs = ioremap(addr, sizeof(struct cbe_iic_thread_regs)); + BUG_ON(iic->regs == NULL); - /* Assume two threads per BE are present */ - for (be=0; be < num_present_cpus() / 2; be++) { - for (isrc = 0; isrc < IIC_CLASS_STRIDE * 3; isrc++) { - int irq = IIC_NODE_STRIDE * be + IIC_SPE_OFFSET + isrc; - get_irq_desc(irq)->handler = &iic_pic; + iic->target_id = ((hw_cpu & 2) << 3) | ((hw_cpu & 1) ? 0xf : 0xe); + iic->eoi_stack[0] = 0xff; + iic->node = of_node_get(node); + out_be64(&iic->regs->prio, 0); + + printk(KERN_INFO "IIC for CPU %d target id 0x%x : %s\n", + hw_cpu, iic->target_id, node->full_name); +} + +static int __init setup_iic(void) +{ + struct device_node *dn; + struct resource r0, r1; + unsigned int node, cascade, found = 0; + struct cbe_iic_regs __iomem *node_iic; + const u32 *np; + + for (dn = NULL; + (dn = of_find_node_by_name(dn,"interrupt-controller")) != NULL;) { + if (!of_device_is_compatible(dn, + "IBM,CBEA-Internal-Interrupt-Controller")) + continue; + np = of_get_property(dn, "ibm,interrupt-server-ranges", NULL); + if (np == NULL) { + printk(KERN_WARNING "IIC: CPU association not found\n"); + of_node_put(dn); + return -ENODEV; + } + if (of_address_to_resource(dn, 0, &r0) || + of_address_to_resource(dn, 1, &r1)) { + printk(KERN_WARNING "IIC: Can't resolve addresses\n"); + of_node_put(dn); + return -ENODEV; } + found++; + init_one_iic(np[0], r0.start, dn); + init_one_iic(np[1], r1.start, dn); + + /* Setup cascade for IO exceptions. XXX cleanup tricks to get + * node vs CPU etc... + * Note that we configure the IIC_IRR here with a hard coded + * priority of 1. We might want to improve that later. + */ + node = np[0] >> 1; + node_iic = cbe_get_cpu_iic_regs(np[0]); + cascade = node << IIC_IRQ_NODE_SHIFT; + cascade |= 1 << IIC_IRQ_CLASS_SHIFT; + cascade |= IIC_UNIT_IIC; + cascade = irq_create_mapping(iic_host, cascade); + if (cascade == NO_IRQ) + continue; + /* + * irq_data is a generic pointer that gets passed back + * to us later, so the forced cast is fine. 
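The xlate hook above packs a one-cell device tree interrupt specifier (node, IO-exception number, class, unit) into the linear hwirq space used by the IIC irq domain. The following standalone sketch restates that packing with the IIC_* constants written as literal values; the helper name and the sample specifier are illustrative only.

#include <stdio.h>

/* Mirror of the hwirq packing done by iic_host_xlate():
 *   bit 8     : source node
 *   bits 7..6 : type (normal, or IO exception when the unit is the IIC itself)
 *   bits 5..0 : (class << 4) | unit for normal interrupts
 */
static unsigned int cell_pack_hwirq(unsigned int intspec)
{
	unsigned int node  = intspec >> 24;
	unsigned int ext   = (intspec >> 16) & 0xff;
	unsigned int class = (intspec >> 8) & 0xff;
	unsigned int unit  = intspec & 0xff;
	unsigned int hwirq = node << 8;			/* IIC_IRQ_NODE_SHIFT */

	if (unit == 0xe && class == 1)			/* IIC_UNIT_IIC: IO exception */
		hwirq |= 0x40 | ext;			/* IIC_IRQ_TYPE_IOEXC */
	else
		hwirq |= (class << 4) | unit;		/* IIC_IRQ_TYPE_NORMAL */
	return hwirq;
}

int main(void)
{
	/* hypothetical specifier: node 1, class 2, unit 0xb (the IOIF1 bridge) */
	printf("hwirq = 0x%x\n", cell_pack_hwirq(0x0100020b));	/* prints 0x12b */
	return 0;
}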
+ */ + irq_set_handler_data(cascade, (void __force *)node_iic); + irq_set_chained_handler(cascade, iic_ioexc_cascade); + out_be64(&node_iic->iic_ir, + (1 << 12) /* priority */ | + (node << 4) /* dest node */ | + IIC_UNIT_THREAD_0 /* route them to thread 0 */); + /* Flush pending (make sure it triggers if there is + * anything pending + */ + out_be64(&node_iic->iic_is, 0xfffffffffffffffful); } + + if (found) + return 0; + else + return -ENODEV; } -void iic_init_IRQ(void) +void __init iic_init_IRQ(void) { - int cpu, irq_offset; - struct iic *iic; + /* Setup an irq host data structure */ + iic_host = irq_domain_add_linear(NULL, IIC_SOURCE_COUNT, &iic_host_ops, + NULL); + BUG_ON(iic_host == NULL); + irq_set_default_host(iic_host); - irq_offset = 0; - for_each_cpu(cpu) { - iic = &per_cpu(iic, cpu); - setup_iic(cpu, iic); - if (iic->regs) - out_be64(&iic->regs->prio, 0xff); - } - iic_setup_spe_handlers(); + /* Discover and initialize iics */ + if (setup_iic() < 0) + panic("IIC: Failed to initialize !\n"); + + /* Set master interrupt handling function */ + ppc_md.get_irq = iic_get_irq; + + /* Enable on current CPU */ + iic_setup_cpu(); +} + +void iic_set_interrupt_routing(int cpu, int thread, int priority) +{ + struct cbe_iic_regs __iomem *iic_regs = cbe_get_cpu_iic_regs(cpu); + u64 iic_ir = 0; + int node = cpu >> 1; + + /* Set which node and thread will handle the next interrupt */ + iic_ir |= CBE_IIC_IR_PRIO(priority) | + CBE_IIC_IR_DEST_NODE(node); + if (thread == 0) + iic_ir |= CBE_IIC_IR_DEST_UNIT(CBE_IIC_IR_PT_0); + else + iic_ir |= CBE_IIC_IR_DEST_UNIT(CBE_IIC_IR_PT_1); + out_be64(&iic_regs->iic_ir, iic_ir); } diff --git a/arch/powerpc/platforms/cell/interrupt.h b/arch/powerpc/platforms/cell/interrupt.h index a14bd38791c..4f60ae6ca35 100644 --- a/arch/powerpc/platforms/cell/interrupt.h +++ b/arch/powerpc/platforms/cell/interrupt.h @@ -2,62 +2,88 @@ #define ASM_CELL_PIC_H #ifdef __KERNEL__ /* - * Mapping of IIC pending bits into per-node - * interrupt numbers. + * Mapping of IIC pending bits into per-node interrupt numbers. * - * IRQ FF CC SS PP FF CC SS PP Description + * Interrupt numbers are in the range 0...0x1ff where the top bit + * (0x100) represent the source node. Only 2 nodes are supported with + * the current code though it's trivial to extend that if necessary using + * higher level bits * - * 00-3f 80 02 +0 00 - 80 02 +0 3f South Bridge - * 00-3f 80 02 +b 00 - 80 02 +b 3f South Bridge - * 41-4a 80 00 +1 ** - 80 00 +a ** SPU Class 0 - * 51-5a 80 01 +1 ** - 80 01 +a ** SPU Class 1 - * 61-6a 80 02 +1 ** - 80 02 +a ** SPU Class 2 - * 70-7f C0 ** ** 00 - C0 ** ** 0f IPI + * The bottom 8 bits are split into 2 type bits and 6 data bits that + * depend on the type: * - * F flags - * C class - * S source - * P Priority - * + node number - * * don't care + * 00 (0x00 | data) : normal interrupt. data is (class << 4) | source + * 01 (0x40 | data) : IO exception. data is the exception number as + * defined by bit numbers in IIC_SR + * 10 (0x80 | data) : IPI. data is the IPI number (obtained from the priority) + * and node is always 0 (IPIs are per-cpu, their source is + * not relevant) + * 11 (0xc0 | data) : reserved * - * A node consists of a Cell Broadband Engine and an optional - * south bridge device providing a maximum of 64 IRQs. - * The south bridge may be connected to either IOIF0 - * or IOIF1. - * Each SPE is represented as three IRQ lines, one per - * interrupt class. 
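To make the hwirq layout described in the new comment above concrete, here is a small decoding sketch. It is illustrative only: the function name is made up, and the numeric masks are transcriptions of the layout in the comment (and of the IIC_IRQ_* enum that follows).

#include <stdio.h>

/* Decode an IIC hwirq per the layout above:
 *   bit 8     : source node (only nodes 0 and 1 are supported)
 *   bits 7..6 : type: 00 normal, 01 IO exception, 10 IPI, 11 reserved
 *   bits 5..0 : data (normal: (class << 4) | source unit)
 */
static void iic_decode_hwirq(unsigned int hwirq)
{
	unsigned int node = (hwirq >> 8) & 1;
	unsigned int type = (hwirq >> 6) & 3;
	unsigned int data = hwirq & 0x3f;

	switch (type) {
	case 0:
		printf("node %u: normal, class %u, unit 0x%x\n",
		       node, data >> 4, data & 0xf);
		break;
	case 1:
		printf("node %u: IO exception, IIC_SR bit %u\n", node, data);
		break;
	case 2:
		printf("IPI slot %u\n", data);
		break;
	default:
		printf("reserved encoding\n");
	}
}

int main(void)
{
	iic_decode_hwirq(0x17d);	/* node 1, IO exception 61, i.e. the ATI
					 * source the iommu code maps later on */
	iic_decode_hwirq(0x8f);		/* an IPI slot */
	return 0;
}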
- * 16 IRQ numbers are reserved for inter processor - * interruptions, although these are only used in the - * range of the first node. + * In addition, interrupt number 0x80000000 is defined as always invalid + * (that is the node field is expected to never extend to move than 23 bits) * - * This scheme needs 128 IRQ numbers per BIF node ID, - * which means that with the total of 512 lines - * available, we can have a maximum of four nodes. */ enum { - IIC_EXT_OFFSET = 0x00, /* Start of south bridge IRQs */ - IIC_NUM_EXT = 0x40, /* Number of south bridge IRQs */ - IIC_SPE_OFFSET = 0x40, /* Start of SPE interrupts */ - IIC_CLASS_STRIDE = 0x10, /* SPE IRQs per class */ - IIC_IPI_OFFSET = 0x70, /* Start of IPI IRQs */ - IIC_NUM_IPIS = 0x10, /* IRQs reserved for IPI */ - IIC_NODE_STRIDE = 0x80, /* Total IRQs per node */ + IIC_IRQ_INVALID = 0x80000000u, + IIC_IRQ_NODE_MASK = 0x100, + IIC_IRQ_NODE_SHIFT = 8, + IIC_IRQ_MAX = 0x1ff, + IIC_IRQ_TYPE_MASK = 0xc0, + IIC_IRQ_TYPE_NORMAL = 0x00, + IIC_IRQ_TYPE_IOEXC = 0x40, + IIC_IRQ_TYPE_IPI = 0x80, + IIC_IRQ_CLASS_SHIFT = 4, + IIC_IRQ_CLASS_0 = 0x00, + IIC_IRQ_CLASS_1 = 0x10, + IIC_IRQ_CLASS_2 = 0x20, + IIC_SOURCE_COUNT = 0x200, + + /* Here are defined the various source/dest units. Avoid using those + * definitions if you can, they are mostly here for reference + */ + IIC_UNIT_SPU_0 = 0x4, + IIC_UNIT_SPU_1 = 0x7, + IIC_UNIT_SPU_2 = 0x3, + IIC_UNIT_SPU_3 = 0x8, + IIC_UNIT_SPU_4 = 0x2, + IIC_UNIT_SPU_5 = 0x9, + IIC_UNIT_SPU_6 = 0x1, + IIC_UNIT_SPU_7 = 0xa, + IIC_UNIT_IOC_0 = 0x0, + IIC_UNIT_IOC_1 = 0xb, + IIC_UNIT_THREAD_0 = 0xe, /* target only */ + IIC_UNIT_THREAD_1 = 0xf, /* target only */ + IIC_UNIT_IIC = 0xe, /* source only (IO exceptions) */ + + /* Base numbers for the external interrupts */ + IIC_IRQ_EXT_IOIF0 = + IIC_IRQ_TYPE_NORMAL | IIC_IRQ_CLASS_2 | IIC_UNIT_IOC_0, + IIC_IRQ_EXT_IOIF1 = + IIC_IRQ_TYPE_NORMAL | IIC_IRQ_CLASS_2 | IIC_UNIT_IOC_1, + + /* Base numbers for the IIC_ISR interrupts */ + IIC_IRQ_IOEX_TMI = IIC_IRQ_TYPE_IOEXC | IIC_IRQ_CLASS_1 | 63, + IIC_IRQ_IOEX_PMI = IIC_IRQ_TYPE_IOEXC | IIC_IRQ_CLASS_1 | 62, + IIC_IRQ_IOEX_ATI = IIC_IRQ_TYPE_IOEXC | IIC_IRQ_CLASS_1 | 61, + IIC_IRQ_IOEX_MATBFI = IIC_IRQ_TYPE_IOEXC | IIC_IRQ_CLASS_1 | 60, + IIC_IRQ_IOEX_ELDI = IIC_IRQ_TYPE_IOEXC | IIC_IRQ_CLASS_1 | 59, + + /* Which bits in IIC_ISR are edge sensitive */ + IIC_ISR_EDGE_MASK = 0x4ul, }; extern void iic_init_IRQ(void); -extern int iic_get_irq(struct pt_regs *regs); -extern void iic_cause_IPI(int cpu, int mesg); +extern void iic_message_pass(int cpu, int msg); extern void iic_request_IPIs(void); extern void iic_setup_cpu(void); -extern void iic_local_enable(void); -extern void iic_local_disable(void); extern u8 iic_get_target_id(int cpu); extern void spider_init_IRQ(void); -extern int spider_get_irq(unsigned long int_pending); + +extern void iic_set_interrupt_routing(int cpu, int thread, int priority); #endif #endif /* ASM_CELL_PIC_H */ diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c index 46e7cb9c3e6..2b90ff8a93b 100644 --- a/arch/powerpc/platforms/cell/iommu.c +++ b/arch/powerpc/platforms/cell/iommu.c @@ -1,508 +1,1237 @@ /* * IOMMU implementation for Cell Broadband Processor Architecture - * We just establish a linear mapping at boot by setting all the - * IOPT cache entries in the CPU. - * The mapping functions should be identical to pci_direct_iommu, - * except for the handling of the high order bit that is required - * by the Spider bridge. 
These should be split into a separate - * file at the point where we get a different bridge chip. * - * Copyright (C) 2005 IBM Deutschland Entwicklung GmbH, - * Arnd Bergmann <arndb@de.ibm.com> + * (C) Copyright IBM Corporation 2006-2008 * - * Based on linear mapping - * Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org) + * Author: Jeremy Kerr <jk@ozlabs.org> * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #undef DEBUG #include <linux/kernel.h> -#include <linux/pci.h> -#include <linux/delay.h> -#include <linux/string.h> #include <linux/init.h> -#include <linux/bootmem.h> -#include <linux/mm.h> -#include <linux/dma-mapping.h> -#include <linux/kernel.h> -#include <linux/compiler.h> +#include <linux/interrupt.h> +#include <linux/notifier.h> +#include <linux/of.h> +#include <linux/of_platform.h> +#include <linux/slab.h> +#include <linux/memblock.h> -#include <asm/sections.h> -#include <asm/iommu.h> -#include <asm/io.h> #include <asm/prom.h> -#include <asm/pci-bridge.h> +#include <asm/iommu.h> #include <asm/machdep.h> -#include <asm/pmac_feature.h> -#include <asm/abs_addr.h> -#include <asm/system.h> -#include <asm/ppc-pci.h> +#include <asm/pci-bridge.h> #include <asm/udbg.h> +#include <asm/firmware.h> +#include <asm/cell-regs.h> + +#include "interrupt.h" + +/* Define CELL_IOMMU_REAL_UNMAP to actually unmap non-used pages + * instead of leaving them mapped to some dummy page. This can be + * enabled once the appropriate workarounds for spider bugs have + * been enabled + */ +#define CELL_IOMMU_REAL_UNMAP + +/* Define CELL_IOMMU_STRICT_PROTECTION to enforce protection of + * IO PTEs based on the transfer direction. 
That can be enabled + * once spider-net has been fixed to pass the correct direction + * to the DMA mapping functions + */ +#define CELL_IOMMU_STRICT_PROTECTION + + +#define NR_IOMMUS 2 + +/* IOC mmap registers */ +#define IOC_Reg_Size 0x2000 + +#define IOC_IOPT_CacheInvd 0x908 +#define IOC_IOPT_CacheInvd_NE_Mask 0xffe0000000000000ul +#define IOC_IOPT_CacheInvd_IOPTE_Mask 0x000003fffffffff8ul +#define IOC_IOPT_CacheInvd_Busy 0x0000000000000001ul + +#define IOC_IOST_Origin 0x918 +#define IOC_IOST_Origin_E 0x8000000000000000ul +#define IOC_IOST_Origin_HW 0x0000000000000800ul +#define IOC_IOST_Origin_HL 0x0000000000000400ul + +#define IOC_IO_ExcpStat 0x920 +#define IOC_IO_ExcpStat_V 0x8000000000000000ul +#define IOC_IO_ExcpStat_SPF_Mask 0x6000000000000000ul +#define IOC_IO_ExcpStat_SPF_S 0x6000000000000000ul +#define IOC_IO_ExcpStat_SPF_P 0x2000000000000000ul +#define IOC_IO_ExcpStat_ADDR_Mask 0x00000007fffff000ul +#define IOC_IO_ExcpStat_RW_Mask 0x0000000000000800ul +#define IOC_IO_ExcpStat_IOID_Mask 0x00000000000007fful + +#define IOC_IO_ExcpMask 0x928 +#define IOC_IO_ExcpMask_SFE 0x4000000000000000ul +#define IOC_IO_ExcpMask_PFE 0x2000000000000000ul + +#define IOC_IOCmd_Offset 0x1000 + +#define IOC_IOCmd_Cfg 0xc00 +#define IOC_IOCmd_Cfg_TE 0x0000800000000000ul + + +/* Segment table entries */ +#define IOSTE_V 0x8000000000000000ul /* valid */ +#define IOSTE_H 0x4000000000000000ul /* cache hint */ +#define IOSTE_PT_Base_RPN_Mask 0x3ffffffffffff000ul /* base RPN of IOPT */ +#define IOSTE_NPPT_Mask 0x0000000000000fe0ul /* no. pages in IOPT */ +#define IOSTE_PS_Mask 0x0000000000000007ul /* page size */ +#define IOSTE_PS_4K 0x0000000000000001ul /* - 4kB */ +#define IOSTE_PS_64K 0x0000000000000003ul /* - 64kB */ +#define IOSTE_PS_1M 0x0000000000000005ul /* - 1MB */ +#define IOSTE_PS_16M 0x0000000000000007ul /* - 16MB */ + + +/* IOMMU sizing */ +#define IO_SEGMENT_SHIFT 28 +#define IO_PAGENO_BITS(shift) (IO_SEGMENT_SHIFT - (shift)) -#include "iommu.h" +/* The high bit needs to be set on every DMA address */ +#define SPIDER_DMA_OFFSET 0x80000000ul -static inline unsigned long -get_iopt_entry(unsigned long real_address, unsigned long ioid, - unsigned long prot) +struct iommu_window { + struct list_head list; + struct cbe_iommu *iommu; + unsigned long offset; + unsigned long size; + unsigned int ioid; + struct iommu_table table; +}; + +#define NAMESIZE 8 +struct cbe_iommu { + int nid; + char name[NAMESIZE]; + void __iomem *xlate_regs; + void __iomem *cmd_regs; + unsigned long *stab; + unsigned long *ptab; + void *pad_page; + struct list_head windows; +}; + +/* Static array of iommus, one per node + * each contains a list of windows, keyed from dma_window property + * - on bus setup, look for a matching window, or create one + * - on dev setup, assign iommu_table ptr + */ +static struct cbe_iommu iommus[NR_IOMMUS]; +static int cbe_nr_iommus; + +static void invalidate_tce_cache(struct cbe_iommu *iommu, unsigned long *pte, + long n_ptes) { - return (prot & IOPT_PROT_MASK) - | (IOPT_COHERENT) - | (IOPT_ORDER_VC) - | (real_address & IOPT_RPN_MASK) - | (ioid & IOPT_IOID_MASK); -} + u64 __iomem *reg; + u64 val; + long n; + + reg = iommu->xlate_regs + IOC_IOPT_CacheInvd; + + while (n_ptes > 0) { + /* we can invalidate up to 1 << 11 PTEs at once */ + n = min(n_ptes, 1l << 11); + val = (((n /*- 1*/) << 53) & IOC_IOPT_CacheInvd_NE_Mask) + | (__pa(pte) & IOC_IOPT_CacheInvd_IOPTE_Mask) + | IOC_IOPT_CacheInvd_Busy; -typedef struct { - unsigned long val; -} ioste; + out_be64(reg, val); + while (in_be64(reg) & 
IOC_IOPT_CacheInvd_Busy) + ; + + n_ptes -= n; + pte += n; + } +} -static inline ioste -mk_ioste(unsigned long val) +static int tce_build_cell(struct iommu_table *tbl, long index, long npages, + unsigned long uaddr, enum dma_data_direction direction, + struct dma_attrs *attrs) { - ioste ioste = { .val = val, }; - return ioste; + int i; + unsigned long *io_pte, base_pte; + struct iommu_window *window = + container_of(tbl, struct iommu_window, table); + + /* implementing proper protection causes problems with the spidernet + * driver - check mapping directions later, but allow read & write by + * default for now.*/ +#ifdef CELL_IOMMU_STRICT_PROTECTION + /* to avoid referencing a global, we use a trick here to setup the + * protection bit. "prot" is setup to be 3 fields of 4 bits apprended + * together for each of the 3 supported direction values. It is then + * shifted left so that the fields matching the desired direction + * lands on the appropriate bits, and other bits are masked out. + */ + const unsigned long prot = 0xc48; + base_pte = + ((prot << (52 + 4 * direction)) & + (CBE_IOPTE_PP_W | CBE_IOPTE_PP_R)) | + CBE_IOPTE_M | CBE_IOPTE_SO_RW | + (window->ioid & CBE_IOPTE_IOID_Mask); +#else + base_pte = CBE_IOPTE_PP_W | CBE_IOPTE_PP_R | CBE_IOPTE_M | + CBE_IOPTE_SO_RW | (window->ioid & CBE_IOPTE_IOID_Mask); +#endif + if (unlikely(dma_get_attr(DMA_ATTR_WEAK_ORDERING, attrs))) + base_pte &= ~CBE_IOPTE_SO_RW; + + io_pte = (unsigned long *)tbl->it_base + (index - tbl->it_offset); + + for (i = 0; i < npages; i++, uaddr += tbl->it_page_shift) + io_pte[i] = base_pte | (__pa(uaddr) & CBE_IOPTE_RPN_Mask); + + mb(); + + invalidate_tce_cache(window->iommu, io_pte, npages); + + pr_debug("tce_build_cell(index=%lx,n=%lx,dir=%d,base_pte=%lx)\n", + index, npages, direction, base_pte); + return 0; } -static inline ioste -get_iost_entry(unsigned long iopt_base, unsigned long io_address, unsigned page_size) +static void tce_free_cell(struct iommu_table *tbl, long index, long npages) { - unsigned long ps; - unsigned long iostep; - unsigned long nnpt; - unsigned long shift; - switch (page_size) { - case 0x1000000: - ps = IOST_PS_16M; - nnpt = 0; /* one page per segment */ - shift = 5; /* segment has 16 iopt entries */ - break; + int i; + unsigned long *io_pte, pte; + struct iommu_window *window = + container_of(tbl, struct iommu_window, table); + + pr_debug("tce_free_cell(index=%lx,n=%lx)\n", index, npages); + +#ifdef CELL_IOMMU_REAL_UNMAP + pte = 0; +#else + /* spider bridge does PCI reads after freeing - insert a mapping + * to a scratch page instead of an invalid entry */ + pte = CBE_IOPTE_PP_R | CBE_IOPTE_M | CBE_IOPTE_SO_RW | + __pa(window->iommu->pad_page) | + (window->ioid & CBE_IOPTE_IOID_Mask); +#endif + + io_pte = (unsigned long *)tbl->it_base + (index - tbl->it_offset); - case 0x100000: - ps = IOST_PS_1M; - nnpt = 0; /* one page per segment */ - shift = 1; /* segment has 256 iopt entries */ - break; + for (i = 0; i < npages; i++) + io_pte[i] = pte; - case 0x10000: - ps = IOST_PS_64K; - nnpt = 0x07; /* 8 pages per io page table */ - shift = 0; /* all entries are used */ - break; + mb(); - case 0x1000: - ps = IOST_PS_4K; - nnpt = 0x7f; /* 128 pages per io page table */ - shift = 0; /* all entries are used */ - break; + invalidate_tce_cache(window->iommu, io_pte, npages); +} + +static irqreturn_t ioc_interrupt(int irq, void *data) +{ + unsigned long stat, spf; + struct cbe_iommu *iommu = data; + + stat = in_be64(iommu->xlate_regs + IOC_IO_ExcpStat); + spf = stat & IOC_IO_ExcpStat_SPF_Mask; + + /* Might 
want to rate limit it */ + printk(KERN_ERR "iommu: DMA exception 0x%016lx\n", stat); + printk(KERN_ERR " V=%d, SPF=[%c%c], RW=%s, IOID=0x%04x\n", + !!(stat & IOC_IO_ExcpStat_V), + (spf == IOC_IO_ExcpStat_SPF_S) ? 'S' : ' ', + (spf == IOC_IO_ExcpStat_SPF_P) ? 'P' : ' ', + (stat & IOC_IO_ExcpStat_RW_Mask) ? "Read" : "Write", + (unsigned int)(stat & IOC_IO_ExcpStat_IOID_Mask)); + printk(KERN_ERR " page=0x%016lx\n", + stat & IOC_IO_ExcpStat_ADDR_Mask); + + /* clear interrupt */ + stat &= ~IOC_IO_ExcpStat_V; + out_be64(iommu->xlate_regs + IOC_IO_ExcpStat, stat); + + return IRQ_HANDLED; +} - default: /* not a known compile time constant */ - { - /* BUILD_BUG_ON() is not usable here */ - extern void __get_iost_entry_bad_page_size(void); - __get_iost_entry_bad_page_size(); +static int cell_iommu_find_ioc(int nid, unsigned long *base) +{ + struct device_node *np; + struct resource r; + + *base = 0; + + /* First look for new style /be nodes */ + for_each_node_by_name(np, "ioc") { + if (of_node_to_nid(np) != nid) + continue; + if (of_address_to_resource(np, 0, &r)) { + printk(KERN_ERR "iommu: can't get address for %s\n", + np->full_name); + continue; } - break; + *base = r.start; + of_node_put(np); + return 0; } - iostep = iopt_base + - /* need 8 bytes per iopte */ - (((io_address / page_size * 8) - /* align io page tables on 4k page boundaries */ - << shift) - /* nnpt+1 pages go into each iopt */ - & ~(nnpt << 12)); + /* Ok, let's try the old way */ + for_each_node_by_type(np, "cpu") { + const unsigned int *nidp; + const unsigned long *tmp; + + nidp = of_get_property(np, "node-id", NULL); + if (nidp && *nidp == nid) { + tmp = of_get_property(np, "ioc-translation", NULL); + if (tmp) { + *base = *tmp; + of_node_put(np); + return 0; + } + } + } - nnpt++; /* this seems to work, but the documentation is not clear - about wether we put nnpt or nnpt-1 into the ioste bits. - In theory, this can't work for 4k pages. 
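The 0xc48 protection trick used by tce_build_cell() earlier in this file can be checked with a few lines of plain C. This is an illustrative sketch, not kernel code: it assumes a 64-bit unsigned long, takes CBE_IOPTE_PP_W/CBE_IOPTE_PP_R to be the two most significant IOPTE bits (matching the IOPT_PROT_* masks of the iommu.h removed at the end of this patch), and uses the usual dma_data_direction numbering (0 bidirectional, 1 to-device, 2 from-device).

#include <stdio.h>

#define PP_W 0x8000000000000000ul	/* device may write memory */
#define PP_R 0x4000000000000000ul	/* device may read memory  */

int main(void)
{
	/* three 4-bit fields, one per direction: RW | R | W */
	const unsigned long prot = 0xc48;
	const char *name[] = { "DMA_BIDIRECTIONAL", "DMA_TO_DEVICE", "DMA_FROM_DEVICE" };
	int dir;

	for (dir = 0; dir < 3; dir++) {
		/* shift the field for this direction up into the PP bits */
		unsigned long pp = (prot << (52 + 4 * dir)) & (PP_W | PP_R);

		printf("%-17s -> %s%s\n", name[dir],
		       (pp & PP_R) ? "R" : "", (pp & PP_W) ? "W" : "");
	}
	return 0;
}

The output (RW, R and W respectively) shows why the single constant works: each direction selects its own nibble, so no per-direction lookup table or extra global is needed.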
*/ - return mk_ioste(IOST_VALID_MASK - | (iostep & IOST_PT_BASE_MASK) - | ((nnpt << 5) & IOST_NNPT_MASK) - | (ps & IOST_PS_MASK)); + return -ENODEV; } -/* compute the address of an io pte */ -static inline unsigned long -get_ioptep(ioste iost_entry, unsigned long io_address) +static void cell_iommu_setup_stab(struct cbe_iommu *iommu, + unsigned long dbase, unsigned long dsize, + unsigned long fbase, unsigned long fsize) { - unsigned long iopt_base; - unsigned long page_size; - unsigned long page_number; - unsigned long iopt_offset; + struct page *page; + unsigned long segments, stab_size; + + segments = max(dbase + dsize, fbase + fsize) >> IO_SEGMENT_SHIFT; - iopt_base = iost_entry.val & IOST_PT_BASE_MASK; - page_size = iost_entry.val & IOST_PS_MASK; + pr_debug("%s: iommu[%d]: segments: %lu\n", + __func__, iommu->nid, segments); - /* decode page size to compute page number */ - page_number = (io_address & 0x0fffffff) >> (10 + 2 * page_size); - /* page number is an offset into the io page table */ - iopt_offset = (page_number << 3) & 0x7fff8ul; - return iopt_base + iopt_offset; + /* set up the segment table */ + stab_size = segments * sizeof(unsigned long); + page = alloc_pages_node(iommu->nid, GFP_KERNEL, get_order(stab_size)); + BUG_ON(!page); + iommu->stab = page_address(page); + memset(iommu->stab, 0, stab_size); } -/* compute the tag field of the iopt cache entry */ -static inline unsigned long -get_ioc_tag(ioste iost_entry, unsigned long io_address) +static unsigned long *cell_iommu_alloc_ptab(struct cbe_iommu *iommu, + unsigned long base, unsigned long size, unsigned long gap_base, + unsigned long gap_size, unsigned long page_shift) { - unsigned long iopte = get_ioptep(iost_entry, io_address); + struct page *page; + int i; + unsigned long reg, segments, pages_per_segment, ptab_size, + n_pte_pages, start_seg, *ptab; + + start_seg = base >> IO_SEGMENT_SHIFT; + segments = size >> IO_SEGMENT_SHIFT; + pages_per_segment = 1ull << IO_PAGENO_BITS(page_shift); + /* PTEs for each segment must start on a 4K bounday */ + pages_per_segment = max(pages_per_segment, + (1 << 12) / sizeof(unsigned long)); + + ptab_size = segments * pages_per_segment * sizeof(unsigned long); + pr_debug("%s: iommu[%d]: ptab_size: %lu, order: %d\n", __func__, + iommu->nid, ptab_size, get_order(ptab_size)); + page = alloc_pages_node(iommu->nid, GFP_KERNEL, get_order(ptab_size)); + BUG_ON(!page); + + ptab = page_address(page); + memset(ptab, 0, ptab_size); + + /* number of 4K pages needed for a page table */ + n_pte_pages = (pages_per_segment * sizeof(unsigned long)) >> 12; + + pr_debug("%s: iommu[%d]: stab at %p, ptab at %p, n_pte_pages: %lu\n", + __func__, iommu->nid, iommu->stab, ptab, + n_pte_pages); + + /* initialise the STEs */ + reg = IOSTE_V | ((n_pte_pages - 1) << 5); + + switch (page_shift) { + case 12: reg |= IOSTE_PS_4K; break; + case 16: reg |= IOSTE_PS_64K; break; + case 20: reg |= IOSTE_PS_1M; break; + case 24: reg |= IOSTE_PS_16M; break; + default: BUG(); + } + + gap_base = gap_base >> IO_SEGMENT_SHIFT; + gap_size = gap_size >> IO_SEGMENT_SHIFT; - return IOPT_VALID_MASK - | ((iopte & 0x00000000000000ff8ul) >> 3) - | ((iopte & 0x0000003fffffc0000ul) >> 9); + pr_debug("Setting up IOMMU stab:\n"); + for (i = start_seg; i < (start_seg + segments); i++) { + if (i >= gap_base && i < (gap_base + gap_size)) { + pr_debug("\toverlap at %d, skipping\n", i); + continue; + } + iommu->stab[i] = reg | (__pa(ptab) + (n_pte_pages << 12) * + (i - start_seg)); + pr_debug("\t[%d] 0x%016lx\n", i, iommu->stab[i]); + } + + 
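To sanity-check the sizing arithmetic in cell_iommu_alloc_ptab() above, the numbers for a hypothetical 512MB window with 4K I/O pages work out as below. The window size is made up for illustration; IO_SEGMENT_SHIFT (28) and IO_PAGENO_BITS come from the defines earlier in this file, and an LP64 unsigned long (8 bytes) is assumed, as on the 64-bit kernels this code targets.

#include <stdio.h>

#define IO_SEGMENT_SHIFT	28			/* 256MB I/O segments */
#define IO_PAGENO_BITS(shift)	(IO_SEGMENT_SHIFT - (shift))

int main(void)
{
	unsigned long size = 0x20000000ul;		/* hypothetical 512MB window */
	unsigned long page_shift = 12;			/* 4K I/O pages */
	unsigned long segments, pages_per_segment, ptab_size, n_pte_pages;

	segments = size >> IO_SEGMENT_SHIFT;
	pages_per_segment = 1ul << IO_PAGENO_BITS(page_shift);
	/* each segment's PTEs must start on a 4K boundary */
	if (pages_per_segment < (1 << 12) / sizeof(unsigned long))
		pages_per_segment = (1 << 12) / sizeof(unsigned long);

	ptab_size = segments * pages_per_segment * sizeof(unsigned long);
	n_pte_pages = (pages_per_segment * sizeof(unsigned long)) >> 12;

	printf("segments            : %lu\n", segments);		/* 2     */
	printf("pages per segment   : %lu\n", pages_per_segment);	/* 65536 */
	printf("page table bytes    : %lu\n", ptab_size);		/* 1MB   */
	printf("4K pages per segment: %lu\n", n_pte_pages);		/* 128   */
	return 0;
}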
return ptab; } -/* compute the hashed 6 bit index for the 4-way associative pte cache */ -static inline unsigned long -get_ioc_hash(ioste iost_entry, unsigned long io_address) +static void cell_iommu_enable_hardware(struct cbe_iommu *iommu) { - unsigned long iopte = get_ioptep(iost_entry, io_address); + int ret; + unsigned long reg, xlate_base; + unsigned int virq; + + if (cell_iommu_find_ioc(iommu->nid, &xlate_base)) + panic("%s: missing IOC register mappings for node %d\n", + __func__, iommu->nid); + + iommu->xlate_regs = ioremap(xlate_base, IOC_Reg_Size); + iommu->cmd_regs = iommu->xlate_regs + IOC_IOCmd_Offset; + + /* ensure that the STEs have updated */ + mb(); + + /* setup interrupts for the iommu. */ + reg = in_be64(iommu->xlate_regs + IOC_IO_ExcpStat); + out_be64(iommu->xlate_regs + IOC_IO_ExcpStat, + reg & ~IOC_IO_ExcpStat_V); + out_be64(iommu->xlate_regs + IOC_IO_ExcpMask, + IOC_IO_ExcpMask_PFE | IOC_IO_ExcpMask_SFE); + + virq = irq_create_mapping(NULL, + IIC_IRQ_IOEX_ATI | (iommu->nid << IIC_IRQ_NODE_SHIFT)); + BUG_ON(virq == NO_IRQ); + + ret = request_irq(virq, ioc_interrupt, 0, iommu->name, iommu); + BUG_ON(ret); + + /* set the IOC segment table origin register (and turn on the iommu) */ + reg = IOC_IOST_Origin_E | __pa(iommu->stab) | IOC_IOST_Origin_HW; + out_be64(iommu->xlate_regs + IOC_IOST_Origin, reg); + in_be64(iommu->xlate_regs + IOC_IOST_Origin); + + /* turn on IO translation */ + reg = in_be64(iommu->cmd_regs + IOC_IOCmd_Cfg) | IOC_IOCmd_Cfg_TE; + out_be64(iommu->cmd_regs + IOC_IOCmd_Cfg, reg); +} - return ((iopte & 0x000000000000001f8ul) >> 3) - ^ ((iopte & 0x00000000000020000ul) >> 17) - ^ ((iopte & 0x00000000000010000ul) >> 15) - ^ ((iopte & 0x00000000000008000ul) >> 13) - ^ ((iopte & 0x00000000000004000ul) >> 11) - ^ ((iopte & 0x00000000000002000ul) >> 9) - ^ ((iopte & 0x00000000000001000ul) >> 7); +static void cell_iommu_setup_hardware(struct cbe_iommu *iommu, + unsigned long base, unsigned long size) +{ + cell_iommu_setup_stab(iommu, base, size, 0, 0); + iommu->ptab = cell_iommu_alloc_ptab(iommu, base, size, 0, 0, + IOMMU_PAGE_SHIFT_4K); + cell_iommu_enable_hardware(iommu); } -/* same as above, but pretend that we have a simpler 1-way associative - pte cache with an 8 bit index */ -static inline unsigned long -get_ioc_hash_1way(ioste iost_entry, unsigned long io_address) +#if 0/* Unused for now */ +static struct iommu_window *find_window(struct cbe_iommu *iommu, + unsigned long offset, unsigned long size) { - unsigned long iopte = get_ioptep(iost_entry, io_address); + struct iommu_window *window; + + /* todo: check for overlapping (but not equal) windows) */ - return ((iopte & 0x000000000000001f8ul) >> 3) - ^ ((iopte & 0x00000000000020000ul) >> 17) - ^ ((iopte & 0x00000000000010000ul) >> 15) - ^ ((iopte & 0x00000000000008000ul) >> 13) - ^ ((iopte & 0x00000000000004000ul) >> 11) - ^ ((iopte & 0x00000000000002000ul) >> 9) - ^ ((iopte & 0x00000000000001000ul) >> 7) - ^ ((iopte & 0x0000000000000c000ul) >> 8); + list_for_each_entry(window, &(iommu->windows), list) { + if (window->offset == offset && window->size == size) + return window; + } + + return NULL; } +#endif -static inline ioste -get_iost_cache(void __iomem *base, unsigned long index) +static inline u32 cell_iommu_get_ioid(struct device_node *np) { - unsigned long __iomem *p = (base + IOC_ST_CACHE_DIR); - return mk_ioste(in_be64(&p[index])); + const u32 *ioid; + + ioid = of_get_property(np, "ioid", NULL); + if (ioid == NULL) { + printk(KERN_WARNING "iommu: missing ioid for %s using 0\n", + np->full_name); + 
return 0; + } + + return *ioid; } -static inline void -set_iost_cache(void __iomem *base, unsigned long index, ioste ste) +static struct iommu_window * __init +cell_iommu_setup_window(struct cbe_iommu *iommu, struct device_node *np, + unsigned long offset, unsigned long size, + unsigned long pte_offset) { - unsigned long __iomem *p = (base + IOC_ST_CACHE_DIR); - pr_debug("ioste %02lx was %016lx, store %016lx", index, - get_iost_cache(base, index).val, ste.val); - out_be64(&p[index], ste.val); - pr_debug(" now %016lx\n", get_iost_cache(base, index).val); + struct iommu_window *window; + struct page *page; + u32 ioid; + + ioid = cell_iommu_get_ioid(np); + + window = kzalloc_node(sizeof(*window), GFP_KERNEL, iommu->nid); + BUG_ON(window == NULL); + + window->offset = offset; + window->size = size; + window->ioid = ioid; + window->iommu = iommu; + + window->table.it_blocksize = 16; + window->table.it_base = (unsigned long)iommu->ptab; + window->table.it_index = iommu->nid; + window->table.it_page_shift = IOMMU_PAGE_SHIFT_4K; + window->table.it_offset = + (offset >> window->table.it_page_shift) + pte_offset; + window->table.it_size = size >> window->table.it_page_shift; + + iommu_init_table(&window->table, iommu->nid); + + pr_debug("\tioid %d\n", window->ioid); + pr_debug("\tblocksize %ld\n", window->table.it_blocksize); + pr_debug("\tbase 0x%016lx\n", window->table.it_base); + pr_debug("\toffset 0x%lx\n", window->table.it_offset); + pr_debug("\tsize %ld\n", window->table.it_size); + + list_add(&window->list, &iommu->windows); + + if (offset != 0) + return window; + + /* We need to map and reserve the first IOMMU page since it's used + * by the spider workaround. In theory, we only need to do that when + * running on spider but it doesn't really matter. + * + * This code also assumes that we have a window that starts at 0, + * which is the case on all spider based blades. + */ + page = alloc_pages_node(iommu->nid, GFP_KERNEL, 0); + BUG_ON(!page); + iommu->pad_page = page_address(page); + clear_page(iommu->pad_page); + + __set_bit(0, window->table.it_map); + tce_build_cell(&window->table, window->table.it_offset, 1, + (unsigned long)iommu->pad_page, DMA_TO_DEVICE, NULL); + + return window; } -static inline unsigned long -get_iopt_cache(void __iomem *base, unsigned long index, unsigned long *tag) +static struct cbe_iommu *cell_iommu_for_node(int nid) { - unsigned long __iomem *tags = (void *)(base + IOC_PT_CACHE_DIR); - unsigned long __iomem *p = (void *)(base + IOC_PT_CACHE_REG); + int i; - *tag = tags[index]; - rmb(); - return *p; + for (i = 0; i < cbe_nr_iommus; i++) + if (iommus[i].nid == nid) + return &iommus[i]; + return NULL; } -static inline void -set_iopt_cache(void __iomem *base, unsigned long index, - unsigned long tag, unsigned long val) +static unsigned long cell_dma_direct_offset; + +static unsigned long dma_iommu_fixed_base; + +/* iommu_fixed_is_weak is set if booted with iommu_fixed=weak */ +static int iommu_fixed_is_weak; + +static struct iommu_table *cell_get_iommu_table(struct device *dev) { - unsigned long __iomem *tags = base + IOC_PT_CACHE_DIR; - unsigned long __iomem *p = base + IOC_PT_CACHE_REG; + struct iommu_window *window; + struct cbe_iommu *iommu; + + /* Current implementation uses the first window available in that + * node's iommu. 
We -might- do something smarter later though it may + * never be necessary + */ + iommu = cell_iommu_for_node(dev_to_node(dev)); + if (iommu == NULL || list_empty(&iommu->windows)) { + dev_err(dev, "iommu: missing iommu for %s (node %d)\n", + of_node_full_name(dev->of_node), dev_to_node(dev)); + return NULL; + } + window = list_entry(iommu->windows.next, struct iommu_window, list); + + return &window->table; +} + +/* A coherent allocation implies strong ordering */ - out_be64(p, val); - out_be64(&tags[index], tag); +static void *dma_fixed_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t flag, + struct dma_attrs *attrs) +{ + if (iommu_fixed_is_weak) + return iommu_alloc_coherent(dev, cell_get_iommu_table(dev), + size, dma_handle, + device_to_mask(dev), flag, + dev_to_node(dev)); + else + return dma_direct_ops.alloc(dev, size, dma_handle, flag, + attrs); } -static inline void -set_iost_origin(void __iomem *base) +static void dma_fixed_free_coherent(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_handle, + struct dma_attrs *attrs) { - unsigned long __iomem *p = base + IOC_ST_ORIGIN; - unsigned long origin = IOSTO_ENABLE | IOSTO_SW; + if (iommu_fixed_is_weak) + iommu_free_coherent(cell_get_iommu_table(dev), size, vaddr, + dma_handle); + else + dma_direct_ops.free(dev, size, vaddr, dma_handle, attrs); +} - pr_debug("iost_origin %016lx, now %016lx\n", in_be64(p), origin); - out_be64(p, origin); +static dma_addr_t dma_fixed_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + if (iommu_fixed_is_weak == dma_get_attr(DMA_ATTR_WEAK_ORDERING, attrs)) + return dma_direct_ops.map_page(dev, page, offset, size, + direction, attrs); + else + return iommu_map_page(dev, cell_get_iommu_table(dev), page, + offset, size, device_to_mask(dev), + direction, attrs); } -static inline void -set_iocmd_config(void __iomem *base) +static void dma_fixed_unmap_page(struct device *dev, dma_addr_t dma_addr, + size_t size, enum dma_data_direction direction, + struct dma_attrs *attrs) { - unsigned long __iomem *p = base + 0xc00; - unsigned long conf; + if (iommu_fixed_is_weak == dma_get_attr(DMA_ATTR_WEAK_ORDERING, attrs)) + dma_direct_ops.unmap_page(dev, dma_addr, size, direction, + attrs); + else + iommu_unmap_page(cell_get_iommu_table(dev), dma_addr, size, + direction, attrs); +} - conf = in_be64(p); - pr_debug("iost_conf %016lx, now %016lx\n", conf, conf | IOCMD_CONF_TE); - out_be64(p, conf | IOCMD_CONF_TE); +static int dma_fixed_map_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + if (iommu_fixed_is_weak == dma_get_attr(DMA_ATTR_WEAK_ORDERING, attrs)) + return dma_direct_ops.map_sg(dev, sg, nents, direction, attrs); + else + return iommu_map_sg(dev, cell_get_iommu_table(dev), sg, nents, + device_to_mask(dev), direction, attrs); } -static void enable_mapping(void __iomem *base, void __iomem *mmio_base) +static void dma_fixed_unmap_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction direction, + struct dma_attrs *attrs) { - set_iocmd_config(base); - set_iost_origin(mmio_base); + if (iommu_fixed_is_weak == dma_get_attr(DMA_ATTR_WEAK_ORDERING, attrs)) + dma_direct_ops.unmap_sg(dev, sg, nents, direction, attrs); + else + iommu_unmap_sg(cell_get_iommu_table(dev), sg, nents, direction, + attrs); } -static void iommu_dev_setup_null(struct pci_dev *d) { } -static void 
iommu_bus_setup_null(struct pci_bus *b) { } +static int dma_fixed_dma_supported(struct device *dev, u64 mask) +{ + return mask == DMA_BIT_MASK(64); +} -struct cell_iommu { - unsigned long base; - unsigned long mmio_base; - void __iomem *mapped_base; - void __iomem *mapped_mmio_base; +static int dma_set_mask_and_switch(struct device *dev, u64 dma_mask); + +struct dma_map_ops dma_iommu_fixed_ops = { + .alloc = dma_fixed_alloc_coherent, + .free = dma_fixed_free_coherent, + .map_sg = dma_fixed_map_sg, + .unmap_sg = dma_fixed_unmap_sg, + .dma_supported = dma_fixed_dma_supported, + .set_dma_mask = dma_set_mask_and_switch, + .map_page = dma_fixed_map_page, + .unmap_page = dma_fixed_unmap_page, }; -static struct cell_iommu cell_iommus[NR_CPUS]; +static void cell_dma_dev_setup_fixed(struct device *dev); -/* initialize the iommu to support a simple linear mapping - * for each DMA window used by any device. For now, we - * happen to know that there is only one DMA window in use, - * starting at iopt_phys_offset. */ -static void cell_do_map_iommu(struct cell_iommu *iommu, - unsigned int ioid, - unsigned long map_start, - unsigned long map_size) +static void cell_dma_dev_setup(struct device *dev) { - unsigned long io_address, real_address; - void __iomem *ioc_base, *ioc_mmio_base; - ioste ioste; - unsigned long index; - - /* we pretend the io page table was at a very high address */ - const unsigned long fake_iopt = 0x10000000000ul; - const unsigned long io_page_size = 0x1000000; /* use 16M pages */ - const unsigned long io_segment_size = 0x10000000; /* 256M */ - - ioc_base = iommu->mapped_base; - ioc_mmio_base = iommu->mapped_mmio_base; - - for (real_address = 0, io_address = 0; - io_address <= map_start + map_size; - real_address += io_page_size, io_address += io_page_size) { - ioste = get_iost_entry(fake_iopt, io_address, io_page_size); - if ((real_address % io_segment_size) == 0) /* segment start */ - set_iost_cache(ioc_mmio_base, - io_address >> 28, ioste); - index = get_ioc_hash_1way(ioste, io_address); - pr_debug("addr %08lx, index %02lx, ioste %016lx\n", - io_address, index, ioste.val); - set_iopt_cache(ioc_mmio_base, - get_ioc_hash_1way(ioste, io_address), - get_ioc_tag(ioste, io_address), - get_iopt_entry(real_address-map_start, ioid, IOPT_PROT_RW)); - } + /* Order is important here, these are not mutually exclusive */ + if (get_dma_ops(dev) == &dma_iommu_fixed_ops) + cell_dma_dev_setup_fixed(dev); + else if (get_pci_dma_ops() == &dma_iommu_ops) + set_iommu_table_base(dev, cell_get_iommu_table(dev)); + else if (get_pci_dma_ops() == &dma_direct_ops) + set_dma_offset(dev, cell_dma_direct_offset); + else + BUG(); } -static void iommu_devnode_setup(struct device_node *d) +static void cell_pci_dma_dev_setup(struct pci_dev *dev) { - unsigned int *ioid; - unsigned long *dma_window, map_start, map_size, token; - struct cell_iommu *iommu; + cell_dma_dev_setup(&dev->dev); +} - ioid = (unsigned int *)get_property(d, "ioid", NULL); - if (!ioid) - pr_debug("No ioid entry found !\n"); +static int cell_of_bus_notify(struct notifier_block *nb, unsigned long action, + void *data) +{ + struct device *dev = data; - dma_window = (unsigned long *)get_property(d, "ibm,dma-window", NULL); - if (!dma_window) - pr_debug("No ibm,dma-window entry found !\n"); + /* We are only intereted in device addition */ + if (action != BUS_NOTIFY_ADD_DEVICE) + return 0; - map_start = dma_window[1]; - map_size = dma_window[2]; - token = dma_window[0] >> 32; + /* We use the PCI DMA ops */ + dev->archdata.dma_ops = get_pci_dma_ops(); 
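A subtle point in the dma_fixed_map_page()/dma_fixed_map_sg() callbacks above is the iommu_fixed_is_weak == dma_get_attr(DMA_ATTR_WEAK_ORDERING, attrs) test: a mapping is served by the direct ops over the fixed 1:1 window only when its requested ordering matches the ordering the fixed window was created with, and otherwise it falls back to the dynamic iommu window. A hedged sketch of that decision, with plain booleans standing in for the kernel types:

#include <stdbool.h>
#include <stdio.h>

/* Stand-in for the path selection in dma_fixed_map_page()/_map_sg():
 * the fixed window's PTEs carry one ordering (strong by default, weak
 * when booted with iommu_fixed=weak), so only mappings asking for that
 * same ordering may use it.
 */
static const char *pick_path(bool fixed_is_weak, bool wants_weak_ordering)
{
	return (fixed_is_weak == wants_weak_ordering)
		? "direct ops over the fixed window"
		: "dynamic iommu window";
}

int main(void)
{
	printf("strong boot, strong mapping: %s\n", pick_path(false, false));
	printf("strong boot, weak mapping  : %s\n", pick_path(false, true));
	printf("weak boot,   weak mapping  : %s\n", pick_path(true, true));
	printf("weak boot,   strong mapping: %s\n", pick_path(true, false));
	return 0;
}

Coherent allocations are handled differently: dma_fixed_alloc_coherent() keys off iommu_fixed_is_weak alone, since a coherent buffer always implies strong ordering.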
- iommu = &cell_iommus[token]; + cell_dma_dev_setup(dev); - cell_do_map_iommu(iommu, *ioid, map_start, map_size); + return 0; } -static void iommu_bus_setup(struct pci_bus *b) +static struct notifier_block cell_of_bus_notifier = { + .notifier_call = cell_of_bus_notify +}; + +static int __init cell_iommu_get_window(struct device_node *np, + unsigned long *base, + unsigned long *size) { - struct device_node *d = (struct device_node *)b->sysdata; - iommu_devnode_setup(d); -} + const __be32 *dma_window; + unsigned long index; + /* Use ibm,dma-window if available, else, hard code ! */ + dma_window = of_get_property(np, "ibm,dma-window", NULL); + if (dma_window == NULL) { + *base = 0; + *size = 0x80000000u; + return -ENODEV; + } -static int cell_map_iommu_hardcoded(int num_nodes) -{ - struct cell_iommu *iommu = NULL; + of_parse_dma_window(np, dma_window, &index, base, size); + return 0; +} - pr_debug("%s(%d): Using hardcoded defaults\n", __FUNCTION__, __LINE__); +static struct cbe_iommu * __init cell_iommu_alloc(struct device_node *np) +{ + struct cbe_iommu *iommu; + int nid, i; + + /* Get node ID */ + nid = of_node_to_nid(np); + if (nid < 0) { + printk(KERN_ERR "iommu: failed to get node for %s\n", + np->full_name); + return NULL; + } + pr_debug("iommu: setting up iommu for node %d (%s)\n", + nid, np->full_name); + + /* XXX todo: If we can have multiple windows on the same IOMMU, which + * isn't the case today, we probably want here to check whether the + * iommu for that node is already setup. + * However, there might be issue with getting the size right so let's + * ignore that for now. We might want to completely get rid of the + * multiple window support since the cell iommu supports per-page ioids + */ + + if (cbe_nr_iommus >= NR_IOMMUS) { + printk(KERN_ERR "iommu: too many IOMMUs detected ! 
(%s)\n", + np->full_name); + return NULL; + } - /* node 0 */ - iommu = &cell_iommus[0]; - iommu->mapped_base = __ioremap(0x20000511000, 0x1000, _PAGE_NO_CACHE); - iommu->mapped_mmio_base = __ioremap(0x20000510000, 0x1000, _PAGE_NO_CACHE); + /* Init base fields */ + i = cbe_nr_iommus++; + iommu = &iommus[i]; + iommu->stab = NULL; + iommu->nid = nid; + snprintf(iommu->name, sizeof(iommu->name), "iommu%d", i); + INIT_LIST_HEAD(&iommu->windows); - enable_mapping(iommu->mapped_base, iommu->mapped_mmio_base); + return iommu; +} - cell_do_map_iommu(iommu, 0x048a, - 0x20000000ul,0x20000000ul); +static void __init cell_iommu_init_one(struct device_node *np, + unsigned long offset) +{ + struct cbe_iommu *iommu; + unsigned long base, size; - if (num_nodes < 2) - return 0; + iommu = cell_iommu_alloc(np); + if (!iommu) + return; - /* node 1 */ - iommu = &cell_iommus[1]; - iommu->mapped_base = __ioremap(0x30000511000, 0x1000, _PAGE_NO_CACHE); - iommu->mapped_mmio_base = __ioremap(0x30000510000, 0x1000, _PAGE_NO_CACHE); + /* Obtain a window for it */ + cell_iommu_get_window(np, &base, &size); - enable_mapping(iommu->mapped_base, iommu->mapped_mmio_base); + pr_debug("\ttranslating window 0x%lx...0x%lx\n", + base, base + size - 1); - cell_do_map_iommu(iommu, 0x048a, - 0x20000000,0x20000000ul); + /* Initialize the hardware */ + cell_iommu_setup_hardware(iommu, base, size); - return 0; + /* Setup the iommu_table */ + cell_iommu_setup_window(iommu, np, base, size, + offset >> IOMMU_PAGE_SHIFT_4K); } +static void __init cell_disable_iommus(void) +{ + int node; + unsigned long base, val; + void __iomem *xregs, *cregs; + + /* Make sure IOC translation is disabled on all nodes */ + for_each_online_node(node) { + if (cell_iommu_find_ioc(node, &base)) + continue; + xregs = ioremap(base, IOC_Reg_Size); + if (xregs == NULL) + continue; + cregs = xregs + IOC_IOCmd_Offset; + + pr_debug("iommu: cleaning up iommu on node %d\n", node); + + out_be64(xregs + IOC_IOST_Origin, 0); + (void)in_be64(xregs + IOC_IOST_Origin); + val = in_be64(cregs + IOC_IOCmd_Cfg); + val &= ~IOC_IOCmd_Cfg_TE; + out_be64(cregs + IOC_IOCmd_Cfg, val); + (void)in_be64(cregs + IOC_IOCmd_Cfg); + + iounmap(xregs); + } +} -static int cell_map_iommu(void) +static int __init cell_iommu_init_disabled(void) { - unsigned int num_nodes = 0, *node_id; - unsigned long *base, *mmio_base; - struct device_node *dn; - struct cell_iommu *iommu = NULL; + struct device_node *np = NULL; + unsigned long base = 0, size; + + /* When no iommu is present, we use direct DMA ops */ + set_pci_dma_ops(&dma_direct_ops); + + /* First make sure all IOC translation is turned off */ + cell_disable_iommus(); + + /* If we have no Axon, we set up the spider DMA magic offset */ + if (of_find_node_by_name(NULL, "axon") == NULL) + cell_dma_direct_offset = SPIDER_DMA_OFFSET; + + /* Now we need to check to see where the memory is mapped + * in PCI space. We assume that all busses use the same dma + * window which is always the case so far on Cell, thus we + * pick up the first pci-internal node we can find and check + * the DMA window from there. 
+ */ + for_each_node_by_name(np, "axon") { + if (np->parent == NULL || np->parent->parent != NULL) + continue; + if (cell_iommu_get_window(np, &base, &size) == 0) + break; + } + if (np == NULL) { + for_each_node_by_name(np, "pci-internal") { + if (np->parent == NULL || np->parent->parent != NULL) + continue; + if (cell_iommu_get_window(np, &base, &size) == 0) + break; + } + } + of_node_put(np); + + /* If we found a DMA window, we check if it's big enough to enclose + * all of physical memory. If not, we force enable IOMMU + */ + if (np && size < memblock_end_of_DRAM()) { + printk(KERN_WARNING "iommu: force-enabled, dma window" + " (%ldMB) smaller than total memory (%lldMB)\n", + size >> 20, memblock_end_of_DRAM() >> 20); + return -ENODEV; + } - /* determine number of nodes (=iommus) */ - pr_debug("%s(%d): determining number of nodes...", __FUNCTION__, __LINE__); - for(dn = of_find_node_by_type(NULL, "cpu"); - dn; - dn = of_find_node_by_type(dn, "cpu")) { - node_id = (unsigned int *)get_property(dn, "node-id", NULL); + cell_dma_direct_offset += base; - if (num_nodes < *node_id) - num_nodes = *node_id; - } + if (cell_dma_direct_offset != 0) + ppc_md.pci_dma_dev_setup = cell_pci_dma_dev_setup; - num_nodes++; - pr_debug("%i found.\n", num_nodes); + printk("iommu: disabled, direct DMA offset is 0x%lx\n", + cell_dma_direct_offset); + + return 0; +} - /* map the iommu registers for each node */ - pr_debug("%s(%d): Looping through nodes\n", __FUNCTION__, __LINE__); - for(dn = of_find_node_by_type(NULL, "cpu"); - dn; - dn = of_find_node_by_type(dn, "cpu")) { +/* + * Fixed IOMMU mapping support + * + * This code adds support for setting up a fixed IOMMU mapping on certain + * cell machines. For 64-bit devices this avoids the performance overhead of + * mapping and unmapping pages at runtime. 32-bit devices are unable to use + * the fixed mapping. + * + * The fixed mapping is established at boot, and maps all of physical memory + * 1:1 into device space at some offset. On machines with < 30 GB of memory + * we setup the fixed mapping immediately above the normal IOMMU window. + * + * For example a machine with 4GB of memory would end up with the normal + * IOMMU window from 0-2GB and the fixed mapping window from 2GB to 6GB. In + * this case a 64-bit device wishing to DMA to 1GB would be told to DMA to + * 3GB, plus any offset required by firmware. The firmware offset is encoded + * in the "dma-ranges" property. + * + * On machines with 30GB or more of memory, we are unable to place the fixed + * mapping above the normal IOMMU window as we would run out of address space. + * Instead we move the normal IOMMU window to coincide with the hash page + * table, this region does not need to be part of the fixed mapping as no + * device should ever be DMA'ing to it. We then setup the fixed mapping + * from 0 to 32GB. 
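The layout example given in the comment above (4GB of RAM, dynamic window at 0-2GB, fixed window at 2GB-6GB, so a buffer at 1GB is addressed by the device at 3GB) can be spelled out numerically. This sketch only illustrates the address arithmetic; the window base and the buffer address are made-up values, and any additional firmware offset from dma-ranges is ignored.

#include <stdio.h>

int main(void)
{
	unsigned long dyn_base   = 0x0ul;		/* dynamic window: 0 .. 2GB */
	unsigned long dyn_size   = 0x80000000ul;
	unsigned long ram_size   = 0x100000000ul;	/* 4GB of RAM */
	unsigned long fixed_base = dyn_base + dyn_size;	/* fixed window starts at 2GB */
	unsigned long phys       = 0x40000000ul;	/* hypothetical buffer at 1GB */

	printf("fixed window: 0x%lx .. 0x%lx\n",
	       fixed_base, fixed_base + ram_size);	/* 0x80000000 .. 0x180000000 */
	printf("phys 0x%lx -> bus 0x%lx\n",
	       phys, fixed_base + phys);		/* 1GB -> 3GB (0xc0000000) */
	return 0;
}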
+ */ - node_id = (unsigned int *)get_property(dn, "node-id", NULL); - base = (unsigned long *)get_property(dn, "ioc-cache", NULL); - mmio_base = (unsigned long *)get_property(dn, "ioc-translation", NULL); +static u64 cell_iommu_get_fixed_address(struct device *dev) +{ + u64 cpu_addr, size, best_size, dev_addr = OF_BAD_ADDR; + struct device_node *np; + const u32 *ranges = NULL; + int i, len, best, naddr, nsize, pna, range_size; + + np = of_node_get(dev->of_node); + while (1) { + naddr = of_n_addr_cells(np); + nsize = of_n_size_cells(np); + np = of_get_next_parent(np); + if (!np) + break; + + ranges = of_get_property(np, "dma-ranges", &len); + + /* Ignore empty ranges, they imply no translation required */ + if (ranges && len > 0) + break; + } - if (!base || !mmio_base || !node_id) - return cell_map_iommu_hardcoded(num_nodes); + if (!ranges) { + dev_dbg(dev, "iommu: no dma-ranges found\n"); + goto out; + } - iommu = &cell_iommus[*node_id]; - iommu->base = *base; - iommu->mmio_base = *mmio_base; + len /= sizeof(u32); - iommu->mapped_base = __ioremap(*base, 0x1000, _PAGE_NO_CACHE); - iommu->mapped_mmio_base = __ioremap(*mmio_base, 0x1000, _PAGE_NO_CACHE); + pna = of_n_addr_cells(np); + range_size = naddr + nsize + pna; - enable_mapping(iommu->mapped_base, - iommu->mapped_mmio_base); + /* dma-ranges format: + * child addr : naddr cells + * parent addr : pna cells + * size : nsize cells + */ + for (i = 0, best = -1, best_size = 0; i < len; i += range_size) { + cpu_addr = of_translate_dma_address(np, ranges + i + naddr); + size = of_read_number(ranges + i + naddr + pna, nsize); - /* everything else will be done in iommu_bus_setup */ + if (cpu_addr == 0 && size > best_size) { + best = i; + best_size = size; + } } - return 1; + if (best >= 0) { + dev_addr = of_read_number(ranges + best, naddr); + } else + dev_dbg(dev, "iommu: no suitable range found!\n"); + +out: + of_node_put(np); + + return dev_addr; } -static void *cell_alloc_coherent(struct device *hwdev, size_t size, - dma_addr_t *dma_handle, gfp_t flag) +static int dma_set_mask_and_switch(struct device *dev, u64 dma_mask) { - void *ret; - - ret = (void *)__get_free_pages(flag, get_order(size)); - if (ret != NULL) { - memset(ret, 0, size); - *dma_handle = virt_to_abs(ret) | CELL_DMA_VALID; + if (!dev->dma_mask || !dma_supported(dev, dma_mask)) + return -EIO; + + if (dma_mask == DMA_BIT_MASK(64) && + cell_iommu_get_fixed_address(dev) != OF_BAD_ADDR) + { + dev_dbg(dev, "iommu: 64-bit OK, using fixed ops\n"); + set_dma_ops(dev, &dma_iommu_fixed_ops); + } else { + dev_dbg(dev, "iommu: not 64-bit, using default ops\n"); + set_dma_ops(dev, get_pci_dma_ops()); } - return ret; + + cell_dma_dev_setup(dev); + + *dev->dma_mask = dma_mask; + + return 0; } -static void cell_free_coherent(struct device *hwdev, size_t size, - void *vaddr, dma_addr_t dma_handle) +static void cell_dma_dev_setup_fixed(struct device *dev) { - free_pages((unsigned long)vaddr, get_order(size)); + u64 addr; + + addr = cell_iommu_get_fixed_address(dev) + dma_iommu_fixed_base; + set_dma_offset(dev, addr); + + dev_dbg(dev, "iommu: fixed addr = %llx\n", addr); } -static dma_addr_t cell_map_single(struct device *hwdev, void *ptr, - size_t size, enum dma_data_direction direction) +static void insert_16M_pte(unsigned long addr, unsigned long *ptab, + unsigned long base_pte) { - return virt_to_abs(ptr) | CELL_DMA_VALID; + unsigned long segment, offset; + + segment = addr >> IO_SEGMENT_SHIFT; + offset = (addr >> 24) - (segment << IO_PAGENO_BITS(24)); + ptab = ptab + (segment * (1 << 12) / 
sizeof(unsigned long)); + + pr_debug("iommu: addr %lx ptab %p segment %lx offset %lx\n", + addr, ptab, segment, offset); + + ptab[offset] = base_pte | (__pa(addr) & CBE_IOPTE_RPN_Mask); } -static void cell_unmap_single(struct device *hwdev, dma_addr_t dma_addr, - size_t size, enum dma_data_direction direction) +static void cell_iommu_setup_fixed_ptab(struct cbe_iommu *iommu, + struct device_node *np, unsigned long dbase, unsigned long dsize, + unsigned long fbase, unsigned long fsize) { + unsigned long base_pte, uaddr, ioaddr, *ptab; + + ptab = cell_iommu_alloc_ptab(iommu, fbase, fsize, dbase, dsize, 24); + + dma_iommu_fixed_base = fbase; + + pr_debug("iommu: mapping 0x%lx pages from 0x%lx\n", fsize, fbase); + + base_pte = CBE_IOPTE_PP_W | CBE_IOPTE_PP_R | CBE_IOPTE_M | + (cell_iommu_get_ioid(np) & CBE_IOPTE_IOID_Mask); + + if (iommu_fixed_is_weak) + pr_info("IOMMU: Using weak ordering for fixed mapping\n"); + else { + pr_info("IOMMU: Using strong ordering for fixed mapping\n"); + base_pte |= CBE_IOPTE_SO_RW; + } + + for (uaddr = 0; uaddr < fsize; uaddr += (1 << 24)) { + /* Don't touch the dynamic region */ + ioaddr = uaddr + fbase; + if (ioaddr >= dbase && ioaddr < (dbase + dsize)) { + pr_debug("iommu: fixed/dynamic overlap, skipping\n"); + continue; + } + + insert_16M_pte(uaddr, ptab, base_pte); + } + + mb(); } -static int cell_map_sg(struct device *hwdev, struct scatterlist *sg, - int nents, enum dma_data_direction direction) +static int __init cell_iommu_fixed_mapping_init(void) { - int i; + unsigned long dbase, dsize, fbase, fsize, hbase, hend; + struct cbe_iommu *iommu; + struct device_node *np; + + /* The fixed mapping is only supported on axon machines */ + np = of_find_node_by_name(NULL, "axon"); + of_node_put(np); + + if (!np) { + pr_debug("iommu: fixed mapping disabled, no axons found\n"); + return -1; + } + + /* We must have dma-ranges properties for fixed mapping to work */ + np = of_find_node_with_property(NULL, "dma-ranges"); + of_node_put(np); + + if (!np) { + pr_debug("iommu: no dma-ranges found, no fixed mapping\n"); + return -1; + } + + /* The default setup is to have the fixed mapping sit after the + * dynamic region, so find the top of the largest IOMMU window + * on any axon, then add the size of RAM and that's our max value. + * If that is > 32GB we have to do other shennanigans. + */ + fbase = 0; + for_each_node_by_name(np, "axon") { + cell_iommu_get_window(np, &dbase, &dsize); + fbase = max(fbase, dbase + dsize); + } + + fbase = _ALIGN_UP(fbase, 1 << IO_SEGMENT_SHIFT); + fsize = memblock_phys_mem_size(); + + if ((fbase + fsize) <= 0x800000000ul) + hbase = 0; /* use the device tree window */ + else { + /* If we're over 32 GB we need to cheat. We can't map all of + * RAM with the fixed mapping, and also fit the dynamic + * region. So try to place the dynamic region where the hash + * table sits, drivers never need to DMA to it, we don't + * need a fixed mapping for that area. + */ + if (!htab_address) { + pr_debug("iommu: htab is NULL, on LPAR? 
Huh?\n"); + return -1; + } + hbase = __pa(htab_address); + hend = hbase + htab_size_bytes; + + /* The window must start and end on a segment boundary */ + if ((hbase != _ALIGN_UP(hbase, 1 << IO_SEGMENT_SHIFT)) || + (hend != _ALIGN_UP(hend, 1 << IO_SEGMENT_SHIFT))) { + pr_debug("iommu: hash window not segment aligned\n"); + return -1; + } + + /* Check the hash window fits inside the real DMA window */ + for_each_node_by_name(np, "axon") { + cell_iommu_get_window(np, &dbase, &dsize); + + if (hbase < dbase || (hend > (dbase + dsize))) { + pr_debug("iommu: hash window doesn't fit in" + "real DMA window\n"); + return -1; + } + } + + fbase = 0; + } + + /* Setup the dynamic regions */ + for_each_node_by_name(np, "axon") { + iommu = cell_iommu_alloc(np); + BUG_ON(!iommu); + + if (hbase == 0) + cell_iommu_get_window(np, &dbase, &dsize); + else { + dbase = hbase; + dsize = htab_size_bytes; + } - for (i = 0; i < nents; i++, sg++) { - sg->dma_address = (page_to_phys(sg->page) + sg->offset) - | CELL_DMA_VALID; - sg->dma_length = sg->length; + printk(KERN_DEBUG "iommu: node %d, dynamic window 0x%lx-0x%lx " + "fixed window 0x%lx-0x%lx\n", iommu->nid, dbase, + dbase + dsize, fbase, fbase + fsize); + + cell_iommu_setup_stab(iommu, dbase, dsize, fbase, fsize); + iommu->ptab = cell_iommu_alloc_ptab(iommu, dbase, dsize, 0, 0, + IOMMU_PAGE_SHIFT_4K); + cell_iommu_setup_fixed_ptab(iommu, np, dbase, dsize, + fbase, fsize); + cell_iommu_enable_hardware(iommu); + cell_iommu_setup_window(iommu, np, dbase, dsize, 0); } - return nents; + dma_iommu_ops.set_dma_mask = dma_set_mask_and_switch; + set_pci_dma_ops(&dma_iommu_ops); + + return 0; } -static void cell_unmap_sg(struct device *hwdev, struct scatterlist *sg, - int nents, enum dma_data_direction direction) +static int iommu_fixed_disabled; + +static int __init setup_iommu_fixed(char *str) { + struct device_node *pciep; + + if (strcmp(str, "off") == 0) + iommu_fixed_disabled = 1; + + /* If we can find a pcie-endpoint in the device tree assume that + * we're on a triblade or a CAB so by default the fixed mapping + * should be set to be weakly ordered; but only if the boot + * option WASN'T set for strong ordering + */ + pciep = of_find_node_by_type(NULL, "pcie-endpoint"); + + if (strcmp(str, "weak") == 0 || (pciep && strcmp(str, "strong") != 0)) + iommu_fixed_is_weak = 1; + + of_node_put(pciep); + + return 1; } +__setup("iommu_fixed=", setup_iommu_fixed); -static int cell_dma_supported(struct device *dev, u64 mask) +static u64 cell_dma_get_required_mask(struct device *dev) { - return mask < 0x100000000ull; + struct dma_map_ops *dma_ops; + + if (!dev->dma_mask) + return 0; + + if (!iommu_fixed_disabled && + cell_iommu_get_fixed_address(dev) != OF_BAD_ADDR) + return DMA_BIT_MASK(64); + + dma_ops = get_dma_ops(dev); + if (dma_ops->get_required_mask) + return dma_ops->get_required_mask(dev); + + WARN_ONCE(1, "no get_required_mask in %p ops", dma_ops); + + return DMA_BIT_MASK(64); } -void cell_init_iommu(void) +static int __init cell_iommu_init(void) { - int setup_bus = 0; - - if (of_find_node_by_path("/mambo")) { - pr_info("Not using iommu on systemsim\n"); - } else { + struct device_node *np; + + /* If IOMMU is disabled or we have little enough RAM to not need + * to enable it, we setup a direct mapping. + * + * Note: should we make sure we have the IOMMU actually disabled ? + */ + if (iommu_is_off || + (!iommu_force_on && memblock_end_of_DRAM() <= 0x80000000ull)) + if (cell_iommu_init_disabled() == 0) + goto bail; + + /* Setup various ppc_md. 
callbacks */ + ppc_md.pci_dma_dev_setup = cell_pci_dma_dev_setup; + ppc_md.dma_get_required_mask = cell_dma_get_required_mask; + ppc_md.tce_build = tce_build_cell; + ppc_md.tce_free = tce_free_cell; + + if (!iommu_fixed_disabled && cell_iommu_fixed_mapping_init() == 0) + goto bail; + + /* Create an iommu for each /axon node. */ + for_each_node_by_name(np, "axon") { + if (np->parent == NULL || np->parent->parent != NULL) + continue; + cell_iommu_init_one(np, 0); + } - if (!(of_chosen && - get_property(of_chosen, "linux,iommu-off", NULL))) - setup_bus = cell_map_iommu(); - - if (setup_bus) { - pr_debug("%s: IOMMU mapping activated\n", __FUNCTION__); - ppc_md.iommu_dev_setup = iommu_dev_setup_null; - ppc_md.iommu_bus_setup = iommu_bus_setup; - } else { - pr_debug("%s: IOMMU mapping activated, " - "no device action necessary\n", __FUNCTION__); - /* Direct I/O, IOMMU off */ - ppc_md.iommu_dev_setup = iommu_dev_setup_null; - ppc_md.iommu_bus_setup = iommu_bus_setup_null; - } + /* Create an iommu for each toplevel /pci-internal node for + * old hardware/firmware + */ + for_each_node_by_name(np, "pci-internal") { + if (np->parent == NULL || np->parent->parent != NULL) + continue; + cell_iommu_init_one(np, SPIDER_DMA_OFFSET); } - pci_dma_ops.alloc_coherent = cell_alloc_coherent; - pci_dma_ops.free_coherent = cell_free_coherent; - pci_dma_ops.map_single = cell_map_single; - pci_dma_ops.unmap_single = cell_unmap_single; - pci_dma_ops.map_sg = cell_map_sg; - pci_dma_ops.unmap_sg = cell_unmap_sg; - pci_dma_ops.dma_supported = cell_dma_supported; + /* Setup default PCI iommu ops */ + set_pci_dma_ops(&dma_iommu_ops); + + bail: + /* Register callbacks on OF platform device addition/removal + * to handle linking them to the right DMA operations + */ + bus_register_notifier(&platform_bus_type, &cell_of_bus_notifier); + + return 0; } +machine_arch_initcall(cell, cell_iommu_init); +machine_arch_initcall(celleb_native, cell_iommu_init); + diff --git a/arch/powerpc/platforms/cell/iommu.h b/arch/powerpc/platforms/cell/iommu.h deleted file mode 100644 index 490d77abfe8..00000000000 --- a/arch/powerpc/platforms/cell/iommu.h +++ /dev/null @@ -1,65 +0,0 @@ -#ifndef CELL_IOMMU_H -#define CELL_IOMMU_H - -/* some constants */ -enum { - /* segment table entries */ - IOST_VALID_MASK = 0x8000000000000000ul, - IOST_TAG_MASK = 0x3000000000000000ul, - IOST_PT_BASE_MASK = 0x000003fffffff000ul, - IOST_NNPT_MASK = 0x0000000000000fe0ul, - IOST_PS_MASK = 0x000000000000000ful, - - IOST_PS_4K = 0x1, - IOST_PS_64K = 0x3, - IOST_PS_1M = 0x5, - IOST_PS_16M = 0x7, - - /* iopt tag register */ - IOPT_VALID_MASK = 0x0000000200000000ul, - IOPT_TAG_MASK = 0x00000001fffffffful, - - /* iopt cache register */ - IOPT_PROT_MASK = 0xc000000000000000ul, - IOPT_PROT_NONE = 0x0000000000000000ul, - IOPT_PROT_READ = 0x4000000000000000ul, - IOPT_PROT_WRITE = 0x8000000000000000ul, - IOPT_PROT_RW = 0xc000000000000000ul, - IOPT_COHERENT = 0x2000000000000000ul, - - IOPT_ORDER_MASK = 0x1800000000000000ul, - /* order access to same IOID/VC on same address */ - IOPT_ORDER_ADDR = 0x0800000000000000ul, - /* similar, but only after a write access */ - IOPT_ORDER_WRITES = 0x1000000000000000ul, - /* Order all accesses to same IOID/VC */ - IOPT_ORDER_VC = 0x1800000000000000ul, - - IOPT_RPN_MASK = 0x000003fffffff000ul, - IOPT_HINT_MASK = 0x0000000000000800ul, - IOPT_IOID_MASK = 0x00000000000007fful, - - IOSTO_ENABLE = 0x8000000000000000ul, - IOSTO_ORIGIN = 0x000003fffffff000ul, - IOSTO_HW = 0x0000000000000800ul, - IOSTO_SW = 0x0000000000000400ul, - - IOCMD_CONF_TE = 
0x0000800000000000ul, - - /* memory mapped registers */ - IOC_PT_CACHE_DIR = 0x000, - IOC_ST_CACHE_DIR = 0x800, - IOC_PT_CACHE_REG = 0x910, - IOC_ST_ORIGIN = 0x918, - IOC_CONF = 0x930, - - /* The high bit needs to be set on every DMA address, - only 2GB are addressable */ - CELL_DMA_VALID = 0x80000000, - CELL_DMA_MASK = 0x7fffffff, -}; - - -void cell_init_iommu(void); - -#endif diff --git a/arch/powerpc/platforms/cell/pervasive.c b/arch/powerpc/platforms/cell/pervasive.c index e0e051c675d..d17e98bc0c1 100644 --- a/arch/powerpc/platforms/cell/pervasive.c +++ b/arch/powerpc/platforms/cell/pervasive.c @@ -23,7 +23,6 @@ #undef DEBUG -#include <linux/config.h> #include <linux/interrupt.h> #include <linux/irq.h> #include <linux/percpu.h> @@ -35,111 +34,54 @@ #include <asm/prom.h> #include <asm/pgtable.h> #include <asm/reg.h> +#include <asm/cell-regs.h> #include "pervasive.h" -static DEFINE_SPINLOCK(cbe_pervasive_lock); -struct cbe_pervasive { - struct pmd_regs __iomem *regs; - unsigned int thread; -}; - -/* can't use per_cpu from setup_arch */ -static struct cbe_pervasive cbe_pervasive[NR_CPUS]; - -static void __init cbe_enable_pause_zero(void) +static void cbe_power_save(void) { - unsigned long thread_switch_control; - unsigned long temp_register; - struct cbe_pervasive *p; - int thread; - - spin_lock_irq(&cbe_pervasive_lock); - p = &cbe_pervasive[smp_processor_id()]; + unsigned long ctrl, thread_switch_control; - if (!cbe_pervasive->regs) - goto out; - - pr_debug("Power Management: CPU %d\n", smp_processor_id()); - - /* Enable Pause(0) control bit */ - temp_register = in_be64(&p->regs->pm_control); + /* Ensure our interrupt state is properly tracked */ + if (!prep_irq_for_idle()) + return; - out_be64(&p->regs->pm_control, - temp_register|PMD_PAUSE_ZERO_CONTROL); + ctrl = mfspr(SPRN_CTRLF); /* Enable DEC and EE interrupt request */ thread_switch_control = mfspr(SPRN_TSC_CELL); thread_switch_control |= TSC_CELL_EE_ENABLE | TSC_CELL_EE_BOOST; - switch ((mfspr(SPRN_CTRLF) & CTRL_CT)) { + switch (ctrl & CTRL_CT) { case CTRL_CT0: thread_switch_control |= TSC_CELL_DEC_ENABLE_0; - thread = 0; break; case CTRL_CT1: thread_switch_control |= TSC_CELL_DEC_ENABLE_1; - thread = 1; break; default: printk(KERN_WARNING "%s: unknown configuration\n", - __FUNCTION__); - thread = -1; + __func__); break; } - - if (p->thread != thread) - printk(KERN_WARNING "%s: device tree inconsistant, " - "cpu %i: %d/%d\n", __FUNCTION__, - smp_processor_id(), - p->thread, thread); - mtspr(SPRN_TSC_CELL, thread_switch_control); -out: - spin_unlock_irq(&cbe_pervasive_lock); -} - -static void cbe_idle(void) -{ - unsigned long ctrl; - - cbe_enable_pause_zero(); - - while (1) { - if (!need_resched()) { - local_irq_disable(); - while (!need_resched()) { - /* go into low thread priority */ - HMT_low(); - - /* - * atomically disable thread execution - * and runlatch. - * External and Decrementer exceptions - * are still handled when the thread - * is disabled but now enter in - * cbe_system_reset_exception() - */ - ctrl = mfspr(SPRN_CTRLF); - ctrl &= ~(CTRL_RUNLATCH | CTRL_TE); - mtspr(SPRN_CTRLT, ctrl); - } - /* restore thread prio */ - HMT_medium(); - local_irq_enable(); - } - - /* - * turn runlatch on again before scheduling the - * process we just woke up - */ - ppc64_runlatch_on(); - - preempt_enable_no_resched(); - schedule(); - preempt_disable(); - } + /* + * go into low thread priority, medium priority will be + * restored for us after wake-up. + */ + HMT_low(); + + /* + * atomically disable thread execution and runlatch. 
+ * External and Decrementer exceptions are still handled when the + * thread is disabled but now enter in cbe_system_reset_exception() + */ + ctrl &= ~(CTRL_RUNLATCH | CTRL_TE); + mtspr(SPRN_CTRLT, ctrl); + + /* Re-enable interrupts in MSR */ + __hard_irq_enable(); } static int cbe_system_reset_exception(struct pt_regs *regs) @@ -152,8 +94,15 @@ static int cbe_system_reset_exception(struct pt_regs *regs) timer_interrupt(regs); break; case SRR1_WAKEMT: - /* no action required */ + return cbe_sysreset_hack(); +#ifdef CONFIG_CBE_RAS + case SRR1_WAKESYSERR: + cbe_system_error_exception(regs); break; + case SRR1_WAKETHERM: + cbe_thermal_exception(regs); + break; +#endif /* CONFIG_CBE_RAS */ default: /* do system reset */ return 0; @@ -162,68 +111,23 @@ static int cbe_system_reset_exception(struct pt_regs *regs) return 1; } -static int __init cbe_find_pmd_mmio(int cpu, struct cbe_pervasive *p) +void __init cbe_pervasive_init(void) { - struct device_node *node; - unsigned int *int_servers; - char *addr; - unsigned long real_address; - unsigned int size; - - struct pmd_regs __iomem *pmd_mmio_area; - int hardid, thread; - int proplen; - - pmd_mmio_area = NULL; - hardid = get_hard_smp_processor_id(cpu); - for (node = NULL; (node = of_find_node_by_type(node, "cpu"));) { - int_servers = (void *) get_property(node, - "ibm,ppc-interrupt-server#s", &proplen); - if (!int_servers) { - printk(KERN_WARNING "%s misses " - "ibm,ppc-interrupt-server#s property", - node->full_name); - continue; - } - for (thread = 0; thread < proplen / sizeof (int); thread++) { - if (hardid == int_servers[thread]) { - addr = get_property(node, "pervasive", NULL); - goto found; - } - } - } - - printk(KERN_WARNING "%s: CPU %d not found\n", __FUNCTION__, cpu); - return -EINVAL; - -found: - real_address = *(unsigned long*) addr; - addr += sizeof (unsigned long); - size = *(unsigned int*) addr; - - pr_debug("pervasive area for CPU %d at %lx, size %x\n", - cpu, real_address, size); - p->regs = __ioremap(real_address, size, _PAGE_NO_CACHE); - p->thread = thread; - return 0; -} - -void __init cell_pervasive_init(void) -{ - struct cbe_pervasive *p; int cpu; - int ret; if (!cpu_has_feature(CPU_FTR_PAUSE_ZERO)) return; - for_each_cpu(cpu) { - p = &cbe_pervasive[cpu]; - ret = cbe_find_pmd_mmio(cpu, p); - if (ret) - return; + for_each_possible_cpu(cpu) { + struct cbe_pmd_regs __iomem *regs = cbe_get_cpu_pmd_regs(cpu); + if (!regs) + continue; + + /* Enable Pause(0) control bit */ + out_be64(®s->pmcr, in_be64(®s->pmcr) | + CBE_PMD_PAUSE_ZERO_CONTROL); } - ppc_md.idle_loop = cbe_idle; + ppc_md.power_save = cbe_power_save; ppc_md.system_reset_exception = cbe_system_reset_exception; } diff --git a/arch/powerpc/platforms/cell/pervasive.h b/arch/powerpc/platforms/cell/pervasive.h index da1fb85ca3e..fd4d7b7092b 100644 --- a/arch/powerpc/platforms/cell/pervasive.h +++ b/arch/powerpc/platforms/cell/pervasive.h @@ -25,38 +25,18 @@ #ifndef PERVASIVE_H #define PERVASIVE_H -struct pmd_regs { - u8 pad_0x0000_0x0800[0x0800 - 0x0000]; /* 0x0000 */ - - /* Thermal Sensor Registers */ - u64 ts_ctsr1; /* 0x0800 */ - u64 ts_ctsr2; /* 0x0808 */ - u64 ts_mtsr1; /* 0x0810 */ - u64 ts_mtsr2; /* 0x0818 */ - u64 ts_itr1; /* 0x0820 */ - u64 ts_itr2; /* 0x0828 */ - u64 ts_gitr; /* 0x0830 */ - u64 ts_isr; /* 0x0838 */ - u64 ts_imr; /* 0x0840 */ - u64 tm_cr1; /* 0x0848 */ - u64 tm_cr2; /* 0x0850 */ - u64 tm_simr; /* 0x0858 */ - u64 tm_tpr; /* 0x0860 */ - u64 tm_str1; /* 0x0868 */ - u64 tm_str2; /* 0x0870 */ - u64 tm_tsr; /* 0x0878 */ - - /* Power Management */ - u64 
pm_control; /* 0x0880 */ -#define PMD_PAUSE_ZERO_CONTROL 0x10000 - u64 pm_status; /* 0x0888 */ - - /* Time Base Register */ - u64 tbr; /* 0x0890 */ - - u8 pad_0x0898_0x1000 [0x1000 - 0x0898]; /* 0x0898 */ -}; - -void __init cell_pervasive_init(void); +extern void cbe_pervasive_init(void); +extern void cbe_system_error_exception(struct pt_regs *regs); +extern void cbe_maintenance_exception(struct pt_regs *regs); +extern void cbe_thermal_exception(struct pt_regs *regs); + +#ifdef CONFIG_PPC_IBM_CELL_RESETBUTTON +extern int cbe_sysreset_hack(void); +#else +static inline int cbe_sysreset_hack(void) +{ + return 1; +} +#endif /* CONFIG_PPC_IBM_CELL_RESETBUTTON */ #endif diff --git a/arch/powerpc/platforms/cell/pmu.c b/arch/powerpc/platforms/cell/pmu.c new file mode 100644 index 00000000000..348a27b1251 --- /dev/null +++ b/arch/powerpc/platforms/cell/pmu.c @@ -0,0 +1,424 @@ +/* + * Cell Broadband Engine Performance Monitor + * + * (C) Copyright IBM Corporation 2001,2006 + * + * Author: + * David Erb (djerb@us.ibm.com) + * Kevin Corry (kevcorry@us.ibm.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <linux/interrupt.h> +#include <linux/types.h> +#include <linux/export.h> +#include <asm/io.h> +#include <asm/irq_regs.h> +#include <asm/machdep.h> +#include <asm/pmc.h> +#include <asm/reg.h> +#include <asm/spu.h> +#include <asm/cell-regs.h> + +#include "interrupt.h" + +/* + * When writing to write-only mmio addresses, save a shadow copy. All of the + * registers are 32-bit, but stored in the upper-half of a 64-bit field in + * pmd_regs. + */ + +#define WRITE_WO_MMIO(reg, x) \ + do { \ + u32 _x = (x); \ + struct cbe_pmd_regs __iomem *pmd_regs; \ + struct cbe_pmd_shadow_regs *shadow_regs; \ + pmd_regs = cbe_get_cpu_pmd_regs(cpu); \ + shadow_regs = cbe_get_cpu_pmd_shadow_regs(cpu); \ + out_be64(&(pmd_regs->reg), (((u64)_x) << 32)); \ + shadow_regs->reg = _x; \ + } while (0) + +#define READ_SHADOW_REG(val, reg) \ + do { \ + struct cbe_pmd_shadow_regs *shadow_regs; \ + shadow_regs = cbe_get_cpu_pmd_shadow_regs(cpu); \ + (val) = shadow_regs->reg; \ + } while (0) + +#define READ_MMIO_UPPER32(val, reg) \ + do { \ + struct cbe_pmd_regs __iomem *pmd_regs; \ + pmd_regs = cbe_get_cpu_pmd_regs(cpu); \ + (val) = (u32)(in_be64(&pmd_regs->reg) >> 32); \ + } while (0) + +/* + * Physical counter registers. + * Each physical counter can act as one 32-bit counter or two 16-bit counters. + */ + +u32 cbe_read_phys_ctr(u32 cpu, u32 phys_ctr) +{ + u32 val_in_latch, val = 0; + + if (phys_ctr < NR_PHYS_CTRS) { + READ_SHADOW_REG(val_in_latch, counter_value_in_latch); + + /* Read the latch or the actual counter, whichever is newer. 
*/ + if (val_in_latch & (1 << phys_ctr)) { + READ_SHADOW_REG(val, pm_ctr[phys_ctr]); + } else { + READ_MMIO_UPPER32(val, pm_ctr[phys_ctr]); + } + } + + return val; +} +EXPORT_SYMBOL_GPL(cbe_read_phys_ctr); + +void cbe_write_phys_ctr(u32 cpu, u32 phys_ctr, u32 val) +{ + struct cbe_pmd_shadow_regs *shadow_regs; + u32 pm_ctrl; + + if (phys_ctr < NR_PHYS_CTRS) { + /* Writing to a counter only writes to a hardware latch. + * The new value is not propagated to the actual counter + * until the performance monitor is enabled. + */ + WRITE_WO_MMIO(pm_ctr[phys_ctr], val); + + pm_ctrl = cbe_read_pm(cpu, pm_control); + if (pm_ctrl & CBE_PM_ENABLE_PERF_MON) { + /* The counters are already active, so we need to + * rewrite the pm_control register to "re-enable" + * the PMU. + */ + cbe_write_pm(cpu, pm_control, pm_ctrl); + } else { + shadow_regs = cbe_get_cpu_pmd_shadow_regs(cpu); + shadow_regs->counter_value_in_latch |= (1 << phys_ctr); + } + } +} +EXPORT_SYMBOL_GPL(cbe_write_phys_ctr); + +/* + * "Logical" counter registers. + * These will read/write 16-bits or 32-bits depending on the + * current size of the counter. Counters 4 - 7 are always 16-bit. + */ + +u32 cbe_read_ctr(u32 cpu, u32 ctr) +{ + u32 val; + u32 phys_ctr = ctr & (NR_PHYS_CTRS - 1); + + val = cbe_read_phys_ctr(cpu, phys_ctr); + + if (cbe_get_ctr_size(cpu, phys_ctr) == 16) + val = (ctr < NR_PHYS_CTRS) ? (val >> 16) : (val & 0xffff); + + return val; +} +EXPORT_SYMBOL_GPL(cbe_read_ctr); + +void cbe_write_ctr(u32 cpu, u32 ctr, u32 val) +{ + u32 phys_ctr; + u32 phys_val; + + phys_ctr = ctr & (NR_PHYS_CTRS - 1); + + if (cbe_get_ctr_size(cpu, phys_ctr) == 16) { + phys_val = cbe_read_phys_ctr(cpu, phys_ctr); + + if (ctr < NR_PHYS_CTRS) + val = (val << 16) | (phys_val & 0xffff); + else + val = (val & 0xffff) | (phys_val & 0xffff0000); + } + + cbe_write_phys_ctr(cpu, phys_ctr, val); +} +EXPORT_SYMBOL_GPL(cbe_write_ctr); + +/* + * Counter-control registers. + * Each "logical" counter has a corresponding control register. + */ + +u32 cbe_read_pm07_control(u32 cpu, u32 ctr) +{ + u32 pm07_control = 0; + + if (ctr < NR_CTRS) + READ_SHADOW_REG(pm07_control, pm07_control[ctr]); + + return pm07_control; +} +EXPORT_SYMBOL_GPL(cbe_read_pm07_control); + +void cbe_write_pm07_control(u32 cpu, u32 ctr, u32 val) +{ + if (ctr < NR_CTRS) + WRITE_WO_MMIO(pm07_control[ctr], val); +} +EXPORT_SYMBOL_GPL(cbe_write_pm07_control); + +/* + * Other PMU control registers. Most of these are write-only. 
+ */ + +u32 cbe_read_pm(u32 cpu, enum pm_reg_name reg) +{ + u32 val = 0; + + switch (reg) { + case group_control: + READ_SHADOW_REG(val, group_control); + break; + + case debug_bus_control: + READ_SHADOW_REG(val, debug_bus_control); + break; + + case trace_address: + READ_MMIO_UPPER32(val, trace_address); + break; + + case ext_tr_timer: + READ_SHADOW_REG(val, ext_tr_timer); + break; + + case pm_status: + READ_MMIO_UPPER32(val, pm_status); + break; + + case pm_control: + READ_SHADOW_REG(val, pm_control); + break; + + case pm_interval: + READ_MMIO_UPPER32(val, pm_interval); + break; + + case pm_start_stop: + READ_SHADOW_REG(val, pm_start_stop); + break; + } + + return val; +} +EXPORT_SYMBOL_GPL(cbe_read_pm); + +void cbe_write_pm(u32 cpu, enum pm_reg_name reg, u32 val) +{ + switch (reg) { + case group_control: + WRITE_WO_MMIO(group_control, val); + break; + + case debug_bus_control: + WRITE_WO_MMIO(debug_bus_control, val); + break; + + case trace_address: + WRITE_WO_MMIO(trace_address, val); + break; + + case ext_tr_timer: + WRITE_WO_MMIO(ext_tr_timer, val); + break; + + case pm_status: + WRITE_WO_MMIO(pm_status, val); + break; + + case pm_control: + WRITE_WO_MMIO(pm_control, val); + break; + + case pm_interval: + WRITE_WO_MMIO(pm_interval, val); + break; + + case pm_start_stop: + WRITE_WO_MMIO(pm_start_stop, val); + break; + } +} +EXPORT_SYMBOL_GPL(cbe_write_pm); + +/* + * Get/set the size of a physical counter to either 16 or 32 bits. + */ + +u32 cbe_get_ctr_size(u32 cpu, u32 phys_ctr) +{ + u32 pm_ctrl, size = 0; + + if (phys_ctr < NR_PHYS_CTRS) { + pm_ctrl = cbe_read_pm(cpu, pm_control); + size = (pm_ctrl & CBE_PM_16BIT_CTR(phys_ctr)) ? 16 : 32; + } + + return size; +} +EXPORT_SYMBOL_GPL(cbe_get_ctr_size); + +void cbe_set_ctr_size(u32 cpu, u32 phys_ctr, u32 ctr_size) +{ + u32 pm_ctrl; + + if (phys_ctr < NR_PHYS_CTRS) { + pm_ctrl = cbe_read_pm(cpu, pm_control); + switch (ctr_size) { + case 16: + pm_ctrl |= CBE_PM_16BIT_CTR(phys_ctr); + break; + + case 32: + pm_ctrl &= ~CBE_PM_16BIT_CTR(phys_ctr); + break; + } + cbe_write_pm(cpu, pm_control, pm_ctrl); + } +} +EXPORT_SYMBOL_GPL(cbe_set_ctr_size); + +/* + * Enable/disable the entire performance monitoring unit. + * When we enable the PMU, all pending writes to counters get committed. + */ + +void cbe_enable_pm(u32 cpu) +{ + struct cbe_pmd_shadow_regs *shadow_regs; + u32 pm_ctrl; + + shadow_regs = cbe_get_cpu_pmd_shadow_regs(cpu); + shadow_regs->counter_value_in_latch = 0; + + pm_ctrl = cbe_read_pm(cpu, pm_control) | CBE_PM_ENABLE_PERF_MON; + cbe_write_pm(cpu, pm_control, pm_ctrl); +} +EXPORT_SYMBOL_GPL(cbe_enable_pm); + +void cbe_disable_pm(u32 cpu) +{ + u32 pm_ctrl; + pm_ctrl = cbe_read_pm(cpu, pm_control) & ~CBE_PM_ENABLE_PERF_MON; + cbe_write_pm(cpu, pm_control, pm_ctrl); +} +EXPORT_SYMBOL_GPL(cbe_disable_pm); + +/* + * Reading from the trace_buffer. + * The trace buffer is two 64-bit registers. Reading from + * the second half automatically increments the trace_address. + */ + +void cbe_read_trace_buffer(u32 cpu, u64 *buf) +{ + struct cbe_pmd_regs __iomem *pmd_regs = cbe_get_cpu_pmd_regs(cpu); + + *buf++ = in_be64(&pmd_regs->trace_buffer_0_63); + *buf++ = in_be64(&pmd_regs->trace_buffer_64_127); +} +EXPORT_SYMBOL_GPL(cbe_read_trace_buffer); + +/* + * Enabling/disabling interrupts for the entire performance monitoring unit. + */ + +u32 cbe_get_and_clear_pm_interrupts(u32 cpu) +{ + /* Reading pm_status clears the interrupt bits. 
*/ + return cbe_read_pm(cpu, pm_status); +} +EXPORT_SYMBOL_GPL(cbe_get_and_clear_pm_interrupts); + +void cbe_enable_pm_interrupts(u32 cpu, u32 thread, u32 mask) +{ + /* Set which node and thread will handle the next interrupt. */ + iic_set_interrupt_routing(cpu, thread, 0); + + /* Enable the interrupt bits in the pm_status register. */ + if (mask) + cbe_write_pm(cpu, pm_status, mask); +} +EXPORT_SYMBOL_GPL(cbe_enable_pm_interrupts); + +void cbe_disable_pm_interrupts(u32 cpu) +{ + cbe_get_and_clear_pm_interrupts(cpu); + cbe_write_pm(cpu, pm_status, 0); +} +EXPORT_SYMBOL_GPL(cbe_disable_pm_interrupts); + +static irqreturn_t cbe_pm_irq(int irq, void *dev_id) +{ + perf_irq(get_irq_regs()); + return IRQ_HANDLED; +} + +static int __init cbe_init_pm_irq(void) +{ + unsigned int irq; + int rc, node; + + for_each_online_node(node) { + irq = irq_create_mapping(NULL, IIC_IRQ_IOEX_PMI | + (node << IIC_IRQ_NODE_SHIFT)); + if (irq == NO_IRQ) { + printk("ERROR: Unable to allocate irq for node %d\n", + node); + return -EINVAL; + } + + rc = request_irq(irq, cbe_pm_irq, + 0, "cbe-pmu-0", NULL); + if (rc) { + printk("ERROR: Request for irq on node %d failed\n", + node); + return rc; + } + } + + return 0; +} +machine_arch_initcall(cell, cbe_init_pm_irq); + +void cbe_sync_irq(int node) +{ + unsigned int irq; + + irq = irq_find_mapping(NULL, + IIC_IRQ_IOEX_PMI + | (node << IIC_IRQ_NODE_SHIFT)); + + if (irq == NO_IRQ) { + printk(KERN_WARNING "ERROR, unable to get existing irq %d " \ + "for node %d\n", irq, node); + return; + } + + synchronize_irq(irq); +} +EXPORT_SYMBOL_GPL(cbe_sync_irq); + diff --git a/arch/powerpc/platforms/cell/qpace_setup.c b/arch/powerpc/platforms/cell/qpace_setup.c new file mode 100644 index 00000000000..6e3409d590a --- /dev/null +++ b/arch/powerpc/platforms/cell/qpace_setup.c @@ -0,0 +1,148 @@ +/* + * linux/arch/powerpc/platforms/cell/qpace_setup.c + * + * Copyright (C) 1995 Linus Torvalds + * Adapted from 'alpha' version by Gary Thomas + * Modified by Cort Dougan (cort@cs.nmt.edu) + * Modified by PPC64 Team, IBM Corp + * Modified by Cell Team, IBM Deutschland Entwicklung GmbH + * Modified by Benjamin Krill <ben@codiert.org>, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/export.h> +#include <linux/delay.h> +#include <linux/irq.h> +#include <linux/console.h> +#include <linux/of_platform.h> + +#include <asm/mmu.h> +#include <asm/processor.h> +#include <asm/io.h> +#include <asm/kexec.h> +#include <asm/pgtable.h> +#include <asm/prom.h> +#include <asm/rtas.h> +#include <asm/dma.h> +#include <asm/machdep.h> +#include <asm/time.h> +#include <asm/cputable.h> +#include <asm/irq.h> +#include <asm/spu.h> +#include <asm/spu_priv1.h> +#include <asm/udbg.h> +#include <asm/cell-regs.h> + +#include "interrupt.h" +#include "pervasive.h" +#include "ras.h" + +static void qpace_show_cpuinfo(struct seq_file *m) +{ + struct device_node *root; + const char *model = ""; + + root = of_find_node_by_path("/"); + if (root) + model = of_get_property(root, "model", NULL); + seq_printf(m, "machine\t\t: CHRP %s\n", model); + of_node_put(root); +} + +static void qpace_progress(char *s, unsigned short hex) +{ + printk("*** %04x : %s\n", hex, s ? 
s : ""); +} + +static const struct of_device_id qpace_bus_ids[] __initconst = { + { .type = "soc", }, + { .compatible = "soc", }, + { .type = "spider", }, + { .type = "axon", }, + { .type = "plb5", }, + { .type = "plb4", }, + { .type = "opb", }, + { .type = "ebc", }, + {}, +}; + +static int __init qpace_publish_devices(void) +{ + int node; + + /* Publish OF platform devices for southbridge IOs */ + of_platform_bus_probe(NULL, qpace_bus_ids, NULL); + + /* There is no device for the MIC memory controller, thus we create + * a platform device for it to attach the EDAC driver to. + */ + for_each_online_node(node) { + if (cbe_get_cpu_mic_tm_regs(cbe_node_to_cpu(node)) == NULL) + continue; + platform_device_register_simple("cbe-mic", node, NULL, 0); + } + + return 0; +} +machine_subsys_initcall(qpace, qpace_publish_devices); + +static void __init qpace_setup_arch(void) +{ +#ifdef CONFIG_SPU_BASE + spu_priv1_ops = &spu_priv1_mmio_ops; + spu_management_ops = &spu_management_of_ops; +#endif + + cbe_regs_init(); + +#ifdef CONFIG_CBE_RAS + cbe_ras_init(); +#endif + +#ifdef CONFIG_SMP + smp_init_cell(); +#endif + + /* init to some ~sane value until calibrate_delay() runs */ + loops_per_jiffy = 50000000; + + cbe_pervasive_init(); +#ifdef CONFIG_DUMMY_CONSOLE + conswitchp = &dummy_con; +#endif +} + +static int __init qpace_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + if (!of_flat_dt_is_compatible(root, "IBM,QPACE")) + return 0; + + hpte_init_native(); + + return 1; +} + +define_machine(qpace) { + .name = "QPACE", + .probe = qpace_probe, + .setup_arch = qpace_setup_arch, + .show_cpuinfo = qpace_show_cpuinfo, + .restart = rtas_restart, + .power_off = rtas_power_off, + .halt = rtas_halt, + .get_boot_time = rtas_get_boot_time, + .get_rtc_time = rtas_get_rtc_time, + .set_rtc_time = rtas_set_rtc_time, + .calibrate_decr = generic_calibrate_decr, + .progress = qpace_progress, + .init_IRQ = iic_init_IRQ, +}; diff --git a/arch/powerpc/platforms/cell/ras.c b/arch/powerpc/platforms/cell/ras.c new file mode 100644 index 00000000000..e865d748179 --- /dev/null +++ b/arch/powerpc/platforms/cell/ras.c @@ -0,0 +1,356 @@ +/* + * Copyright 2006-2008, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#undef DEBUG + +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/smp.h> +#include <linux/reboot.h> +#include <linux/kexec.h> +#include <linux/crash_dump.h> + +#include <asm/kexec.h> +#include <asm/reg.h> +#include <asm/io.h> +#include <asm/prom.h> +#include <asm/machdep.h> +#include <asm/rtas.h> +#include <asm/cell-regs.h> + +#include "ras.h" + + +static void dump_fir(int cpu) +{ + struct cbe_pmd_regs __iomem *pregs = cbe_get_cpu_pmd_regs(cpu); + struct cbe_iic_regs __iomem *iregs = cbe_get_cpu_iic_regs(cpu); + + if (pregs == NULL) + return; + + /* Todo: do some nicer parsing of bits and based on them go down + * to other sub-units FIRs and not only IIC + */ + printk(KERN_ERR "Global Checkstop FIR : 0x%016llx\n", + in_be64(&pregs->checkstop_fir)); + printk(KERN_ERR "Global Recoverable FIR : 0x%016llx\n", + in_be64(&pregs->checkstop_fir)); + printk(KERN_ERR "Global MachineCheck FIR : 0x%016llx\n", + in_be64(&pregs->spec_att_mchk_fir)); + + if (iregs == NULL) + return; + printk(KERN_ERR "IOC FIR : 0x%016llx\n", + in_be64(&iregs->ioc_fir)); + +} + +void cbe_system_error_exception(struct pt_regs *regs) +{ + int cpu = smp_processor_id(); + + printk(KERN_ERR "System Error Interrupt on CPU %d !\n", cpu); + dump_fir(cpu); + dump_stack(); +} + +void cbe_maintenance_exception(struct pt_regs *regs) +{ + int cpu = smp_processor_id(); + + /* + * Nothing implemented for the maintenance interrupt at this point + */ + + printk(KERN_ERR "Unhandled Maintenance interrupt on CPU %d !\n", cpu); + dump_stack(); +} + +void cbe_thermal_exception(struct pt_regs *regs) +{ + int cpu = smp_processor_id(); + + /* + * Nothing implemented for the thermal interrupt at this point + */ + + printk(KERN_ERR "Unhandled Thermal interrupt on CPU %d !\n", cpu); + dump_stack(); +} + +static int cbe_machine_check_handler(struct pt_regs *regs) +{ + int cpu = smp_processor_id(); + + printk(KERN_ERR "Machine Check Interrupt on CPU %d !\n", cpu); + dump_fir(cpu); + + /* No recovery from this code now, lets continue */ + return 0; +} + +struct ptcal_area { + struct list_head list; + int nid; + int order; + struct page *pages; +}; + +static LIST_HEAD(ptcal_list); + +static int ptcal_start_tok, ptcal_stop_tok; + +static int __init cbe_ptcal_enable_on_node(int nid, int order) +{ + struct ptcal_area *area; + int ret = -ENOMEM; + unsigned long addr; + + if (is_kdump_kernel()) + rtas_call(ptcal_stop_tok, 1, 1, NULL, nid); + + area = kmalloc(sizeof(*area), GFP_KERNEL); + if (!area) + goto out_err; + + area->nid = nid; + area->order = order; + area->pages = alloc_pages_exact_node(area->nid, + GFP_KERNEL|__GFP_THISNODE, + area->order); + + if (!area->pages) { + printk(KERN_WARNING "%s: no page on node %d\n", + __func__, area->nid); + goto out_free_area; + } + + /* + * We move the ptcal area to the middle of the allocated + * page, in order to avoid prefetches in memcpy and similar + * functions stepping on it. 
+ */ + addr = __pa(page_address(area->pages)) + (PAGE_SIZE >> 1); + printk(KERN_DEBUG "%s: enabling PTCAL on node %d address=0x%016lx\n", + __func__, area->nid, addr); + + ret = -EIO; + if (rtas_call(ptcal_start_tok, 3, 1, NULL, area->nid, + (unsigned int)(addr >> 32), + (unsigned int)(addr & 0xffffffff))) { + printk(KERN_ERR "%s: error enabling PTCAL on node %d!\n", + __func__, nid); + goto out_free_pages; + } + + list_add(&area->list, &ptcal_list); + + return 0; + +out_free_pages: + __free_pages(area->pages, area->order); +out_free_area: + kfree(area); +out_err: + return ret; +} + +static int __init cbe_ptcal_enable(void) +{ + const u32 *size; + struct device_node *np; + int order, found_mic = 0; + + np = of_find_node_by_path("/rtas"); + if (!np) + return -ENODEV; + + size = of_get_property(np, "ibm,cbe-ptcal-size", NULL); + if (!size) { + of_node_put(np); + return -ENODEV; + } + + pr_debug("%s: enabling PTCAL, size = 0x%x\n", __func__, *size); + order = get_order(*size); + of_node_put(np); + + /* support for malta device trees, with be@/mic@ nodes */ + for_each_node_by_type(np, "mic-tm") { + cbe_ptcal_enable_on_node(of_node_to_nid(np), order); + found_mic = 1; + } + + if (found_mic) + return 0; + + /* support for older device tree - use cpu nodes */ + for_each_node_by_type(np, "cpu") { + const u32 *nid = of_get_property(np, "node-id", NULL); + if (!nid) { + printk(KERN_ERR "%s: node %s is missing node-id?\n", + __func__, np->full_name); + continue; + } + cbe_ptcal_enable_on_node(*nid, order); + found_mic = 1; + } + + return found_mic ? 0 : -ENODEV; +} + +static int cbe_ptcal_disable(void) +{ + struct ptcal_area *area, *tmp; + int ret = 0; + + pr_debug("%s: disabling PTCAL\n", __func__); + + list_for_each_entry_safe(area, tmp, &ptcal_list, list) { + /* disable ptcal on this node */ + if (rtas_call(ptcal_stop_tok, 1, 1, NULL, area->nid)) { + printk(KERN_ERR "%s: error disabling PTCAL " + "on node %d!\n", __func__, + area->nid); + ret = -EIO; + continue; + } + + /* ensure we can access the PTCAL area */ + memset(page_address(area->pages), 0, + 1 << (area->order + PAGE_SHIFT)); + + /* clean up */ + list_del(&area->list); + __free_pages(area->pages, area->order); + kfree(area); + } + + return ret; +} + +static int cbe_ptcal_notify_reboot(struct notifier_block *nb, + unsigned long code, void *data) +{ + return cbe_ptcal_disable(); +} + +static void cbe_ptcal_crash_shutdown(void) +{ + cbe_ptcal_disable(); +} + +static struct notifier_block cbe_ptcal_reboot_notifier = { + .notifier_call = cbe_ptcal_notify_reboot +}; + +#ifdef CONFIG_PPC_IBM_CELL_RESETBUTTON +static int sysreset_hack; + +static int __init cbe_sysreset_init(void) +{ + struct cbe_pmd_regs __iomem *regs; + + sysreset_hack = of_machine_is_compatible("IBM,CBPLUS-1.0"); + if (!sysreset_hack) + return 0; + + regs = cbe_get_cpu_pmd_regs(0); + if (!regs) + return 0; + + /* Enable JTAG system-reset hack */ + out_be32(®s->fir_mode_reg, + in_be32(®s->fir_mode_reg) | + CBE_PMD_FIR_MODE_M8); + + return 0; +} +device_initcall(cbe_sysreset_init); + +int cbe_sysreset_hack(void) +{ + struct cbe_pmd_regs __iomem *regs; + + /* + * The BMC can inject user triggered system reset exceptions, + * but cannot set the system reset reason in srr1, + * so check an extra register here. 
+ */ + if (sysreset_hack && (smp_processor_id() == 0)) { + regs = cbe_get_cpu_pmd_regs(0); + if (!regs) + return 0; + if (in_be64(®s->ras_esc_0) & 0x0000ffff) { + out_be64(®s->ras_esc_0, 0); + return 0; + } + } + return 1; +} +#endif /* CONFIG_PPC_IBM_CELL_RESETBUTTON */ + +int __init cbe_ptcal_init(void) +{ + int ret; + ptcal_start_tok = rtas_token("ibm,cbe-start-ptcal"); + ptcal_stop_tok = rtas_token("ibm,cbe-stop-ptcal"); + + if (ptcal_start_tok == RTAS_UNKNOWN_SERVICE + || ptcal_stop_tok == RTAS_UNKNOWN_SERVICE) + return -ENODEV; + + ret = register_reboot_notifier(&cbe_ptcal_reboot_notifier); + if (ret) + goto out1; + + ret = crash_shutdown_register(&cbe_ptcal_crash_shutdown); + if (ret) + goto out2; + + return cbe_ptcal_enable(); + +out2: + unregister_reboot_notifier(&cbe_ptcal_reboot_notifier); +out1: + printk(KERN_ERR "Can't disable PTCAL, so not enabling\n"); + return ret; +} + +arch_initcall(cbe_ptcal_init); + +void __init cbe_ras_init(void) +{ + unsigned long hid0; + + /* + * Enable System Error & thermal interrupts and wakeup conditions + */ + + hid0 = mfspr(SPRN_HID0); + hid0 |= HID0_CBE_THERM_INT_EN | HID0_CBE_THERM_WAKEUP | + HID0_CBE_SYSERR_INT_EN | HID0_CBE_SYSERR_WAKEUP; + mtspr(SPRN_HID0, hid0); + mb(); + + /* + * Install machine check handler. Leave setting of precise mode to + * what the firmware did for now + */ + ppc_md.machine_check_exception = cbe_machine_check_handler; + mb(); + + /* + * For now, we assume that IOC_FIR is already set to forward some + * error conditions to the System Error handler. If that is not true + * then it will have to be fixed up here. + */ +} diff --git a/arch/powerpc/platforms/cell/ras.h b/arch/powerpc/platforms/cell/ras.h new file mode 100644 index 00000000000..eb7ee54c82a --- /dev/null +++ b/arch/powerpc/platforms/cell/ras.h @@ -0,0 +1,9 @@ +#ifndef RAS_H +#define RAS_H + +extern void cbe_system_error_exception(struct pt_regs *regs); +extern void cbe_maintenance_exception(struct pt_regs *regs); +extern void cbe_thermal_exception(struct pt_regs *regs); +extern void cbe_ras_init(void); + +#endif /* RAS_H */ diff --git a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c index b33a4443f5a..6ae25fb6201 100644 --- a/arch/powerpc/platforms/cell/setup.c +++ b/arch/powerpc/platforms/cell/setup.c @@ -14,13 +14,12 @@ */ #undef DEBUG -#include <linux/config.h> #include <linux/sched.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/stddef.h> +#include <linux/export.h> #include <linux/unistd.h> -#include <linux/slab.h> #include <linux/user.h> #include <linux/reboot.h> #include <linux/init.h> @@ -29,11 +28,13 @@ #include <linux/seq_file.h> #include <linux/root_dev.h> #include <linux/console.h> +#include <linux/mutex.h> +#include <linux/memory_hotplug.h> +#include <linux/of_platform.h> #include <asm/mmu.h> #include <asm/processor.h> #include <asm/io.h> -#include <asm/kexec.h> #include <asm/pgtable.h> #include <asm/prom.h> #include <asm/rtas.h> @@ -46,10 +47,16 @@ #include <asm/cputable.h> #include <asm/ppc-pci.h> #include <asm/irq.h> +#include <asm/spu.h> +#include <asm/spu_priv1.h> +#include <asm/udbg.h> +#include <asm/mpic.h> +#include <asm/cell-regs.h> +#include <asm/io-workarounds.h> #include "interrupt.h" -#include "iommu.h" #include "pervasive.h" +#include "ras.h" #ifdef DEBUG #define DBG(fmt...) 
udbg_printf(fmt) @@ -64,158 +71,202 @@ static void cell_show_cpuinfo(struct seq_file *m) root = of_find_node_by_path("/"); if (root) - model = get_property(root, "model", NULL); + model = of_get_property(root, "model", NULL); seq_printf(m, "machine\t\t: CHRP %s\n", model); of_node_put(root); } -#ifdef CONFIG_SPARSEMEM -static int __init find_spu_node_id(struct device_node *spe) +static void cell_progress(char *s, unsigned short hex) { - unsigned int *id; -#ifdef CONFIG_NUMA - struct device_node *cpu; - cpu = spe->parent->parent; - id = (unsigned int *)get_property(cpu, "node-id", NULL); -#else - id = NULL; -#endif - return id ? *id : 0; + printk("*** %04x : %s\n", hex, s ? s : ""); } -static void __init cell_spuprop_present(struct device_node *spe, - const char *prop, int early) +static void cell_fixup_pcie_rootcomplex(struct pci_dev *dev) { - struct address_prop { - unsigned long address; - unsigned int len; - } __attribute__((packed)) *p; - int proplen; - - unsigned long start_pfn, end_pfn, pfn; - int node_id; - - p = (void*)get_property(spe, prop, &proplen); - WARN_ON(proplen != sizeof (*p)); - - node_id = find_spu_node_id(spe); - - start_pfn = p->address >> PAGE_SHIFT; - end_pfn = (p->address + p->len + PAGE_SIZE - 1) >> PAGE_SHIFT; - - /* We need to call memory_present *before* the call to sparse_init, - but we can initialize the page structs only *after* that call. - Thus, we're being called twice. */ - if (early) - memory_present(node_id, start_pfn, end_pfn); - else { - /* As the pages backing SPU LS and I/O are outside the range - of regular memory, their page structs were not initialized - by free_area_init. Do it here instead. */ - for (pfn = start_pfn; pfn < end_pfn; pfn++) { - struct page *page = pfn_to_page(pfn); - set_page_links(page, ZONE_DMA, node_id, pfn); - set_page_count(page, 1); - reset_page_mapcount(page); - SetPageReserved(page); - INIT_LIST_HEAD(&page->lru); - } + struct pci_controller *hose; + const char *s; + int i; + + if (!machine_is(cell)) + return; + + /* We're searching for a direct child of the PHB */ + if (dev->bus->self != NULL || dev->devfn != 0) + return; + + hose = pci_bus_to_host(dev->bus); + if (hose == NULL) + return; + + /* Only on PCIE */ + if (!of_device_is_compatible(hose->dn, "pciex")) + return; + + /* And only on axon */ + s = of_get_property(hose->dn, "model", NULL); + if (!s || strcmp(s, "Axon") != 0) + return; + + for (i = 0; i < PCI_BRIDGE_RESOURCES; i++) { + dev->resource[i].start = dev->resource[i].end = 0; + dev->resource[i].flags = 0; } + + printk(KERN_DEBUG "PCI: Hiding resources on Axon PCIE RC %s\n", + pci_name(dev)); } +DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, cell_fixup_pcie_rootcomplex); -static void __init cell_spumem_init(int early) +static int cell_setup_phb(struct pci_controller *phb) { - struct device_node *node; - for (node = of_find_node_by_type(NULL, "spe"); - node; node = of_find_node_by_type(node, "spe")) { - cell_spuprop_present(node, "local-store", early); - cell_spuprop_present(node, "problem", early); - cell_spuprop_present(node, "priv1", early); - cell_spuprop_present(node, "priv2", early); + const char *model; + struct device_node *np; + + int rc = rtas_setup_phb(phb); + if (rc) + return rc; + + np = phb->dn; + model = of_get_property(np, "model", NULL); + if (model == NULL || strcmp(np->name, "pci")) + return 0; + + /* Setup workarounds for spider */ + if (strcmp(model, "Spider")) + return 0; + + iowa_register_bus(phb, &spiderpci_ops, &spiderpci_iowa_init, + (void *)SPIDER_PCI_REG_BASE); + return 0; +} + 
+static const struct of_device_id cell_bus_ids[] __initconst = { + { .type = "soc", }, + { .compatible = "soc", }, + { .type = "spider", }, + { .type = "axon", }, + { .type = "plb5", }, + { .type = "plb4", }, + { .type = "opb", }, + { .type = "ebc", }, + {}, +}; + +static int __init cell_publish_devices(void) +{ + struct device_node *root = of_find_node_by_path("/"); + struct device_node *np; + int node; + + /* Publish OF platform devices for southbridge IOs */ + of_platform_bus_probe(NULL, cell_bus_ids, NULL); + + /* On spider based blades, we need to manually create the OF + * platform devices for the PCI host bridges + */ + for_each_child_of_node(root, np) { + if (np->type == NULL || (strcmp(np->type, "pci") != 0 && + strcmp(np->type, "pciex") != 0)) + continue; + of_platform_device_create(np, NULL, NULL); + } + + /* There is no device for the MIC memory controller, thus we create + * a platform device for it to attach the EDAC driver to. + */ + for_each_online_node(node) { + if (cbe_get_cpu_mic_tm_regs(cbe_node_to_cpu(node)) == NULL) + continue; + platform_device_register_simple("cbe-mic", node, NULL, 0); } + + return 0; } -#else -static void __init cell_spumem_init(int early) +machine_subsys_initcall(cell, cell_publish_devices); + +static void __init mpic_init_IRQ(void) { + struct device_node *dn; + struct mpic *mpic; + + for (dn = NULL; + (dn = of_find_node_by_name(dn, "interrupt-controller"));) { + if (!of_device_is_compatible(dn, "CBEA,platform-open-pic")) + continue; + + /* The MPIC driver will get everything it needs from the + * device-tree, just pass 0 to all arguments + */ + mpic = mpic_alloc(dn, 0, MPIC_SECONDARY | MPIC_NO_RESET, + 0, 0, " MPIC "); + if (mpic == NULL) + continue; + mpic_init(mpic); + } } -#endif -static void cell_progress(char *s, unsigned short hex) + +static void __init cell_init_irq(void) { - printk("*** %04x : %s\n", hex, s ? s : ""); + iic_init_IRQ(); + spider_init_IRQ(); + mpic_init_IRQ(); +} + +static void __init cell_set_dabrx(void) +{ + mtspr(SPRN_DABRX, DABRX_KERNEL | DABRX_USER); } static void __init cell_setup_arch(void) { - ppc_md.init_IRQ = iic_init_IRQ; - ppc_md.get_irq = iic_get_irq; +#ifdef CONFIG_SPU_BASE + spu_priv1_ops = &spu_priv1_mmio_ops; + spu_management_ops = &spu_management_of_ops; +#endif + + cbe_regs_init(); + + cell_set_dabrx(); + +#ifdef CONFIG_CBE_RAS + cbe_ras_init(); +#endif #ifdef CONFIG_SMP smp_init_cell(); #endif - /* init to some ~sane value until calibrate_delay() runs */ loops_per_jiffy = 50000000; - if (ROOT_DEV == 0) { - printk("No ramdisk, default root is /dev/hda2\n"); - ROOT_DEV = Root_HDA2; - } - /* Find and initialize PCI host bridges */ init_pci_config_tokens(); - find_and_init_phbs(); - spider_init_IRQ(); - cell_pervasive_init(); + + cbe_pervasive_init(); #ifdef CONFIG_DUMMY_CONSOLE conswitchp = &dummy_con; #endif mmio_nvram_init(); - - cell_spumem_init(0); } -/* - * Early initialization. 
Relocation is on but do not reference unbolted pages - */ -static void __init cell_init_early(void) +static int __init cell_probe(void) { - DBG(" -> cell_init_early()\n"); + unsigned long root = of_get_flat_dt_root(); - hpte_init_native(); - - cell_init_iommu(); - - ppc64_interrupt_controller = IC_CELL_PIC; - - cell_spumem_init(1); - - DBG(" <- cell_init_early()\n"); -} - - -static int __init cell_probe(int platform) -{ - if (platform != PLATFORM_CELL) + if (!of_flat_dt_is_compatible(root, "IBM,CBEA") && + !of_flat_dt_is_compatible(root, "IBM,CPBW-1.0")) return 0; - return 1; -} + hpte_init_native(); -/* - * Cell has no legacy IO; anything calling this function has to - * fail or bad things will happen - */ -static int cell_check_legacy_ioport(unsigned int baseport) -{ - return -ENODEV; + return 1; } -struct machdep_calls __initdata cell_md = { +define_machine(cell) { + .name = "Cell", .probe = cell_probe, .setup_arch = cell_setup_arch, - .init_early = cell_init_early, .show_cpuinfo = cell_show_cpuinfo, .restart = rtas_restart, .power_off = rtas_power_off, @@ -224,11 +275,7 @@ struct machdep_calls __initdata cell_md = { .get_rtc_time = rtas_get_rtc_time, .set_rtc_time = rtas_set_rtc_time, .calibrate_decr = generic_calibrate_decr, - .check_legacy_ioport = cell_check_legacy_ioport, .progress = cell_progress, -#ifdef CONFIG_KEXEC - .machine_kexec = default_machine_kexec, - .machine_kexec_prepare = default_machine_kexec_prepare, - .machine_crash_shutdown = default_machine_crash_shutdown, -#endif + .init_IRQ = cell_init_irq, + .pci_setup_phb = cell_setup_phb, }; diff --git a/arch/powerpc/platforms/cell/smp.c b/arch/powerpc/platforms/cell/smp.c index bdf6c5fe58c..c8017a7bcab 100644 --- a/arch/powerpc/platforms/cell/smp.c +++ b/arch/powerpc/platforms/cell/smp.c @@ -14,9 +14,7 @@ #undef DEBUG -#include <linux/config.h> #include <linux/kernel.h> -#include <linux/module.h> #include <linux/sched.h> #include <linux/smp.h> #include <linux/interrupt.h> @@ -25,11 +23,11 @@ #include <linux/spinlock.h> #include <linux/cache.h> #include <linux/err.h> -#include <linux/sysdev.h> +#include <linux/device.h> #include <linux/cpu.h> #include <asm/ptrace.h> -#include <asm/atomic.h> +#include <linux/atomic.h> #include <asm/irq.h> #include <asm/page.h> #include <asm/pgtable.h> @@ -37,14 +35,15 @@ #include <asm/prom.h> #include <asm/smp.h> #include <asm/paca.h> -#include <asm/time.h> #include <asm/machdep.h> #include <asm/cputable.h> #include <asm/firmware.h> -#include <asm/system.h> #include <asm/rtas.h> +#include <asm/cputhreads.h> +#include <asm/code-patching.h> #include "interrupt.h" +#include <asm/udbg.h> #ifdef DEBUG #define DBG(fmt...) udbg_printf(fmt) @@ -53,13 +52,11 @@ #endif /* - * The primary thread of each non-boot processor is recorded here before - * smp init. + * The Primary thread of each non-boot processor was started from the OF client + * interface by prom_hold_cpus and is spinning on secondary_hold_spinloop. 
*/ static cpumask_t of_spin_map; -extern void pSeries_secondary_smp_init(unsigned long); - /** * smp_startup_cpu() - start the given cpu * @@ -71,15 +68,15 @@ extern void pSeries_secondary_smp_init(unsigned long); * 0 - failure * 1 - success */ -static inline int __devinit smp_startup_cpu(unsigned int lcpu) +static inline int smp_startup_cpu(unsigned int lcpu) { int status; - unsigned long start_here = __pa((u32)*((unsigned long *) - pSeries_secondary_smp_init)); + unsigned long start_here = + __pa(ppc_function_entry(generic_secondary_smp_init)); unsigned int pcpu; int start_cpu; - if (cpu_isset(lcpu, of_spin_map)) + if (cpumask_test_cpu(lcpu, &of_spin_map)) /* Already started by OF and sitting in spin loop */ return 1; @@ -105,66 +102,30 @@ static inline int __devinit smp_startup_cpu(unsigned int lcpu) return 1; } -static void smp_iic_message_pass(int target, int msg) -{ - unsigned int i; - - if (target < NR_CPUS) { - iic_cause_IPI(target, msg); - } else { - for_each_online_cpu(i) { - if (target == MSG_ALL_BUT_SELF - && i == smp_processor_id()) - continue; - iic_cause_IPI(i, msg); - } - } -} - static int __init smp_iic_probe(void) { iic_request_IPIs(); - return cpus_weight(cpu_possible_map); + return cpumask_weight(cpu_possible_mask); } -static void __devinit smp_iic_setup_cpu(int cpu) +static void smp_cell_setup_cpu(int cpu) { if (cpu != boot_cpuid) iic_setup_cpu(); -} - -static DEFINE_SPINLOCK(timebase_lock); -static unsigned long timebase = 0; - -static void __devinit cell_give_timebase(void) -{ - spin_lock(&timebase_lock); - rtas_call(rtas_token("freeze-time-base"), 0, 1, NULL); - timebase = get_tb(); - spin_unlock(&timebase_lock); - - while (timebase) - barrier(); - rtas_call(rtas_token("thaw-time-base"), 0, 1, NULL); -} -static void __devinit cell_take_timebase(void) -{ - while (!timebase) - barrier(); - spin_lock(&timebase_lock); - set_tb(timebase >> 32, timebase & 0xffffffff); - timebase = 0; - spin_unlock(&timebase_lock); + /* + * change default DABRX to allow user watchpoints + */ + mtspr(SPRN_DABRX, DABRX_KERNEL | DABRX_USER); } -static void __devinit smp_cell_kick_cpu(int nr) +static int smp_cell_kick_cpu(int nr) { BUG_ON(nr < 0 || nr >= NR_CPUS); if (!smp_startup_cpu(nr)) - return; + return -ENOENT; /* * The processor is currently spinning, waiting for the @@ -172,27 +133,16 @@ static void __devinit smp_cell_kick_cpu(int nr) * the processor will continue on to secondary_start */ paca[nr].cpu_start = 1; -} -static int smp_cell_cpu_bootable(unsigned int nr) -{ - /* Special case - we inhibit secondary thread startup - * during boot if the user requests it. Odd-numbered - * cpus are assumed to be secondary threads. - */ - if (system_state < SYSTEM_RUNNING && - cpu_has_feature(CPU_FTR_SMT) && - !smt_enabled_at_boot && nr % 2 != 0) - return 0; - - return 1; + return 0; } + static struct smp_ops_t bpa_iic_smp_ops = { - .message_pass = smp_iic_message_pass, + .message_pass = iic_message_pass, .probe = smp_iic_probe, .kick_cpu = smp_cell_kick_cpu, - .setup_cpu = smp_iic_setup_cpu, - .cpu_bootable = smp_cell_cpu_bootable, + .setup_cpu = smp_cell_setup_cpu, + .cpu_bootable = smp_generic_cpu_bootable, }; /* This is called very early */ @@ -207,23 +157,18 @@ void __init smp_init_cell(void) /* Mark threads which are still spinning in hold loops. */ if (cpu_has_feature(CPU_FTR_SMT)) { for_each_present_cpu(i) { - if (i % 2 == 0) - /* - * Even-numbered logical cpus correspond to - * primary threads. 
- */ - cpu_set(i, of_spin_map); + if (cpu_thread_in_core(i) == 0) + cpumask_set_cpu(i, &of_spin_map); } - } else { - of_spin_map = cpu_present_map; - } + } else + cpumask_copy(&of_spin_map, cpu_present_mask); - cpu_clear(boot_cpuid, of_spin_map); + cpumask_clear_cpu(boot_cpuid, &of_spin_map); /* Non-lpar has additional take/give timebase */ if (rtas_token("freeze-time-base") != RTAS_UNKNOWN_SERVICE) { - smp_ops->give_timebase = cell_give_timebase; - smp_ops->take_timebase = cell_take_timebase; + smp_ops->give_timebase = rtas_give_timebase; + smp_ops->take_timebase = rtas_take_timebase; } DBG(" <- smp_init_cell()\n"); diff --git a/arch/powerpc/platforms/cell/spider-pci.c b/arch/powerpc/platforms/cell/spider-pci.c new file mode 100644 index 00000000000..f1f7878893f --- /dev/null +++ b/arch/powerpc/platforms/cell/spider-pci.c @@ -0,0 +1,184 @@ +/* + * IO workarounds for PCI on Celleb/Cell platform + * + * (C) Copyright 2006-2007 TOSHIBA CORPORATION + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#undef DEBUG + +#include <linux/kernel.h> +#include <linux/of_platform.h> +#include <linux/slab.h> +#include <linux/io.h> + +#include <asm/ppc-pci.h> +#include <asm/pci-bridge.h> +#include <asm/io-workarounds.h> + +#define SPIDER_PCI_DISABLE_PREFETCH + +struct spiderpci_iowa_private { + void __iomem *regs; +}; + +static void spiderpci_io_flush(struct iowa_bus *bus) +{ + struct spiderpci_iowa_private *priv; + u32 val; + + priv = bus->private; + val = in_be32(priv->regs + SPIDER_PCI_DUMMY_READ); + iosync(); +} + +#define SPIDER_PCI_MMIO_READ(name, ret) \ +static ret spiderpci_##name(const PCI_IO_ADDR addr) \ +{ \ + ret val = __do_##name(addr); \ + spiderpci_io_flush(iowa_mem_find_bus(addr)); \ + return val; \ +} + +#define SPIDER_PCI_MMIO_READ_STR(name) \ +static void spiderpci_##name(const PCI_IO_ADDR addr, void *buf, \ + unsigned long count) \ +{ \ + __do_##name(addr, buf, count); \ + spiderpci_io_flush(iowa_mem_find_bus(addr)); \ +} + +SPIDER_PCI_MMIO_READ(readb, u8) +SPIDER_PCI_MMIO_READ(readw, u16) +SPIDER_PCI_MMIO_READ(readl, u32) +SPIDER_PCI_MMIO_READ(readq, u64) +SPIDER_PCI_MMIO_READ(readw_be, u16) +SPIDER_PCI_MMIO_READ(readl_be, u32) +SPIDER_PCI_MMIO_READ(readq_be, u64) +SPIDER_PCI_MMIO_READ_STR(readsb) +SPIDER_PCI_MMIO_READ_STR(readsw) +SPIDER_PCI_MMIO_READ_STR(readsl) + +static void spiderpci_memcpy_fromio(void *dest, const PCI_IO_ADDR src, + unsigned long n) +{ + __do_memcpy_fromio(dest, src, n); + spiderpci_io_flush(iowa_mem_find_bus(src)); +} + +static int __init spiderpci_pci_setup_chip(struct pci_controller *phb, + void __iomem *regs) +{ + void *dummy_page_va; + dma_addr_t dummy_page_da; + +#ifdef SPIDER_PCI_DISABLE_PREFETCH + u32 val = in_be32(regs + SPIDER_PCI_VCI_CNTL_STAT); + pr_debug("SPIDER_IOWA:PVCI_Control_Status was 0x%08x\n", val); + out_be32(regs + SPIDER_PCI_VCI_CNTL_STAT, val | 0x8); +#endif /* 
SPIDER_PCI_DISABLE_PREFETCH */ + + /* setup dummy read */ + /* + * On CellBlade, we can't know that which XDR memory is used by + * kmalloc() to allocate dummy_page_va. + * In order to imporve the performance, the XDR which is used to + * allocate dummy_page_va is the nearest the spider-pci. + * We have to select the CBE which is the nearest the spider-pci + * to allocate memory from the best XDR, but I don't know that + * how to do. + * + * Celleb does not have this problem, because it has only one XDR. + */ + dummy_page_va = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!dummy_page_va) { + pr_err("SPIDERPCI-IOWA:Alloc dummy_page_va failed.\n"); + return -1; + } + + dummy_page_da = dma_map_single(phb->parent, dummy_page_va, + PAGE_SIZE, DMA_FROM_DEVICE); + if (dma_mapping_error(phb->parent, dummy_page_da)) { + pr_err("SPIDER-IOWA:Map dummy page filed.\n"); + kfree(dummy_page_va); + return -1; + } + + out_be32(regs + SPIDER_PCI_DUMMY_READ_BASE, dummy_page_da); + + return 0; +} + +int __init spiderpci_iowa_init(struct iowa_bus *bus, void *data) +{ + void __iomem *regs = NULL; + struct spiderpci_iowa_private *priv; + struct device_node *np = bus->phb->dn; + struct resource r; + unsigned long offset = (unsigned long)data; + + pr_debug("SPIDERPCI-IOWA:Bus initialize for spider(%s)\n", + np->full_name); + + priv = kzalloc(sizeof(struct spiderpci_iowa_private), GFP_KERNEL); + if (!priv) { + pr_err("SPIDERPCI-IOWA:" + "Can't allocate struct spiderpci_iowa_private"); + return -1; + } + + if (of_address_to_resource(np, 0, &r)) { + pr_err("SPIDERPCI-IOWA:Can't get resource.\n"); + goto error; + } + + regs = ioremap(r.start + offset, SPIDER_PCI_REG_SIZE); + if (!regs) { + pr_err("SPIDERPCI-IOWA:ioremap failed.\n"); + goto error; + } + priv->regs = regs; + bus->private = priv; + + if (spiderpci_pci_setup_chip(bus->phb, regs)) + goto error; + + return 0; + +error: + kfree(priv); + bus->private = NULL; + + if (regs) + iounmap(regs); + + return -1; +} + +struct ppc_pci_io spiderpci_ops = { + .readb = spiderpci_readb, + .readw = spiderpci_readw, + .readl = spiderpci_readl, + .readq = spiderpci_readq, + .readw_be = spiderpci_readw_be, + .readl_be = spiderpci_readl_be, + .readq_be = spiderpci_readq_be, + .readsb = spiderpci_readsb, + .readsw = spiderpci_readsw, + .readsl = spiderpci_readsl, + .memcpy_fromio = spiderpci_memcpy_fromio, +}; + diff --git a/arch/powerpc/platforms/cell/spider-pic.c b/arch/powerpc/platforms/cell/spider-pic.c index e74132188bd..1f72f4ab635 100644 --- a/arch/powerpc/platforms/cell/spider-pic.c +++ b/arch/powerpc/platforms/cell/spider-pic.c @@ -22,6 +22,7 @@ #include <linux/interrupt.h> #include <linux/irq.h> +#include <linux/ioport.h> #include <asm/pgtable.h> #include <asm/prom.h> @@ -56,136 +57,303 @@ enum { REISWAITEN = 0x508, /* Reissue Wait Control*/ }; -static void __iomem *spider_pics[4]; +#define SPIDER_CHIP_COUNT 4 +#define SPIDER_SRC_COUNT 64 +#define SPIDER_IRQ_INVALID 63 -static void __iomem *spider_get_pic(int irq) +struct spider_pic { + struct irq_domain *host; + void __iomem *regs; + unsigned int node_id; +}; +static struct spider_pic spider_pics[SPIDER_CHIP_COUNT]; + +static struct spider_pic *spider_irq_data_to_pic(struct irq_data *d) { - int node = irq / IIC_NODE_STRIDE; - irq %= IIC_NODE_STRIDE; - - if (irq >= IIC_EXT_OFFSET && - irq < IIC_EXT_OFFSET + IIC_NUM_EXT && - spider_pics) - return spider_pics[node]; - return NULL; + return irq_data_get_irq_chip_data(d); } -static int spider_get_nr(unsigned int irq) +static void __iomem *spider_get_irq_config(struct spider_pic 
*pic, + unsigned int src) { - return (irq % IIC_NODE_STRIDE) - IIC_EXT_OFFSET; + return pic->regs + TIR_CFGA + 8 * src; } -static void __iomem *spider_get_irq_config(int irq) +static void spider_unmask_irq(struct irq_data *d) { - void __iomem *pic; - pic = spider_get_pic(irq); - return pic + TIR_CFGA + 8 * spider_get_nr(irq); + struct spider_pic *pic = spider_irq_data_to_pic(d); + void __iomem *cfg = spider_get_irq_config(pic, irqd_to_hwirq(d)); + + out_be32(cfg, in_be32(cfg) | 0x30000000u); } -static void spider_enable_irq(unsigned int irq) +static void spider_mask_irq(struct irq_data *d) { - void __iomem *cfg = spider_get_irq_config(irq); - irq = spider_get_nr(irq); + struct spider_pic *pic = spider_irq_data_to_pic(d); + void __iomem *cfg = spider_get_irq_config(pic, irqd_to_hwirq(d)); - out_be32(cfg, in_be32(cfg) | 0x3107000eu); - out_be32(cfg + 4, in_be32(cfg + 4) | 0x00020000u | irq); + out_be32(cfg, in_be32(cfg) & ~0x30000000u); } -static void spider_disable_irq(unsigned int irq) +static void spider_ack_irq(struct irq_data *d) { - void __iomem *cfg = spider_get_irq_config(irq); - irq = spider_get_nr(irq); + struct spider_pic *pic = spider_irq_data_to_pic(d); + unsigned int src = irqd_to_hwirq(d); - out_be32(cfg, in_be32(cfg) & ~0x30000000u); + /* Reset edge detection logic if necessary + */ + if (irqd_is_level_type(d)) + return; + + /* Only interrupts 47 to 50 can be set to edge */ + if (src < 47 || src > 50) + return; + + /* Perform the clear of the edge logic */ + out_be32(pic->regs + TIR_EDC, 0x100 | (src & 0xf)); } -static unsigned int spider_startup_irq(unsigned int irq) +static int spider_set_irq_type(struct irq_data *d, unsigned int type) { - spider_enable_irq(irq); + unsigned int sense = type & IRQ_TYPE_SENSE_MASK; + struct spider_pic *pic = spider_irq_data_to_pic(d); + unsigned int hw = irqd_to_hwirq(d); + void __iomem *cfg = spider_get_irq_config(pic, hw); + u32 old_mask; + u32 ic; + + /* Note that only level high is supported for most interrupts */ + if (sense != IRQ_TYPE_NONE && sense != IRQ_TYPE_LEVEL_HIGH && + (hw < 47 || hw > 50)) + return -EINVAL; + + /* Decode sense type */ + switch(sense) { + case IRQ_TYPE_EDGE_RISING: + ic = 0x3; + break; + case IRQ_TYPE_EDGE_FALLING: + ic = 0x2; + break; + case IRQ_TYPE_LEVEL_LOW: + ic = 0x0; + break; + case IRQ_TYPE_LEVEL_HIGH: + case IRQ_TYPE_NONE: + ic = 0x1; + break; + default: + return -EINVAL; + } + + /* Configure the source. One gross hack that was there before and + * that I've kept around is the priority to the BE which I set to + * be the same as the interrupt source number. I don't know whether + * that's supposed to make any kind of sense however, we'll have to + * decide that, but for now, I'm not changing the behaviour. 
+ */ + old_mask = in_be32(cfg) & 0x30000000u; + out_be32(cfg, old_mask | (ic << 24) | (0x7 << 16) | + (pic->node_id << 4) | 0xe); + out_be32(cfg + 4, (0x2 << 16) | (hw & 0xff)); + return 0; } -static void spider_shutdown_irq(unsigned int irq) +static struct irq_chip spider_pic = { + .name = "SPIDER", + .irq_unmask = spider_unmask_irq, + .irq_mask = spider_mask_irq, + .irq_ack = spider_ack_irq, + .irq_set_type = spider_set_irq_type, +}; + +static int spider_host_map(struct irq_domain *h, unsigned int virq, + irq_hw_number_t hw) { - spider_disable_irq(irq); + irq_set_chip_data(virq, h->host_data); + irq_set_chip_and_handler(virq, &spider_pic, handle_level_irq); + + /* Set default irq type */ + irq_set_irq_type(virq, IRQ_TYPE_NONE); + + return 0; } -static void spider_end_irq(unsigned int irq) +static int spider_host_xlate(struct irq_domain *h, struct device_node *ct, + const u32 *intspec, unsigned int intsize, + irq_hw_number_t *out_hwirq, unsigned int *out_flags) + { - spider_enable_irq(irq); + /* Spider interrupts have 2 cells, first is the interrupt source, + * second, well, I don't know for sure yet ... We mask the top bits + * because old device-trees encode a node number in there + */ + *out_hwirq = intspec[0] & 0x3f; + *out_flags = IRQ_TYPE_LEVEL_HIGH; + return 0; } -static void spider_ack_irq(unsigned int irq) +static const struct irq_domain_ops spider_host_ops = { + .map = spider_host_map, + .xlate = spider_host_xlate, +}; + +static void spider_irq_cascade(unsigned int irq, struct irq_desc *desc) { - spider_disable_irq(irq); - iic_local_enable(); + struct irq_chip *chip = irq_desc_get_chip(desc); + struct spider_pic *pic = irq_desc_get_handler_data(desc); + unsigned int cs, virq; + + cs = in_be32(pic->regs + TIR_CS) >> 24; + if (cs == SPIDER_IRQ_INVALID) + virq = NO_IRQ; + else + virq = irq_linear_revmap(pic->host, cs); + + if (virq != NO_IRQ) + generic_handle_irq(virq); + + chip->irq_eoi(&desc->irq_data); } -static struct hw_interrupt_type spider_pic = { - .typename = " SPIDER ", - .startup = spider_startup_irq, - .shutdown = spider_shutdown_irq, - .enable = spider_enable_irq, - .disable = spider_disable_irq, - .ack = spider_ack_irq, - .end = spider_end_irq, -}; +/* For hooking up the cascade we have a problem. Our device-tree is + * crap and we don't know on which BE iic interrupt we are hooked, at + * least not the "standard" way. We can reconstitute it based on two + * pieces of information though: which BE node we are connected to and whether + * we are connected to IOIF0 or IOIF1. Right now, we really only care + * about the IBM cell blade and we know that its firmware gives us an + * interrupt-map property which is pretty strange. 
+ */ +static unsigned int __init spider_find_cascade_and_node(struct spider_pic *pic) +{ + unsigned int virq; + const u32 *imap, *tmp; + int imaplen, intsize, unit; + struct device_node *iic; + + /* First, we check whether we have a real "interrupts" in the device + * tree in case the device-tree is ever fixed + */ + virq = irq_of_parse_and_map(pic->host->of_node, 0); + if (virq) + return virq; + + /* Now do the horrible hacks */ + tmp = of_get_property(pic->host->of_node, "#interrupt-cells", NULL); + if (tmp == NULL) + return NO_IRQ; + intsize = *tmp; + imap = of_get_property(pic->host->of_node, "interrupt-map", &imaplen); + if (imap == NULL || imaplen < (intsize + 1)) + return NO_IRQ; + iic = of_find_node_by_phandle(imap[intsize]); + if (iic == NULL) + return NO_IRQ; + imap += intsize + 1; + tmp = of_get_property(iic, "#interrupt-cells", NULL); + if (tmp == NULL) { + of_node_put(iic); + return NO_IRQ; + } + intsize = *tmp; + /* Assume unit is last entry of interrupt specifier */ + unit = imap[intsize - 1]; + /* Ok, we have a unit, now let's try to get the node */ + tmp = of_get_property(iic, "ibm,interrupt-server-ranges", NULL); + if (tmp == NULL) { + of_node_put(iic); + return NO_IRQ; + } + /* ugly as hell but works for now */ + pic->node_id = (*tmp) >> 1; + of_node_put(iic); + + /* Ok, now let's get cracking. You may ask me why I just didn't match + * the iic host from the iic OF node, but that way I'm still compatible + * with really really old old firmwares for which we don't have a node + */ + /* Manufacture an IIC interrupt number of class 2 */ + virq = irq_create_mapping(NULL, + (pic->node_id << IIC_IRQ_NODE_SHIFT) | + (2 << IIC_IRQ_CLASS_SHIFT) | + unit); + if (virq == NO_IRQ) + printk(KERN_ERR "spider_pic: failed to map cascade !"); + return virq; +} -int spider_get_irq(unsigned long int_pending) +static void __init spider_init_one(struct device_node *of_node, int chip, + unsigned long addr) { - void __iomem *regs = spider_get_pic(int_pending); - unsigned long cs; - int irq; + struct spider_pic *pic = &spider_pics[chip]; + int i, virq; + + /* Map registers */ + pic->regs = ioremap(addr, 0x1000); + if (pic->regs == NULL) + panic("spider_pic: can't map registers !"); + + /* Allocate a host */ + pic->host = irq_domain_add_linear(of_node, SPIDER_SRC_COUNT, + &spider_host_ops, pic); + if (pic->host == NULL) + panic("spider_pic: can't allocate irq host !"); + + /* Go through all sources and disable them */ + for (i = 0; i < SPIDER_SRC_COUNT; i++) { + void __iomem *cfg = pic->regs + TIR_CFGA + 8 * i; + out_be32(cfg, in_be32(cfg) & ~0x30000000u); + } + + /* do not mask any interrupts because of level */ + out_be32(pic->regs + TIR_MSK, 0x0); + + /* enable interrupt packets to be output */ + out_be32(pic->regs + TIR_PIEN, in_be32(pic->regs + TIR_PIEN) | 0x1); - cs = in_be32(regs + TIR_CS); + /* Hook up the cascade interrupt to the iic and nodeid */ + virq = spider_find_cascade_and_node(pic); + if (virq == NO_IRQ) + return; + irq_set_handler_data(virq, pic); + irq_set_chained_handler(virq, spider_irq_cascade); - irq = cs >> 24; - if (irq != 63) - return irq; + printk(KERN_INFO "spider_pic: node %d, addr: 0x%lx %s\n", + pic->node_id, addr, of_node->full_name); - return -1; + /* Enable the interrupt detection enable bit. Do this last! 
*/ + out_be32(pic->regs + TIR_DEN, in_be32(pic->regs + TIR_DEN) | 0x1); } - -void spider_init_IRQ(void) + +void __init spider_init_IRQ(void) { - int node; + struct resource r; struct device_node *dn; - unsigned int *property; - long spiderpic; - int n; - -/* FIXME: detect multiple PICs as soon as the device tree has them */ - for (node = 0; node < 1; node++) { - dn = of_find_node_by_path("/"); - n = prom_n_addr_cells(dn); - property = (unsigned int *) get_property(dn, - "platform-spider-pic", NULL); - - if (!property) + int chip = 0; + + /* XXX node numbers are totally bogus. We _hope_ we get the device + * nodes in the right order here but that's definitely not guaranteed, + * we need to get the node from the device tree instead. + * There is currently no proper property for it (but our whole + * device-tree is bogus anyway) so all we can do is pray or maybe test + * the address and deduce the node-id + */ + for (dn = NULL; + (dn = of_find_node_by_name(dn, "interrupt-controller"));) { + if (of_device_is_compatible(dn, "CBEA,platform-spider-pic")) { + if (of_address_to_resource(dn, 0, &r)) { + printk(KERN_WARNING "spider-pic: Failed\n"); + continue; + } + } else if (of_device_is_compatible(dn, "sti,platform-spider-pic") + && (chip < 2)) { + static long hard_coded_pics[] = + { 0x24000008000ul, 0x34000008000ul}; + r.start = hard_coded_pics[chip]; + } else continue; - for (spiderpic = 0; n > 0; --n) - spiderpic = (spiderpic << 32) + *property++; - printk(KERN_DEBUG "SPIDER addr: %lx\n", spiderpic); - spider_pics[node] = __ioremap(spiderpic, 0x800, _PAGE_NO_CACHE); - for (n = 0; n < IIC_NUM_EXT; n++) { - int irq = n + IIC_EXT_OFFSET + node * IIC_NODE_STRIDE; - get_irq_desc(irq)->handler = &spider_pic; - - /* do not mask any interrupts because of level */ - out_be32(spider_pics[node] + TIR_MSK, 0x0); - - /* disable edge detection clear */ - /* out_be32(spider_pics[node] + TIR_EDC, 0x0); */ - - /* enable interrupt packets to be output */ - out_be32(spider_pics[node] + TIR_PIEN, - in_be32(spider_pics[node] + TIR_PIEN) | 0x1); - - /* Enable the interrupt detection enable bit. Do this last! */ - out_be32(spider_pics[node] + TIR_DEN, - in_be32(spider_pics[node] +TIR_DEN) | 0x1); - - } + spider_init_one(dn, chip++, r.start); } } diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c index d75ae03df68..f85db3a69b4 100644 --- a/arch/powerpc/platforms/cell/spu_base.c +++ b/arch/powerpc/platforms/cell/spu_base.c @@ -25,39 +25,117 @@ #include <linux/interrupt.h> #include <linux/list.h> #include <linux/module.h> -#include <linux/poll.h> #include <linux/ptrace.h> #include <linux/slab.h> #include <linux/wait.h> - -#include <asm/io.h> -#include <asm/prom.h> -#include <asm/semaphore.h> +#include <linux/mm.h> +#include <linux/io.h> +#include <linux/mutex.h> +#include <linux/linux_logo.h> +#include <linux/syscore_ops.h> #include <asm/spu.h> -#include <asm/mmu_context.h> +#include <asm/spu_priv1.h> +#include <asm/spu_csa.h> +#include <asm/xmon.h> +#include <asm/prom.h> +#include <asm/kexec.h> + +const struct spu_management_ops *spu_management_ops; +EXPORT_SYMBOL_GPL(spu_management_ops); + +const struct spu_priv1_ops *spu_priv1_ops; +EXPORT_SYMBOL_GPL(spu_priv1_ops); + +struct cbe_spu_info cbe_spu_info[MAX_NUMNODES]; +EXPORT_SYMBOL_GPL(cbe_spu_info); + +/* + * The spufs fault-handling code needs to call force_sig_info to raise signals + * on DMA errors. 
Export it here to avoid general kernel-wide access to this + * function. + */ +EXPORT_SYMBOL_GPL(force_sig_info); + +/* + * Protects cbe_spu_info and spu->number. + */ +static DEFINE_SPINLOCK(spu_lock); + +/* + * List of all spus in the system. + * + * This list is iterated by callers from irq context and callers that + * want to sleep. Thus modifications need to be done with both + * spu_full_list_lock and spu_full_list_mutex held, while iterating + * through it requires either of these locks. + * + * In addition, spu_full_list_lock protects all assignments to + * spu->mm. + */ +static LIST_HEAD(spu_full_list); +static DEFINE_SPINLOCK(spu_full_list_lock); +static DEFINE_MUTEX(spu_full_list_mutex); -#include "interrupt.h" +struct spu_slb { + u64 esid, vsid; +}; -static int __spu_trap_invalid_dma(struct spu *spu) +void spu_invalidate_slbs(struct spu *spu) { - pr_debug("%s\n", __FUNCTION__); - force_sig(SIGBUS, /* info, */ current); - return 0; + struct spu_priv2 __iomem *priv2 = spu->priv2; + unsigned long flags; + + spin_lock_irqsave(&spu->register_lock, flags); + if (spu_mfc_sr1_get(spu) & MFC_STATE1_RELOCATE_MASK) + out_be64(&priv2->slb_invalidate_all_W, 0UL); + spin_unlock_irqrestore(&spu->register_lock, flags); } +EXPORT_SYMBOL_GPL(spu_invalidate_slbs); -static int __spu_trap_dma_align(struct spu *spu) +/* This is called by the MM core when a segment size is changed, to + * request a flush of all the SPEs using a given mm + */ +void spu_flush_all_slbs(struct mm_struct *mm) { - pr_debug("%s\n", __FUNCTION__); - force_sig(SIGBUS, /* info, */ current); - return 0; + struct spu *spu; + unsigned long flags; + + spin_lock_irqsave(&spu_full_list_lock, flags); + list_for_each_entry(spu, &spu_full_list, full_list) { + if (spu->mm == mm) + spu_invalidate_slbs(spu); + } + spin_unlock_irqrestore(&spu_full_list_lock, flags); } -static int __spu_trap_error(struct spu *spu) +/* The hack below stinks... try to do something better one of + * these days... Does it even work properly with NR_CPUS == 1 ? + */ +static inline void mm_needs_global_tlbie(struct mm_struct *mm) { - pr_debug("%s\n", __FUNCTION__); - force_sig(SIGILL, /* info, */ current); - return 0; + int nr = (NR_CPUS > 1) ? NR_CPUS : NR_CPUS + 1; + + /* Global TLBIE broadcast required with SPEs. 
*/ + bitmap_fill(cpumask_bits(mm_cpumask(mm)), nr); +} + +void spu_associate_mm(struct spu *spu, struct mm_struct *mm) +{ + unsigned long flags; + + spin_lock_irqsave(&spu_full_list_lock, flags); + spu->mm = mm; + spin_unlock_irqrestore(&spu_full_list_lock, flags); + if (mm) + mm_needs_global_tlbie(mm); +} +EXPORT_SYMBOL_GPL(spu_associate_mm); + +int spu_64k_pages_available(void) +{ + return mmu_psize_defs[MMU_PAGE_64K].shift != 0; } +EXPORT_SYMBOL_GPL(spu_64k_pages_available); static void spu_restart_dma(struct spu *spu) { @@ -65,167 +143,212 @@ static void spu_restart_dma(struct spu *spu) if (!test_bit(SPU_CONTEXT_SWITCH_PENDING, &spu->flags)) out_be64(&priv2->mfc_control_RW, MFC_CNTL_RESTART_DMA_COMMAND); + else { + set_bit(SPU_CONTEXT_FAULT_PENDING, &spu->flags); + mb(); + } } -static int __spu_trap_data_seg(struct spu *spu, unsigned long ea) +static inline void spu_load_slb(struct spu *spu, int slbe, struct spu_slb *slb) { struct spu_priv2 __iomem *priv2 = spu->priv2; - struct mm_struct *mm = spu->mm; - u64 esid, vsid; - pr_debug("%s\n", __FUNCTION__); + pr_debug("%s: adding SLB[%d] 0x%016llx 0x%016llx\n", + __func__, slbe, slb->vsid, slb->esid); - if (test_bit(SPU_CONTEXT_SWITCH_ACTIVE, &spu->flags)) { - /* SLBs are pre-loaded for context switch, so - * we should never get here! - */ - printk("%s: invalid access during switch!\n", __func__); - return 1; - } - if (!mm || (REGION_ID(ea) != USER_REGION_ID)) { + out_be64(&priv2->slb_index_W, slbe); + /* set invalid before writing vsid */ + out_be64(&priv2->slb_esid_RW, 0); + /* now it's safe to write the vsid */ + out_be64(&priv2->slb_vsid_RW, slb->vsid); + /* setting the new esid makes the entry valid again */ + out_be64(&priv2->slb_esid_RW, slb->esid); +} + +static int __spu_trap_data_seg(struct spu *spu, unsigned long ea) +{ + struct mm_struct *mm = spu->mm; + struct spu_slb slb; + int psize; + + pr_debug("%s\n", __func__); + + slb.esid = (ea & ESID_MASK) | SLB_ESID_V; + + switch(REGION_ID(ea)) { + case USER_REGION_ID: +#ifdef CONFIG_PPC_MM_SLICES + psize = get_slice_psize(mm, ea); +#else + psize = mm->context.user_psize; +#endif + slb.vsid = (get_vsid(mm->context.id, ea, MMU_SEGSIZE_256M) + << SLB_VSID_SHIFT) | SLB_VSID_USER; + break; + case VMALLOC_REGION_ID: + if (ea < VMALLOC_END) + psize = mmu_vmalloc_psize; + else + psize = mmu_io_psize; + slb.vsid = (get_kernel_vsid(ea, MMU_SEGSIZE_256M) + << SLB_VSID_SHIFT) | SLB_VSID_KERNEL; + break; + case KERNEL_REGION_ID: + psize = mmu_linear_psize; + slb.vsid = (get_kernel_vsid(ea, MMU_SEGSIZE_256M) + << SLB_VSID_SHIFT) | SLB_VSID_KERNEL; + break; + default: /* Future: support kernel segments so that drivers * can use SPUs. */ pr_debug("invalid region access at %016lx\n", ea); return 1; } + slb.vsid |= mmu_psize_defs[psize].sllp; - esid = (ea & ESID_MASK) | SLB_ESID_V; - vsid = (get_vsid(mm->context.id, ea) << SLB_VSID_SHIFT) | SLB_VSID_USER; - if (in_hugepage_area(mm->context, ea)) - vsid |= SLB_VSID_L; - - out_be64(&priv2->slb_index_W, spu->slb_replace); - out_be64(&priv2->slb_vsid_RW, vsid); - out_be64(&priv2->slb_esid_RW, esid); + spu_load_slb(spu, spu->slb_replace, &slb); spu->slb_replace++; if (spu->slb_replace >= 8) spu->slb_replace = 0; spu_restart_dma(spu); - + spu->stats.slb_flt++; return 0; } extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap); //XXX static int __spu_trap_data_map(struct spu *spu, unsigned long ea, u64 dsisr) { - pr_debug("%s\n", __FUNCTION__); - - /* Handle kernel space hash faults immediately. 
- User hash faults need to be deferred to process context. */ - if ((dsisr & MFC_DSISR_PTE_NOT_FOUND) - && REGION_ID(ea) != USER_REGION_ID - && hash_page(ea, _PAGE_PRESENT, 0x300) == 0) { - spu_restart_dma(spu); - return 0; - } + int ret; - if (test_bit(SPU_CONTEXT_SWITCH_ACTIVE, &spu->flags)) { - printk("%s: invalid access during switch!\n", __func__); - return 1; + pr_debug("%s, %llx, %lx\n", __func__, dsisr, ea); + + /* + * Handle kernel space hash faults immediately. User hash + * faults need to be deferred to process context. + */ + if ((dsisr & MFC_DSISR_PTE_NOT_FOUND) && + (REGION_ID(ea) != USER_REGION_ID)) { + + spin_unlock(&spu->register_lock); + ret = hash_page(ea, _PAGE_PRESENT, 0x300); + spin_lock(&spu->register_lock); + + if (!ret) { + spu_restart_dma(spu); + return 0; + } } - spu->dar = ea; - spu->dsisr = dsisr; - mb(); - if (spu->stop_callback) - spu->stop_callback(spu); - return 0; -} + spu->class_1_dar = ea; + spu->class_1_dsisr = dsisr; -static int __spu_trap_mailbox(struct spu *spu) -{ - if (spu->ibox_callback) - spu->ibox_callback(spu); + spu->stop_callback(spu, 1); - /* atomically disable SPU mailbox interrupts */ - spin_lock(&spu->register_lock); - spu_int_mask_and(spu, 2, ~0x1); - spin_unlock(&spu->register_lock); - return 0; -} + spu->class_1_dar = 0; + spu->class_1_dsisr = 0; -static int __spu_trap_stop(struct spu *spu) -{ - pr_debug("%s\n", __FUNCTION__); - spu->stop_code = in_be32(&spu->problem->spu_status_R); - if (spu->stop_callback) - spu->stop_callback(spu); return 0; } -static int __spu_trap_halt(struct spu *spu) +static void __spu_kernel_slb(void *addr, struct spu_slb *slb) { - pr_debug("%s\n", __FUNCTION__); - spu->stop_code = in_be32(&spu->problem->spu_status_R); - if (spu->stop_callback) - spu->stop_callback(spu); - return 0; -} + unsigned long ea = (unsigned long)addr; + u64 llp; -static int __spu_trap_tag_group(struct spu *spu) -{ - pr_debug("%s\n", __FUNCTION__); - /* wake_up(&spu->dma_wq); */ - return 0; + if (REGION_ID(ea) == KERNEL_REGION_ID) + llp = mmu_psize_defs[mmu_linear_psize].sllp; + else + llp = mmu_psize_defs[mmu_virtual_psize].sllp; + + slb->vsid = (get_kernel_vsid(ea, MMU_SEGSIZE_256M) << SLB_VSID_SHIFT) | + SLB_VSID_KERNEL | llp; + slb->esid = (ea & ESID_MASK) | SLB_ESID_V; } -static int __spu_trap_spubox(struct spu *spu) +/** + * Given an array of @nr_slbs SLB entries, @slbs, return non-zero if the + * address @new_addr is present. + */ +static inline int __slb_present(struct spu_slb *slbs, int nr_slbs, + void *new_addr) { - if (spu->wbox_callback) - spu->wbox_callback(spu); + unsigned long ea = (unsigned long)new_addr; + int i; + + for (i = 0; i < nr_slbs; i++) + if (!((slbs[i].esid ^ ea) & ESID_MASK)) + return 1; - /* atomically disable SPU mailbox interrupts */ - spin_lock(&spu->register_lock); - spu_int_mask_and(spu, 2, ~0x10); - spin_unlock(&spu->register_lock); return 0; } -static irqreturn_t -spu_irq_class_0(int irq, void *data, struct pt_regs *regs) +/** + * Setup the SPU kernel SLBs, in preparation for a context save/restore. We + * need to map both the context save area, and the save/restore code. + * + * Because the lscsa and code may cross segment boundaries, we check to see + * if mappings are required for the start and end of each range. We currently + * assume that the mappings are smaller than one segment - if not, something + * is seriously wrong. 
+ */ +void spu_setup_kernel_slbs(struct spu *spu, struct spu_lscsa *lscsa, + void *code, int code_size) { - struct spu *spu; + struct spu_slb slbs[4]; + int i, nr_slbs = 0; + /* start and end addresses of both mappings */ + void *addrs[] = { + lscsa, (void *)lscsa + sizeof(*lscsa) - 1, + code, code + code_size - 1 + }; - spu = data; - spu->class_0_pending = 1; - if (spu->stop_callback) - spu->stop_callback(spu); + /* check the set of addresses, and create a new entry in the slbs array + * if there isn't already a SLB for that address */ + for (i = 0; i < ARRAY_SIZE(addrs); i++) { + if (__slb_present(slbs, nr_slbs, addrs[i])) + continue; - return IRQ_HANDLED; + __spu_kernel_slb(addrs[i], &slbs[nr_slbs]); + nr_slbs++; + } + + spin_lock_irq(&spu->register_lock); + /* Add the set of SLBs */ + for (i = 0; i < nr_slbs; i++) + spu_load_slb(spu, i, &slbs[i]); + spin_unlock_irq(&spu->register_lock); } +EXPORT_SYMBOL_GPL(spu_setup_kernel_slbs); -int -spu_irq_class_0_bottom(struct spu *spu) +static irqreturn_t +spu_irq_class_0(int irq, void *data) { + struct spu *spu; unsigned long stat, mask; - spu->class_0_pending = 0; + spu = data; + spin_lock(&spu->register_lock); mask = spu_int_mask_get(spu, 0); - stat = spu_int_stat_get(spu, 0); + stat = spu_int_stat_get(spu, 0) & mask; - stat &= mask; - - if (stat & 1) /* invalid MFC DMA */ - __spu_trap_invalid_dma(spu); - - if (stat & 2) /* invalid DMA alignment */ - __spu_trap_dma_align(spu); - - if (stat & 4) /* error on SPU */ - __spu_trap_error(spu); + spu->class_0_pending |= stat; + spu->class_0_dar = spu_mfc_dar_get(spu); + spu->stop_callback(spu, 0); + spu->class_0_pending = 0; + spu->class_0_dar = 0; spu_int_stat_clear(spu, 0, stat); + spin_unlock(&spu->register_lock); - return (stat & 0x7) ? -EIO : 0; + return IRQ_HANDLED; } -EXPORT_SYMBOL_GPL(spu_irq_class_0_bottom); static irqreturn_t -spu_irq_class_1(int irq, void *data, struct pt_regs *regs) +spu_irq_class_1(int irq, void *data) { struct spu *spu; unsigned long stat, mask, dar, dsisr; @@ -238,113 +361,130 @@ spu_irq_class_1(int irq, void *data, struct pt_regs *regs) stat = spu_int_stat_get(spu, 1) & mask; dar = spu_mfc_dar_get(spu); dsisr = spu_mfc_dsisr_get(spu); - if (stat & 2) /* mapping fault */ + if (stat & CLASS1_STORAGE_FAULT_INTR) spu_mfc_dsisr_set(spu, 0ul); spu_int_stat_clear(spu, 1, stat); - spin_unlock(&spu->register_lock); - if (stat & 1) /* segment fault */ + pr_debug("%s: %lx %lx %lx %lx\n", __func__, mask, stat, + dar, dsisr); + + if (stat & CLASS1_SEGMENT_FAULT_INTR) __spu_trap_data_seg(spu, dar); - if (stat & 2) { /* mapping fault */ + if (stat & CLASS1_STORAGE_FAULT_INTR) __spu_trap_data_map(spu, dar, dsisr); - } - if (stat & 4) /* ls compare & suspend on get */ + if (stat & CLASS1_LS_COMPARE_SUSPEND_ON_GET_INTR) ; - if (stat & 8) /* ls compare & suspend on put */ + if (stat & CLASS1_LS_COMPARE_SUSPEND_ON_PUT_INTR) ; + spu->class_1_dsisr = 0; + spu->class_1_dar = 0; + + spin_unlock(&spu->register_lock); + return stat ? IRQ_HANDLED : IRQ_NONE; } -EXPORT_SYMBOL_GPL(spu_irq_class_1_bottom); static irqreturn_t -spu_irq_class_2(int irq, void *data, struct pt_regs *regs) +spu_irq_class_2(int irq, void *data) { struct spu *spu; unsigned long stat; unsigned long mask; + const int mailbox_intrs = + CLASS2_MAILBOX_THRESHOLD_INTR | CLASS2_MAILBOX_INTR; spu = data; + spin_lock(&spu->register_lock); stat = spu_int_stat_get(spu, 2); mask = spu_int_mask_get(spu, 2); + /* ignore interrupts we're not waiting for */ + stat &= mask; + /* mailbox interrupts are level triggered. 
mask them now before + * acknowledging */ + if (stat & mailbox_intrs) + spu_int_mask_and(spu, 2, ~(stat & mailbox_intrs)); + /* acknowledge all interrupts before the callbacks */ + spu_int_stat_clear(spu, 2, stat); pr_debug("class 2 interrupt %d, %lx, %lx\n", irq, stat, mask); - stat &= mask; + if (stat & CLASS2_MAILBOX_INTR) + spu->ibox_callback(spu); - if (stat & 1) /* PPC core mailbox */ - __spu_trap_mailbox(spu); + if (stat & CLASS2_SPU_STOP_INTR) + spu->stop_callback(spu, 2); - if (stat & 2) /* SPU stop-and-signal */ - __spu_trap_stop(spu); + if (stat & CLASS2_SPU_HALT_INTR) + spu->stop_callback(spu, 2); - if (stat & 4) /* SPU halted */ - __spu_trap_halt(spu); + if (stat & CLASS2_SPU_DMA_TAG_GROUP_COMPLETE_INTR) + spu->mfc_callback(spu); - if (stat & 8) /* DMA tag group complete */ - __spu_trap_tag_group(spu); + if (stat & CLASS2_MAILBOX_THRESHOLD_INTR) + spu->wbox_callback(spu); - if (stat & 0x10) /* SPU mailbox threshold */ - __spu_trap_spubox(spu); + spu->stats.class2_intr++; + + spin_unlock(&spu->register_lock); - spu_int_stat_clear(spu, 2, stat); return stat ? IRQ_HANDLED : IRQ_NONE; } -static int -spu_request_irqs(struct spu *spu) +static int spu_request_irqs(struct spu *spu) { - int ret; - int irq_base; - - irq_base = IIC_NODE_STRIDE * spu->node + IIC_SPE_OFFSET; - - snprintf(spu->irq_c0, sizeof (spu->irq_c0), "spe%02d.0", spu->number); - ret = request_irq(irq_base + spu->isrc, - spu_irq_class_0, 0, spu->irq_c0, spu); - if (ret) - goto out; - - snprintf(spu->irq_c1, sizeof (spu->irq_c1), "spe%02d.1", spu->number); - ret = request_irq(irq_base + IIC_CLASS_STRIDE + spu->isrc, - spu_irq_class_1, 0, spu->irq_c1, spu); - if (ret) - goto out1; + int ret = 0; - snprintf(spu->irq_c2, sizeof (spu->irq_c2), "spe%02d.2", spu->number); - ret = request_irq(irq_base + 2*IIC_CLASS_STRIDE + spu->isrc, - spu_irq_class_2, 0, spu->irq_c2, spu); - if (ret) - goto out2; - goto out; + if (spu->irqs[0] != NO_IRQ) { + snprintf(spu->irq_c0, sizeof (spu->irq_c0), "spe%02d.0", + spu->number); + ret = request_irq(spu->irqs[0], spu_irq_class_0, + 0, spu->irq_c0, spu); + if (ret) + goto bail0; + } + if (spu->irqs[1] != NO_IRQ) { + snprintf(spu->irq_c1, sizeof (spu->irq_c1), "spe%02d.1", + spu->number); + ret = request_irq(spu->irqs[1], spu_irq_class_1, + 0, spu->irq_c1, spu); + if (ret) + goto bail1; + } + if (spu->irqs[2] != NO_IRQ) { + snprintf(spu->irq_c2, sizeof (spu->irq_c2), "spe%02d.2", + spu->number); + ret = request_irq(spu->irqs[2], spu_irq_class_2, + 0, spu->irq_c2, spu); + if (ret) + goto bail2; + } + return 0; -out2: - free_irq(irq_base + IIC_CLASS_STRIDE + spu->isrc, spu); -out1: - free_irq(irq_base + spu->isrc, spu); -out: +bail2: + if (spu->irqs[1] != NO_IRQ) + free_irq(spu->irqs[1], spu); +bail1: + if (spu->irqs[0] != NO_IRQ) + free_irq(spu->irqs[0], spu); +bail0: return ret; } -static void -spu_free_irqs(struct spu *spu) +static void spu_free_irqs(struct spu *spu) { - int irq_base; - - irq_base = IIC_NODE_STRIDE * spu->node + IIC_SPE_OFFSET; - - free_irq(irq_base + spu->isrc, spu); - free_irq(irq_base + IIC_CLASS_STRIDE + spu->isrc, spu); - free_irq(irq_base + 2*IIC_CLASS_STRIDE + spu->isrc, spu); + if (spu->irqs[0] != NO_IRQ) + free_irq(spu->irqs[0], spu); + if (spu->irqs[1] != NO_IRQ) + free_irq(spu->irqs[1], spu); + if (spu->irqs[2] != NO_IRQ) + free_irq(spu->irqs[2], spu); } -static LIST_HEAD(spu_list); -static DECLARE_MUTEX(spu_mutex); - -static void spu_init_channels(struct spu *spu) +void spu_init_channels(struct spu *spu) { static const struct { unsigned channel; @@ -377,332 
+517,333 @@ static void spu_init_channels(struct spu *spu) out_be64(&priv2->spu_chnlcnt_RW, count_list[i].count); } } +EXPORT_SYMBOL_GPL(spu_init_channels); -struct spu *spu_alloc(void) +static struct bus_type spu_subsys = { + .name = "spu", + .dev_name = "spu", +}; + +int spu_add_dev_attr(struct device_attribute *attr) { struct spu *spu; - down(&spu_mutex); - if (!list_empty(&spu_list)) { - spu = list_entry(spu_list.next, struct spu, list); - list_del_init(&spu->list); - pr_debug("Got SPU %x %d\n", spu->isrc, spu->number); - } else { - pr_debug("No SPU left\n"); - spu = NULL; - } - up(&spu_mutex); - - if (spu) - spu_init_channels(spu); + mutex_lock(&spu_full_list_mutex); + list_for_each_entry(spu, &spu_full_list, full_list) + device_create_file(&spu->dev, attr); + mutex_unlock(&spu_full_list_mutex); - return spu; + return 0; } -EXPORT_SYMBOL_GPL(spu_alloc); +EXPORT_SYMBOL_GPL(spu_add_dev_attr); -void spu_free(struct spu *spu) +int spu_add_dev_attr_group(struct attribute_group *attrs) { - down(&spu_mutex); - list_add_tail(&spu->list, &spu_list); - up(&spu_mutex); -} -EXPORT_SYMBOL_GPL(spu_free); + struct spu *spu; + int rc = 0; -static int spu_handle_mm_fault(struct spu *spu) -{ - struct mm_struct *mm = spu->mm; - struct vm_area_struct *vma; - u64 ea, dsisr, is_write; - int ret; + mutex_lock(&spu_full_list_mutex); + list_for_each_entry(spu, &spu_full_list, full_list) { + rc = sysfs_create_group(&spu->dev.kobj, attrs); - ea = spu->dar; - dsisr = spu->dsisr; -#if 0 - if (!IS_VALID_EA(ea)) { - return -EFAULT; - } -#endif /* XXX */ - if (mm == NULL) { - return -EFAULT; - } - if (mm->pgd == NULL) { - return -EFAULT; - } + /* we're in trouble here, but try unwinding anyway */ + if (rc) { + printk(KERN_ERR "%s: can't create sysfs group '%s'\n", + __func__, attrs->name); - down_read(&mm->mmap_sem); - vma = find_vma(mm, ea); - if (!vma) - goto bad_area; - if (vma->vm_start <= ea) - goto good_area; - if (!(vma->vm_flags & VM_GROWSDOWN)) - goto bad_area; -#if 0 - if (expand_stack(vma, ea)) - goto bad_area; -#endif /* XXX */ -good_area: - is_write = dsisr & MFC_DSISR_ACCESS_PUT; - if (is_write) { - if (!(vma->vm_flags & VM_WRITE)) - goto bad_area; - } else { - if (dsisr & MFC_DSISR_ACCESS_DENIED) - goto bad_area; - if (!(vma->vm_flags & (VM_READ | VM_EXEC))) - goto bad_area; - } - ret = 0; - switch (handle_mm_fault(mm, vma, ea, is_write)) { - case VM_FAULT_MINOR: - current->min_flt++; - break; - case VM_FAULT_MAJOR: - current->maj_flt++; - break; - case VM_FAULT_SIGBUS: - ret = -EFAULT; - goto bad_area; - case VM_FAULT_OOM: - ret = -ENOMEM; - goto bad_area; - default: - BUG(); + list_for_each_entry_continue_reverse(spu, + &spu_full_list, full_list) + sysfs_remove_group(&spu->dev.kobj, attrs); + break; + } } - up_read(&mm->mmap_sem); - return ret; -bad_area: - up_read(&mm->mmap_sem); - return -EFAULT; + mutex_unlock(&spu_full_list_mutex); + + return rc; } +EXPORT_SYMBOL_GPL(spu_add_dev_attr_group); -int spu_irq_class_1_bottom(struct spu *spu) + +void spu_remove_dev_attr(struct device_attribute *attr) { - u64 ea, dsisr, access, error = 0UL; - int ret = 0; + struct spu *spu; - ea = spu->dar; - dsisr = spu->dsisr; - if (dsisr & MFC_DSISR_PTE_NOT_FOUND) { - access = (_PAGE_PRESENT | _PAGE_USER); - access |= (dsisr & MFC_DSISR_ACCESS_PUT) ? 
_PAGE_RW : 0UL; - if (hash_page(ea, access, 0x300) != 0) - error |= CLASS1_ENABLE_STORAGE_FAULT_INTR; - } - if ((error & CLASS1_ENABLE_STORAGE_FAULT_INTR) || - (dsisr & MFC_DSISR_ACCESS_DENIED)) { - if ((ret = spu_handle_mm_fault(spu)) != 0) - error |= CLASS1_ENABLE_STORAGE_FAULT_INTR; - else - error &= ~CLASS1_ENABLE_STORAGE_FAULT_INTR; - } - spu->dar = 0UL; - spu->dsisr = 0UL; - if (!error) { - spu_restart_dma(spu); - } else { - __spu_trap_invalid_dma(spu); - } - return ret; + mutex_lock(&spu_full_list_mutex); + list_for_each_entry(spu, &spu_full_list, full_list) + device_remove_file(&spu->dev, attr); + mutex_unlock(&spu_full_list_mutex); } +EXPORT_SYMBOL_GPL(spu_remove_dev_attr); -void spu_irq_setaffinity(struct spu *spu, int cpu) +void spu_remove_dev_attr_group(struct attribute_group *attrs) { - u64 target = iic_get_target_id(cpu); - u64 route = target << 48 | target << 32 | target << 16; - spu_int_route_set(spu, route); + struct spu *spu; + + mutex_lock(&spu_full_list_mutex); + list_for_each_entry(spu, &spu_full_list, full_list) + sysfs_remove_group(&spu->dev.kobj, attrs); + mutex_unlock(&spu_full_list_mutex); } -EXPORT_SYMBOL_GPL(spu_irq_setaffinity); +EXPORT_SYMBOL_GPL(spu_remove_dev_attr_group); -static void __iomem * __init map_spe_prop(struct device_node *n, - const char *name) +static int spu_create_dev(struct spu *spu) { - struct address_prop { - unsigned long address; - unsigned int len; - } __attribute__((packed)) *prop; - - void *p; - int proplen; - - p = get_property(n, name, &proplen); - if (proplen != sizeof (struct address_prop)) - return NULL; + int ret; - prop = p; + spu->dev.id = spu->number; + spu->dev.bus = &spu_subsys; + ret = device_register(&spu->dev); + if (ret) { + printk(KERN_ERR "Can't register SPU %d with sysfs\n", + spu->number); + return ret; + } - return ioremap(prop->address, prop->len); -} + sysfs_add_device_to_node(&spu->dev, spu->node); -static void spu_unmap(struct spu *spu) -{ - iounmap(spu->priv2); - iounmap(spu->priv1); - iounmap(spu->problem); - iounmap((u8 __iomem *)spu->local_store); + return 0; } -static int __init spu_map_device(struct spu *spu, struct device_node *spe) +static int __init create_spu(void *data) { - char *prop; + struct spu *spu; int ret; + static int number; + unsigned long flags; + struct timespec ts; - ret = -ENODEV; - prop = get_property(spe, "isrc", NULL); - if (!prop) + ret = -ENOMEM; + spu = kzalloc(sizeof (*spu), GFP_KERNEL); + if (!spu) goto out; - spu->isrc = *(unsigned int *)prop; - spu->name = get_property(spe, "name", NULL); - if (!spu->name) - goto out; + spu->alloc_state = SPU_FREE; - prop = get_property(spe, "local-store", NULL); - if (!prop) - goto out; - spu->local_store_phys = *(unsigned long *)prop; + spin_lock_init(&spu->register_lock); + spin_lock(&spu_lock); + spu->number = number++; + spin_unlock(&spu_lock); - /* we use local store as ram, not io memory */ - spu->local_store = (void __force *)map_spe_prop(spe, "local-store"); - if (!spu->local_store) - goto out; + ret = spu_create_spu(spu, data); + + if (ret) + goto out_free; + + spu_mfc_sdr_setup(spu); + spu_mfc_sr1_set(spu, 0x33); + ret = spu_request_irqs(spu); + if (ret) + goto out_destroy; + + ret = spu_create_dev(spu); + if (ret) + goto out_free_irqs; + + mutex_lock(&cbe_spu_info[spu->node].list_mutex); + list_add(&spu->cbe_list, &cbe_spu_info[spu->node].spus); + cbe_spu_info[spu->node].n_spus++; + mutex_unlock(&cbe_spu_info[spu->node].list_mutex); - spu->problem= map_spe_prop(spe, "problem"); - if (!spu->problem) - goto out_unmap; + 
mutex_lock(&spu_full_list_mutex); + spin_lock_irqsave(&spu_full_list_lock, flags); + list_add(&spu->full_list, &spu_full_list); + spin_unlock_irqrestore(&spu_full_list_lock, flags); + mutex_unlock(&spu_full_list_mutex); - spu->priv1= map_spe_prop(spe, "priv1"); - /* priv1 is not available on a hypervisor */ + spu->stats.util_state = SPU_UTIL_IDLE_LOADED; + ktime_get_ts(&ts); + spu->stats.tstamp = timespec_to_ns(&ts); + + INIT_LIST_HEAD(&spu->aff_list); - spu->priv2= map_spe_prop(spe, "priv2"); - if (!spu->priv2) - goto out_unmap; - ret = 0; goto out; -out_unmap: - spu_unmap(spu); +out_free_irqs: + spu_free_irqs(spu); +out_destroy: + spu_destroy_spu(spu); +out_free: + kfree(spu); out: return ret; } -static int __init find_spu_node_id(struct device_node *spe) +static const char *spu_state_names[] = { + "user", "system", "iowait", "idle" +}; + +static unsigned long long spu_acct_time(struct spu *spu, + enum spu_utilization_state state) { - unsigned int *id; - struct device_node *cpu; + struct timespec ts; + unsigned long long time = spu->stats.times[state]; + + /* + * If the spu is idle or the context is stopped, utilization + * statistics are not updated. Apply the time delta from the + * last recorded state of the spu. + */ + if (spu->stats.util_state == state) { + ktime_get_ts(&ts); + time += timespec_to_ns(&ts) - spu->stats.tstamp; + } + + return time / NSEC_PER_MSEC; +} - cpu = spe->parent->parent; - id = (unsigned int *)get_property(cpu, "node-id", NULL); - return id ? *id : 0; +static ssize_t spu_stat_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct spu *spu = container_of(dev, struct spu, dev); + + return sprintf(buf, "%s %llu %llu %llu %llu " + "%llu %llu %llu %llu %llu %llu %llu %llu\n", + spu_state_names[spu->stats.util_state], + spu_acct_time(spu, SPU_UTIL_USER), + spu_acct_time(spu, SPU_UTIL_SYSTEM), + spu_acct_time(spu, SPU_UTIL_IOWAIT), + spu_acct_time(spu, SPU_UTIL_IDLE_LOADED), + spu->stats.vol_ctx_switch, + spu->stats.invol_ctx_switch, + spu->stats.slb_flt, + spu->stats.hash_flt, + spu->stats.min_flt, + spu->stats.maj_flt, + spu->stats.class2_intr, + spu->stats.libassist); } -static int __init create_spu(struct device_node *spe) +static DEVICE_ATTR(stat, 0444, spu_stat_show, NULL); + +#ifdef CONFIG_KEXEC + +struct crash_spu_info { + struct spu *spu; + u32 saved_spu_runcntl_RW; + u32 saved_spu_status_R; + u32 saved_spu_npc_RW; + u64 saved_mfc_sr1_RW; + u64 saved_mfc_dar; + u64 saved_mfc_dsisr; +}; + +#define CRASH_NUM_SPUS 16 /* Enough for current hardware */ +static struct crash_spu_info crash_spu_info[CRASH_NUM_SPUS]; + +static void crash_kexec_stop_spus(void) { struct spu *spu; - int ret; - static int number; + int i; + u64 tmp; - ret = -ENOMEM; - spu = kmalloc(sizeof (*spu), GFP_KERNEL); - if (!spu) - goto out; + for (i = 0; i < CRASH_NUM_SPUS; i++) { + if (!crash_spu_info[i].spu) + continue; - ret = spu_map_device(spu, spe); - if (ret) - goto out_free; + spu = crash_spu_info[i].spu; - spu->node = find_spu_node_id(spe); - spu->stop_code = 0; - spu->slb_replace = 0; - spu->mm = NULL; - spu->ctx = NULL; - spu->rq = NULL; - spu->pid = 0; - spu->class_0_pending = 0; - spu->flags = 0UL; - spu->dar = 0UL; - spu->dsisr = 0UL; - spin_lock_init(&spu->register_lock); + crash_spu_info[i].saved_spu_runcntl_RW = + in_be32(&spu->problem->spu_runcntl_RW); + crash_spu_info[i].saved_spu_status_R = + in_be32(&spu->problem->spu_status_R); + crash_spu_info[i].saved_spu_npc_RW = + in_be32(&spu->problem->spu_npc_RW); - spu_mfc_sdr_set(spu, mfspr(SPRN_SDR1)); - 
spu_mfc_sr1_set(spu, 0x33); + crash_spu_info[i].saved_mfc_dar = spu_mfc_dar_get(spu); + crash_spu_info[i].saved_mfc_dsisr = spu_mfc_dsisr_get(spu); + tmp = spu_mfc_sr1_get(spu); + crash_spu_info[i].saved_mfc_sr1_RW = tmp; - spu->ibox_callback = NULL; - spu->wbox_callback = NULL; - spu->stop_callback = NULL; + tmp &= ~MFC_STATE1_MASTER_RUN_CONTROL_MASK; + spu_mfc_sr1_set(spu, tmp); - down(&spu_mutex); - spu->number = number++; - ret = spu_request_irqs(spu); - if (ret) - goto out_unmap; + __delay(200); + } +} - list_add(&spu->list, &spu_list); - up(&spu_mutex); +static void crash_register_spus(struct list_head *list) +{ + struct spu *spu; + int ret; - pr_debug(KERN_DEBUG "Using SPE %s %02x %p %p %p %p %d\n", - spu->name, spu->isrc, spu->local_store, - spu->problem, spu->priv1, spu->priv2, spu->number); - goto out; + list_for_each_entry(spu, list, full_list) { + if (WARN_ON(spu->number >= CRASH_NUM_SPUS)) + continue; -out_unmap: - up(&spu_mutex); - spu_unmap(spu); -out_free: - kfree(spu); -out: - return ret; + crash_spu_info[spu->number].spu = spu; + } + + ret = crash_shutdown_register(&crash_kexec_stop_spus); + if (ret) + printk(KERN_ERR "Could not register SPU crash handler"); } -static void destroy_spu(struct spu *spu) +#else +static inline void crash_register_spus(struct list_head *list) { - list_del_init(&spu->list); - - spu_free_irqs(spu); - spu_unmap(spu); - kfree(spu); } +#endif -static void cleanup_spu_base(void) +static void spu_shutdown(void) { - struct spu *spu, *tmp; - down(&spu_mutex); - list_for_each_entry_safe(spu, tmp, &spu_list, list) - destroy_spu(spu); - up(&spu_mutex); + struct spu *spu; + + mutex_lock(&spu_full_list_mutex); + list_for_each_entry(spu, &spu_full_list, full_list) { + spu_free_irqs(spu); + spu_destroy_spu(spu); + } + mutex_unlock(&spu_full_list_mutex); } -module_exit(cleanup_spu_base); + +static struct syscore_ops spu_syscore_ops = { + .shutdown = spu_shutdown, +}; static int __init init_spu_base(void) { - struct device_node *node; - int ret; + int i, ret = 0; - ret = -ENODEV; - for (node = of_find_node_by_type(NULL, "spe"); - node; node = of_find_node_by_type(node, "spe")) { - ret = create_spu(node); - if (ret) { - printk(KERN_WARNING "%s: Error initializing %s\n", - __FUNCTION__, node->name); - cleanup_spu_base(); - break; - } + for (i = 0; i < MAX_NUMNODES; i++) { + mutex_init(&cbe_spu_info[i].list_mutex); + INIT_LIST_HEAD(&cbe_spu_info[i].spus); } - /* in some old firmware versions, the spe is called 'spc', so we - look for that as well */ - for (node = of_find_node_by_type(NULL, "spc"); - node; node = of_find_node_by_type(node, "spc")) { - ret = create_spu(node); - if (ret) { - printk(KERN_WARNING "%s: Error initializing %s\n", - __FUNCTION__, node->name); - cleanup_spu_base(); - break; - } + + if (!spu_management_ops) + goto out; + + /* create system subsystem for spus */ + ret = subsys_system_register(&spu_subsys, NULL); + if (ret) + goto out; + + ret = spu_enumerate_spus(create_spu); + + if (ret < 0) { + printk(KERN_WARNING "%s: Error initializing spus\n", + __func__); + goto out_unregister_subsys; } + + if (ret > 0) + fb_append_extra_logo(&logo_spe_clut224, ret); + + mutex_lock(&spu_full_list_mutex); + xmon_register_spus(&spu_full_list); + crash_register_spus(&spu_full_list); + mutex_unlock(&spu_full_list_mutex); + spu_add_dev_attr(&dev_attr_stat); + register_syscore_ops(&spu_syscore_ops); + + spu_init_affinity(); + + return 0; + + out_unregister_subsys: + bus_unregister(&spu_subsys); + out: return ret; } module_init(init_spu_base); diff --git 
a/arch/powerpc/platforms/cell/spu_callbacks.c b/arch/powerpc/platforms/cell/spu_callbacks.c new file mode 100644 index 00000000000..b0ec78e8ad6 --- /dev/null +++ b/arch/powerpc/platforms/cell/spu_callbacks.c @@ -0,0 +1,73 @@ +/* + * System call callback functions for SPUs + */ + +#undef DEBUG + +#include <linux/kallsyms.h> +#include <linux/export.h> +#include <linux/syscalls.h> + +#include <asm/spu.h> +#include <asm/syscalls.h> +#include <asm/unistd.h> + +/* + * This table defines the system calls that an SPU can call. + * It is currently a subset of the 64 bit powerpc system calls, + * with the exact same semantics. + * + * The reasons for disabling some of the system calls are: + * 1. They interact with the way SPU syscalls are handled + * and we can't let them execute ever: + * restart_syscall, exit, fork, execve, ptrace, ... + * 2. They are deprecated and replaced by other means: + * uselib, pciconfig_*, sysfs, ... + * 3. They interact with the system in a way + * we don't want an SPU to: + * reboot, init_module, mount, kexec_load + * 4. They are optional and we can't rely on them being + * linked into the kernel. Unfortunately, the cond_syscall + * helper does not work here as it does not add the necessary + * opd symbols: + * mbind, mq_open, ipc, ... + */ + +static void *spu_syscall_table[] = { +#define SYSCALL(func) sys_ni_syscall, +#define COMPAT_SYS(func) sys_ni_syscall, +#define PPC_SYS(func) sys_ni_syscall, +#define OLDSYS(func) sys_ni_syscall, +#define SYS32ONLY(func) sys_ni_syscall, +#define SYSX(f, f3264, f32) sys_ni_syscall, + +#define SYSCALL_SPU(func) sys_##func, +#define COMPAT_SYS_SPU(func) sys_##func, +#define PPC_SYS_SPU(func) ppc_##func, +#define SYSX_SPU(f, f3264, f32) f, + +#include <asm/systbl.h> +}; + +long spu_sys_callback(struct spu_syscall_block *s) +{ + long (*syscall)(u64 a1, u64 a2, u64 a3, u64 a4, u64 a5, u64 a6); + + if (s->nr_ret >= ARRAY_SIZE(spu_syscall_table)) { + pr_debug("%s: invalid syscall #%lld", __func__, s->nr_ret); + return -ENOSYS; + } + + syscall = spu_syscall_table[s->nr_ret]; + + pr_debug("SPU-syscall " + "%pSR:syscall%lld(%llx, %llx, %llx, %llx, %llx, %llx)\n", + syscall, + s->nr_ret, + s->parm[0], s->parm[1], s->parm[2], + s->parm[3], s->parm[4], s->parm[5]); + + return syscall(s->parm[0], s->parm[1], s->parm[2], + s->parm[3], s->parm[4], s->parm[5]); +} +EXPORT_SYMBOL_GPL(spu_sys_callback); diff --git a/arch/powerpc/platforms/cell/spu_fault.c b/arch/powerpc/platforms/cell/spu_fault.c new file mode 100644 index 00000000000..641e7273d75 --- /dev/null +++ b/arch/powerpc/platforms/cell/spu_fault.c @@ -0,0 +1,94 @@ +/* + * SPU mm fault handler + * + * (C) Copyright IBM Deutschland Entwicklung GmbH 2007 + * + * Author: Arnd Bergmann <arndb@de.ibm.com> + * Author: Jeremy Kerr <jk@ozlabs.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/export.h> + +#include <asm/spu.h> +#include <asm/spu_csa.h> + +/* + * This ought to be kept in sync with the powerpc specific do_page_fault + * function. Currently, there are a few corner cases that we haven't had + * to handle fortunately. + */ +int spu_handle_mm_fault(struct mm_struct *mm, unsigned long ea, + unsigned long dsisr, unsigned *flt) +{ + struct vm_area_struct *vma; + unsigned long is_write; + int ret; + + if (mm == NULL) + return -EFAULT; + + if (mm->pgd == NULL) + return -EFAULT; + + down_read(&mm->mmap_sem); + ret = -EFAULT; + vma = find_vma(mm, ea); + if (!vma) + goto out_unlock; + + if (ea < vma->vm_start) { + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto out_unlock; + if (expand_stack(vma, ea)) + goto out_unlock; + } + + is_write = dsisr & MFC_DSISR_ACCESS_PUT; + if (is_write) { + if (!(vma->vm_flags & VM_WRITE)) + goto out_unlock; + } else { + if (dsisr & MFC_DSISR_ACCESS_DENIED) + goto out_unlock; + if (!(vma->vm_flags & (VM_READ | VM_EXEC))) + goto out_unlock; + } + + ret = 0; + *flt = handle_mm_fault(mm, vma, ea, is_write ? FAULT_FLAG_WRITE : 0); + if (unlikely(*flt & VM_FAULT_ERROR)) { + if (*flt & VM_FAULT_OOM) { + ret = -ENOMEM; + goto out_unlock; + } else if (*flt & VM_FAULT_SIGBUS) { + ret = -EFAULT; + goto out_unlock; + } + BUG(); + } + + if (*flt & VM_FAULT_MAJOR) + current->maj_flt++; + else + current->min_flt++; + +out_unlock: + up_read(&mm->mmap_sem); + return ret; +} +EXPORT_SYMBOL_GPL(spu_handle_mm_fault); diff --git a/arch/powerpc/platforms/cell/spu_manage.c b/arch/powerpc/platforms/cell/spu_manage.c new file mode 100644 index 00000000000..c3327f3d8cf --- /dev/null +++ b/arch/powerpc/platforms/cell/spu_manage.c @@ -0,0 +1,555 @@ +/* + * spu management operations for of based platforms + * + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 + * Copyright 2006 Sony Corp. + * (C) Copyright 2007 TOSHIBA CORPORATION + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#include <linux/interrupt.h> +#include <linux/list.h> +#include <linux/export.h> +#include <linux/ptrace.h> +#include <linux/wait.h> +#include <linux/mm.h> +#include <linux/io.h> +#include <linux/mutex.h> +#include <linux/device.h> + +#include <asm/spu.h> +#include <asm/spu_priv1.h> +#include <asm/firmware.h> +#include <asm/prom.h> + +#include "spufs/spufs.h" +#include "interrupt.h" + +struct device_node *spu_devnode(struct spu *spu) +{ + return spu->devnode; +} + +EXPORT_SYMBOL_GPL(spu_devnode); + +static u64 __init find_spu_unit_number(struct device_node *spe) +{ + const unsigned int *prop; + int proplen; + + /* new device trees should provide the physical-id attribute */ + prop = of_get_property(spe, "physical-id", &proplen); + if (proplen == 4) + return (u64)*prop; + + /* celleb device tree provides the unit-id */ + prop = of_get_property(spe, "unit-id", &proplen); + if (proplen == 4) + return (u64)*prop; + + /* legacy device trees provide the id in the reg attribute */ + prop = of_get_property(spe, "reg", &proplen); + if (proplen == 4) + return (u64)*prop; + + return 0; +} + +static void spu_unmap(struct spu *spu) +{ + if (!firmware_has_feature(FW_FEATURE_LPAR)) + iounmap(spu->priv1); + iounmap(spu->priv2); + iounmap(spu->problem); + iounmap((__force u8 __iomem *)spu->local_store); +} + +static int __init spu_map_interrupts_old(struct spu *spu, + struct device_node *np) +{ + unsigned int isrc; + const u32 *tmp; + int nid; + + /* Get the interrupt source unit from the device-tree */ + tmp = of_get_property(np, "isrc", NULL); + if (!tmp) + return -ENODEV; + isrc = tmp[0]; + + tmp = of_get_property(np->parent->parent, "node-id", NULL); + if (!tmp) { + printk(KERN_WARNING "%s: can't find node-id\n", __func__); + nid = spu->node; + } else + nid = tmp[0]; + + /* Add the node number */ + isrc |= nid << IIC_IRQ_NODE_SHIFT; + + /* Now map interrupts of all 3 classes */ + spu->irqs[0] = irq_create_mapping(NULL, IIC_IRQ_CLASS_0 | isrc); + spu->irqs[1] = irq_create_mapping(NULL, IIC_IRQ_CLASS_1 | isrc); + spu->irqs[2] = irq_create_mapping(NULL, IIC_IRQ_CLASS_2 | isrc); + + /* Right now, we only fail if class 2 failed */ + return spu->irqs[2] == NO_IRQ ? 
-EINVAL : 0; +} + +static void __iomem * __init spu_map_prop_old(struct spu *spu, + struct device_node *n, + const char *name) +{ + const struct address_prop { + unsigned long address; + unsigned int len; + } __attribute__((packed)) *prop; + int proplen; + + prop = of_get_property(n, name, &proplen); + if (prop == NULL || proplen != sizeof (struct address_prop)) + return NULL; + + return ioremap(prop->address, prop->len); +} + +static int __init spu_map_device_old(struct spu *spu) +{ + struct device_node *node = spu->devnode; + const char *prop; + int ret; + + ret = -ENODEV; + spu->name = of_get_property(node, "name", NULL); + if (!spu->name) + goto out; + + prop = of_get_property(node, "local-store", NULL); + if (!prop) + goto out; + spu->local_store_phys = *(unsigned long *)prop; + + /* we use local store as ram, not io memory */ + spu->local_store = (void __force *) + spu_map_prop_old(spu, node, "local-store"); + if (!spu->local_store) + goto out; + + prop = of_get_property(node, "problem", NULL); + if (!prop) + goto out_unmap; + spu->problem_phys = *(unsigned long *)prop; + + spu->problem = spu_map_prop_old(spu, node, "problem"); + if (!spu->problem) + goto out_unmap; + + spu->priv2 = spu_map_prop_old(spu, node, "priv2"); + if (!spu->priv2) + goto out_unmap; + + if (!firmware_has_feature(FW_FEATURE_LPAR)) { + spu->priv1 = spu_map_prop_old(spu, node, "priv1"); + if (!spu->priv1) + goto out_unmap; + } + + ret = 0; + goto out; + +out_unmap: + spu_unmap(spu); +out: + return ret; +} + +static int __init spu_map_interrupts(struct spu *spu, struct device_node *np) +{ + struct of_phandle_args oirq; + int ret; + int i; + + for (i=0; i < 3; i++) { + ret = of_irq_parse_one(np, i, &oirq); + if (ret) { + pr_debug("spu_new: failed to get irq %d\n", i); + goto err; + } + ret = -EINVAL; + pr_debug(" irq %d no 0x%x on %s\n", i, oirq.args[0], + oirq.np->full_name); + spu->irqs[i] = irq_create_of_mapping(&oirq); + if (spu->irqs[i] == NO_IRQ) { + pr_debug("spu_new: failed to map it !\n"); + goto err; + } + } + return 0; + +err: + pr_debug("failed to map irq %x for spu %s\n", *oirq.args, + spu->name); + for (; i >= 0; i--) { + if (spu->irqs[i] != NO_IRQ) + irq_dispose_mapping(spu->irqs[i]); + } + return ret; +} + +static int spu_map_resource(struct spu *spu, int nr, + void __iomem** virt, unsigned long *phys) +{ + struct device_node *np = spu->devnode; + struct resource resource = { }; + unsigned long len; + int ret; + + ret = of_address_to_resource(np, nr, &resource); + if (ret) + return ret; + if (phys) + *phys = resource.start; + len = resource_size(&resource); + *virt = ioremap(resource.start, len); + if (!*virt) + return -EINVAL; + return 0; +} + +static int __init spu_map_device(struct spu *spu) +{ + struct device_node *np = spu->devnode; + int ret = -ENODEV; + + spu->name = of_get_property(np, "name", NULL); + if (!spu->name) + goto out; + + ret = spu_map_resource(spu, 0, (void __iomem**)&spu->local_store, + &spu->local_store_phys); + if (ret) { + pr_debug("spu_new: failed to map %s resource 0\n", + np->full_name); + goto out; + } + ret = spu_map_resource(spu, 1, (void __iomem**)&spu->problem, + &spu->problem_phys); + if (ret) { + pr_debug("spu_new: failed to map %s resource 1\n", + np->full_name); + goto out_unmap; + } + ret = spu_map_resource(spu, 2, (void __iomem**)&spu->priv2, NULL); + if (ret) { + pr_debug("spu_new: failed to map %s resource 2\n", + np->full_name); + goto out_unmap; + } + if (!firmware_has_feature(FW_FEATURE_LPAR)) + ret = spu_map_resource(spu, 3, + (void __iomem**)&spu->priv1, 
NULL); + if (ret) { + pr_debug("spu_new: failed to map %s resource 3\n", + np->full_name); + goto out_unmap; + } + pr_debug("spu_new: %s maps:\n", np->full_name); + pr_debug(" local store : 0x%016lx -> 0x%p\n", + spu->local_store_phys, spu->local_store); + pr_debug(" problem state : 0x%016lx -> 0x%p\n", + spu->problem_phys, spu->problem); + pr_debug(" priv2 : 0x%p\n", spu->priv2); + pr_debug(" priv1 : 0x%p\n", spu->priv1); + + return 0; + +out_unmap: + spu_unmap(spu); +out: + pr_debug("failed to map spe %s: %d\n", spu->name, ret); + return ret; +} + +static int __init of_enumerate_spus(int (*fn)(void *data)) +{ + int ret; + struct device_node *node; + unsigned int n = 0; + + ret = -ENODEV; + for (node = of_find_node_by_type(NULL, "spe"); + node; node = of_find_node_by_type(node, "spe")) { + ret = fn(node); + if (ret) { + printk(KERN_WARNING "%s: Error initializing %s\n", + __func__, node->name); + break; + } + n++; + } + return ret ? ret : n; +} + +static int __init of_create_spu(struct spu *spu, void *data) +{ + int ret; + struct device_node *spe = (struct device_node *)data; + static int legacy_map = 0, legacy_irq = 0; + + spu->devnode = of_node_get(spe); + spu->spe_id = find_spu_unit_number(spe); + + spu->node = of_node_to_nid(spe); + if (spu->node >= MAX_NUMNODES) { + printk(KERN_WARNING "SPE %s on node %d ignored," + " node number too big\n", spe->full_name, spu->node); + printk(KERN_WARNING "Check if CONFIG_NUMA is enabled.\n"); + ret = -ENODEV; + goto out; + } + + ret = spu_map_device(spu); + if (ret) { + if (!legacy_map) { + legacy_map = 1; + printk(KERN_WARNING "%s: Legacy device tree found, " + "trying to map old style\n", __func__); + } + ret = spu_map_device_old(spu); + if (ret) { + printk(KERN_ERR "Unable to map %s\n", + spu->name); + goto out; + } + } + + ret = spu_map_interrupts(spu, spe); + if (ret) { + if (!legacy_irq) { + legacy_irq = 1; + printk(KERN_WARNING "%s: Legacy device tree found, " + "trying old style irq\n", __func__); + } + ret = spu_map_interrupts_old(spu, spe); + if (ret) { + printk(KERN_ERR "%s: could not map interrupts\n", + spu->name); + goto out_unmap; + } + } + + pr_debug("Using SPE %s %p %p %p %p %d\n", spu->name, + spu->local_store, spu->problem, spu->priv1, + spu->priv2, spu->number); + goto out; + +out_unmap: + spu_unmap(spu); +out: + return ret; +} + +static int of_destroy_spu(struct spu *spu) +{ + spu_unmap(spu); + of_node_put(spu->devnode); + return 0; +} + +static void enable_spu_by_master_run(struct spu_context *ctx) +{ + ctx->ops->master_start(ctx); +} + +static void disable_spu_by_master_run(struct spu_context *ctx) +{ + ctx->ops->master_stop(ctx); +} + +/* Hardcoded affinity idxs for qs20 */ +#define QS20_SPES_PER_BE 8 +static int qs20_reg_idxs[QS20_SPES_PER_BE] = { 0, 2, 4, 6, 7, 5, 3, 1 }; +static int qs20_reg_memory[QS20_SPES_PER_BE] = { 1, 1, 0, 0, 0, 0, 0, 0 }; + +static struct spu *spu_lookup_reg(int node, u32 reg) +{ + struct spu *spu; + const u32 *spu_reg; + + list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { + spu_reg = of_get_property(spu_devnode(spu), "reg", NULL); + if (*spu_reg == reg) + return spu; + } + return NULL; +} + +static void init_affinity_qs20_harcoded(void) +{ + int node, i; + struct spu *last_spu, *spu; + u32 reg; + + for (node = 0; node < MAX_NUMNODES; node++) { + last_spu = NULL; + for (i = 0; i < QS20_SPES_PER_BE; i++) { + reg = qs20_reg_idxs[i]; + spu = spu_lookup_reg(node, reg); + if (!spu) + continue; + spu->has_mem_affinity = qs20_reg_memory[reg]; + if (last_spu) + list_add_tail(&spu->aff_list, + 
&last_spu->aff_list); + last_spu = spu; + } + } +} + +static int of_has_vicinity(void) +{ + struct device_node *dn; + + for_each_node_by_type(dn, "spe") { + if (of_find_property(dn, "vicinity", NULL)) { + of_node_put(dn); + return 1; + } + } + return 0; +} + +static struct spu *devnode_spu(int cbe, struct device_node *dn) +{ + struct spu *spu; + + list_for_each_entry(spu, &cbe_spu_info[cbe].spus, cbe_list) + if (spu_devnode(spu) == dn) + return spu; + return NULL; +} + +static struct spu * +neighbour_spu(int cbe, struct device_node *target, struct device_node *avoid) +{ + struct spu *spu; + struct device_node *spu_dn; + const phandle *vic_handles; + int lenp, i; + + list_for_each_entry(spu, &cbe_spu_info[cbe].spus, cbe_list) { + spu_dn = spu_devnode(spu); + if (spu_dn == avoid) + continue; + vic_handles = of_get_property(spu_dn, "vicinity", &lenp); + for (i=0; i < (lenp / sizeof(phandle)); i++) { + if (vic_handles[i] == target->phandle) + return spu; + } + } + return NULL; +} + +static void init_affinity_node(int cbe) +{ + struct spu *spu, *last_spu; + struct device_node *vic_dn, *last_spu_dn; + phandle avoid_ph; + const phandle *vic_handles; + const char *name; + int lenp, i, added; + + last_spu = list_first_entry(&cbe_spu_info[cbe].spus, struct spu, + cbe_list); + avoid_ph = 0; + for (added = 1; added < cbe_spu_info[cbe].n_spus; added++) { + last_spu_dn = spu_devnode(last_spu); + vic_handles = of_get_property(last_spu_dn, "vicinity", &lenp); + + /* + * Walk through each phandle in vicinity property of the spu + * (typically two vicinity phandles per spe node) + */ + for (i = 0; i < (lenp / sizeof(phandle)); i++) { + if (vic_handles[i] == avoid_ph) + continue; + + vic_dn = of_find_node_by_phandle(vic_handles[i]); + if (!vic_dn) + continue; + + /* a neighbour might be spe, mic-tm, or bif0 */ + name = of_get_property(vic_dn, "name", NULL); + if (!name) + continue; + + if (strcmp(name, "spe") == 0) { + spu = devnode_spu(cbe, vic_dn); + avoid_ph = last_spu_dn->phandle; + } else { + /* + * "mic-tm" and "bif0" nodes do not have a + * vicinity property. So we need to find the + * spe which has vic_dn as neighbour, but + * skipping the one we came from (last_spu_dn) + */ + spu = neighbour_spu(cbe, vic_dn, last_spu_dn); + if (!spu) + continue; + if (!strcmp(name, "mic-tm")) { + last_spu->has_mem_affinity = 1; + spu->has_mem_affinity = 1; + } + avoid_ph = vic_dn->phandle; + } + + list_add_tail(&spu->aff_list, &last_spu->aff_list); + last_spu = spu; + break; + } + } +} + +static void init_affinity_fw(void) +{ + int cbe; + + for (cbe = 0; cbe < MAX_NUMNODES; cbe++) + init_affinity_node(cbe); +} + +static int __init init_affinity(void) +{ + if (of_has_vicinity()) { + init_affinity_fw(); + } else { + long root = of_get_flat_dt_root(); + if (of_flat_dt_is_compatible(root, "IBM,CPBW-1.0")) + init_affinity_qs20_harcoded(); + else + printk("No affinity configuration found\n"); + } + + return 0; +} + +const struct spu_management_ops spu_management_of_ops = { + .enumerate_spus = of_enumerate_spus, + .create_spu = of_create_spu, + .destroy_spu = of_destroy_spu, + .enable_spu = enable_spu_by_master_run, + .disable_spu = disable_spu_by_master_run, + .init_affinity = init_affinity, +}; diff --git a/arch/powerpc/platforms/cell/spu_notify.c b/arch/powerpc/platforms/cell/spu_notify.c new file mode 100644 index 00000000000..afdf857c318 --- /dev/null +++ b/arch/powerpc/platforms/cell/spu_notify.c @@ -0,0 +1,68 @@ +/* + * Move OProfile dependencies from spufs module to the kernel so it + * can run on non-cell PPC. 
+ * + * Copyright (C) IBM 2005 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#undef DEBUG + +#include <linux/export.h> +#include <linux/notifier.h> +#include <asm/spu.h> +#include "spufs/spufs.h" + +static BLOCKING_NOTIFIER_HEAD(spu_switch_notifier); + +void spu_switch_notify(struct spu *spu, struct spu_context *ctx) +{ + blocking_notifier_call_chain(&spu_switch_notifier, + ctx ? ctx->object_id : 0, spu); +} +EXPORT_SYMBOL_GPL(spu_switch_notify); + +int spu_switch_event_register(struct notifier_block *n) +{ + int ret; + ret = blocking_notifier_chain_register(&spu_switch_notifier, n); + if (!ret) + notify_spus_active(); + return ret; +} +EXPORT_SYMBOL_GPL(spu_switch_event_register); + +int spu_switch_event_unregister(struct notifier_block *n) +{ + return blocking_notifier_chain_unregister(&spu_switch_notifier, n); +} +EXPORT_SYMBOL_GPL(spu_switch_event_unregister); + +void spu_set_profile_private_kref(struct spu_context *ctx, + struct kref *prof_info_kref, + void (* prof_info_release) (struct kref *kref)) +{ + ctx->prof_priv_kref = prof_info_kref; + ctx->prof_priv_release = prof_info_release; +} +EXPORT_SYMBOL_GPL(spu_set_profile_private_kref); + +void *spu_get_profile_private_kref(struct spu_context *ctx) +{ + return ctx->prof_priv_kref; +} +EXPORT_SYMBOL_GPL(spu_get_profile_private_kref); + diff --git a/arch/powerpc/platforms/cell/spu_priv1.c b/arch/powerpc/platforms/cell/spu_priv1.c deleted file mode 100644 index b2656421c7b..00000000000 --- a/arch/powerpc/platforms/cell/spu_priv1.c +++ /dev/null @@ -1,133 +0,0 @@ -/* - * access to SPU privileged registers - */ -#include <linux/module.h> - -#include <asm/io.h> -#include <asm/spu.h> - -void spu_int_mask_and(struct spu *spu, int class, u64 mask) -{ - u64 old_mask; - - old_mask = in_be64(&spu->priv1->int_mask_RW[class]); - out_be64(&spu->priv1->int_mask_RW[class], old_mask & mask); -} -EXPORT_SYMBOL_GPL(spu_int_mask_and); - -void spu_int_mask_or(struct spu *spu, int class, u64 mask) -{ - u64 old_mask; - - old_mask = in_be64(&spu->priv1->int_mask_RW[class]); - out_be64(&spu->priv1->int_mask_RW[class], old_mask | mask); -} -EXPORT_SYMBOL_GPL(spu_int_mask_or); - -void spu_int_mask_set(struct spu *spu, int class, u64 mask) -{ - out_be64(&spu->priv1->int_mask_RW[class], mask); -} -EXPORT_SYMBOL_GPL(spu_int_mask_set); - -u64 spu_int_mask_get(struct spu *spu, int class) -{ - return in_be64(&spu->priv1->int_mask_RW[class]); -} -EXPORT_SYMBOL_GPL(spu_int_mask_get); - -void spu_int_stat_clear(struct spu *spu, int class, u64 stat) -{ - out_be64(&spu->priv1->int_stat_RW[class], stat); -} -EXPORT_SYMBOL_GPL(spu_int_stat_clear); - -u64 spu_int_stat_get(struct spu *spu, int class) -{ - return in_be64(&spu->priv1->int_stat_RW[class]); -} -EXPORT_SYMBOL_GPL(spu_int_stat_get); - -void spu_int_route_set(struct spu *spu, u64 route) -{ - out_be64(&spu->priv1->int_route_RW, route); -} 
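[Editorial aside, not part of the patch.] The notifier chain introduced by spu_notify.c above is the hook that lets a profiler such as OProfile track which context is loaded on which physical SPU without linking against spufs. The sketch below shows how a hypothetical client module might use it; my_spu_switch_cb and my_spu_switch_nb are invented names for illustration, while spu_switch_event_register()/unregister() and the (object_id, struct spu *) calling convention are taken from the code above.

#include <linux/module.h>
#include <linux/notifier.h>
#include <asm/spu.h>

static int my_spu_switch_cb(struct notifier_block *nb,
			    unsigned long object_id, void *data)
{
	struct spu *spu = data;

	/* object_id is the context's cookie, 0 when nothing is loaded */
	pr_debug("SPU %d: context cookie 0x%lx\n",
		 spu ? spu->number : -1, object_id);
	return NOTIFY_OK;
}

static struct notifier_block my_spu_switch_nb = {
	.notifier_call = my_spu_switch_cb,
};

static int __init my_profiler_init(void)
{
	/* registration also fires notify_spus_active(), so contexts
	 * that are already running are reported right away */
	return spu_switch_event_register(&my_spu_switch_nb);
}

static void __exit my_profiler_exit(void)
{
	spu_switch_event_unregister(&my_spu_switch_nb);
}

module_init(my_profiler_init);
module_exit(my_profiler_exit);
MODULE_LICENSE("GPL");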
-EXPORT_SYMBOL_GPL(spu_int_route_set); - -u64 spu_mfc_dar_get(struct spu *spu) -{ - return in_be64(&spu->priv1->mfc_dar_RW); -} -EXPORT_SYMBOL_GPL(spu_mfc_dar_get); - -u64 spu_mfc_dsisr_get(struct spu *spu) -{ - return in_be64(&spu->priv1->mfc_dsisr_RW); -} -EXPORT_SYMBOL_GPL(spu_mfc_dsisr_get); - -void spu_mfc_dsisr_set(struct spu *spu, u64 dsisr) -{ - out_be64(&spu->priv1->mfc_dsisr_RW, dsisr); -} -EXPORT_SYMBOL_GPL(spu_mfc_dsisr_set); - -void spu_mfc_sdr_set(struct spu *spu, u64 sdr) -{ - out_be64(&spu->priv1->mfc_sdr_RW, sdr); -} -EXPORT_SYMBOL_GPL(spu_mfc_sdr_set); - -void spu_mfc_sr1_set(struct spu *spu, u64 sr1) -{ - out_be64(&spu->priv1->mfc_sr1_RW, sr1); -} -EXPORT_SYMBOL_GPL(spu_mfc_sr1_set); - -u64 spu_mfc_sr1_get(struct spu *spu) -{ - return in_be64(&spu->priv1->mfc_sr1_RW); -} -EXPORT_SYMBOL_GPL(spu_mfc_sr1_get); - -void spu_mfc_tclass_id_set(struct spu *spu, u64 tclass_id) -{ - out_be64(&spu->priv1->mfc_tclass_id_RW, tclass_id); -} -EXPORT_SYMBOL_GPL(spu_mfc_tclass_id_set); - -u64 spu_mfc_tclass_id_get(struct spu *spu) -{ - return in_be64(&spu->priv1->mfc_tclass_id_RW); -} -EXPORT_SYMBOL_GPL(spu_mfc_tclass_id_get); - -void spu_tlb_invalidate(struct spu *spu) -{ - out_be64(&spu->priv1->tlb_invalidate_entry_W, 0ul); -} -EXPORT_SYMBOL_GPL(spu_tlb_invalidate); - -void spu_resource_allocation_groupID_set(struct spu *spu, u64 id) -{ - out_be64(&spu->priv1->resource_allocation_groupID_RW, id); -} -EXPORT_SYMBOL_GPL(spu_resource_allocation_groupID_set); - -u64 spu_resource_allocation_groupID_get(struct spu *spu) -{ - return in_be64(&spu->priv1->resource_allocation_groupID_RW); -} -EXPORT_SYMBOL_GPL(spu_resource_allocation_groupID_get); - -void spu_resource_allocation_enable_set(struct spu *spu, u64 enable) -{ - out_be64(&spu->priv1->resource_allocation_enable_RW, enable); -} -EXPORT_SYMBOL_GPL(spu_resource_allocation_enable_set); - -u64 spu_resource_allocation_enable_get(struct spu *spu) -{ - return in_be64(&spu->priv1->resource_allocation_enable_RW); -} -EXPORT_SYMBOL_GPL(spu_resource_allocation_enable_get); diff --git a/arch/powerpc/platforms/cell/spu_priv1_mmio.c b/arch/powerpc/platforms/cell/spu_priv1_mmio.c new file mode 100644 index 00000000000..66d33724f16 --- /dev/null +++ b/arch/powerpc/platforms/cell/spu_priv1_mmio.c @@ -0,0 +1,180 @@ +/* + * spu hypervisor abstraction for direct hardware access. + * + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 + * Copyright 2006 Sony Corp. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
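[Editorial aside, not part of the patch.] The deletion of spu_priv1.c together with the new spu_priv1_mmio.c here reflects a switch from directly exported accessors to an ops table (spu_priv1_ops), so that a hypervisor backend such as Beat on Celleb can supply its own implementation (see beat_spu_priv1.o in the Makefile hunk). Callers keep the old names through thin inline wrappers in asm/spu_priv1.h; that header is not part of this hunk, so the lines below only approximate what such a wrapper looks like.

/* approximate shape of the indirection assumed by this split */
extern const struct spu_priv1_ops *spu_priv1_ops;

static inline void spu_int_mask_set(struct spu *spu, int class, u64 mask)
{
	spu_priv1_ops->int_mask_set(spu, class, mask);
}

static inline u64 spu_int_mask_get(struct spu *spu, int class)
{
	return spu_priv1_ops->int_mask_get(spu, class);
}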
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/interrupt.h> +#include <linux/list.h> +#include <linux/ptrace.h> +#include <linux/wait.h> +#include <linux/mm.h> +#include <linux/io.h> +#include <linux/mutex.h> +#include <linux/device.h> +#include <linux/sched.h> + +#include <asm/spu.h> +#include <asm/spu_priv1.h> +#include <asm/firmware.h> +#include <asm/prom.h> + +#include "interrupt.h" +#include "spu_priv1_mmio.h" + +static void int_mask_and(struct spu *spu, int class, u64 mask) +{ + u64 old_mask; + + old_mask = in_be64(&spu->priv1->int_mask_RW[class]); + out_be64(&spu->priv1->int_mask_RW[class], old_mask & mask); +} + +static void int_mask_or(struct spu *spu, int class, u64 mask) +{ + u64 old_mask; + + old_mask = in_be64(&spu->priv1->int_mask_RW[class]); + out_be64(&spu->priv1->int_mask_RW[class], old_mask | mask); +} + +static void int_mask_set(struct spu *spu, int class, u64 mask) +{ + out_be64(&spu->priv1->int_mask_RW[class], mask); +} + +static u64 int_mask_get(struct spu *spu, int class) +{ + return in_be64(&spu->priv1->int_mask_RW[class]); +} + +static void int_stat_clear(struct spu *spu, int class, u64 stat) +{ + out_be64(&spu->priv1->int_stat_RW[class], stat); +} + +static u64 int_stat_get(struct spu *spu, int class) +{ + return in_be64(&spu->priv1->int_stat_RW[class]); +} + +static void cpu_affinity_set(struct spu *spu, int cpu) +{ + u64 target; + u64 route; + + if (nr_cpus_node(spu->node)) { + const struct cpumask *spumask = cpumask_of_node(spu->node), + *cpumask = cpumask_of_node(cpu_to_node(cpu)); + + if (!cpumask_intersects(spumask, cpumask)) + return; + } + + target = iic_get_target_id(cpu); + route = target << 48 | target << 32 | target << 16; + out_be64(&spu->priv1->int_route_RW, route); +} + +static u64 mfc_dar_get(struct spu *spu) +{ + return in_be64(&spu->priv1->mfc_dar_RW); +} + +static u64 mfc_dsisr_get(struct spu *spu) +{ + return in_be64(&spu->priv1->mfc_dsisr_RW); +} + +static void mfc_dsisr_set(struct spu *spu, u64 dsisr) +{ + out_be64(&spu->priv1->mfc_dsisr_RW, dsisr); +} + +static void mfc_sdr_setup(struct spu *spu) +{ + out_be64(&spu->priv1->mfc_sdr_RW, mfspr(SPRN_SDR1)); +} + +static void mfc_sr1_set(struct spu *spu, u64 sr1) +{ + out_be64(&spu->priv1->mfc_sr1_RW, sr1); +} + +static u64 mfc_sr1_get(struct spu *spu) +{ + return in_be64(&spu->priv1->mfc_sr1_RW); +} + +static void mfc_tclass_id_set(struct spu *spu, u64 tclass_id) +{ + out_be64(&spu->priv1->mfc_tclass_id_RW, tclass_id); +} + +static u64 mfc_tclass_id_get(struct spu *spu) +{ + return in_be64(&spu->priv1->mfc_tclass_id_RW); +} + +static void tlb_invalidate(struct spu *spu) +{ + out_be64(&spu->priv1->tlb_invalidate_entry_W, 0ul); +} + +static void resource_allocation_groupID_set(struct spu *spu, u64 id) +{ + out_be64(&spu->priv1->resource_allocation_groupID_RW, id); +} + +static u64 resource_allocation_groupID_get(struct spu *spu) +{ + return in_be64(&spu->priv1->resource_allocation_groupID_RW); +} + +static void resource_allocation_enable_set(struct spu *spu, u64 enable) +{ + out_be64(&spu->priv1->resource_allocation_enable_RW, enable); +} + +static u64 resource_allocation_enable_get(struct spu *spu) +{ + return in_be64(&spu->priv1->resource_allocation_enable_RW); +} + +const struct spu_priv1_ops spu_priv1_mmio_ops = +{ + .int_mask_and = int_mask_and, + .int_mask_or = int_mask_or, + .int_mask_set = 
int_mask_set, + .int_mask_get = int_mask_get, + .int_stat_clear = int_stat_clear, + .int_stat_get = int_stat_get, + .cpu_affinity_set = cpu_affinity_set, + .mfc_dar_get = mfc_dar_get, + .mfc_dsisr_get = mfc_dsisr_get, + .mfc_dsisr_set = mfc_dsisr_set, + .mfc_sdr_setup = mfc_sdr_setup, + .mfc_sr1_set = mfc_sr1_set, + .mfc_sr1_get = mfc_sr1_get, + .mfc_tclass_id_set = mfc_tclass_id_set, + .mfc_tclass_id_get = mfc_tclass_id_get, + .tlb_invalidate = tlb_invalidate, + .resource_allocation_groupID_set = resource_allocation_groupID_set, + .resource_allocation_groupID_get = resource_allocation_groupID_get, + .resource_allocation_enable_set = resource_allocation_enable_set, + .resource_allocation_enable_get = resource_allocation_enable_get, +}; diff --git a/arch/powerpc/platforms/cell/spu_priv1_mmio.h b/arch/powerpc/platforms/cell/spu_priv1_mmio.h new file mode 100644 index 00000000000..7b62bd1cc25 --- /dev/null +++ b/arch/powerpc/platforms/cell/spu_priv1_mmio.h @@ -0,0 +1,26 @@ +/* + * spu hypervisor abstraction for direct hardware access. + * + * Copyright (C) 2006 Sony Computer Entertainment Inc. + * Copyright 2006 Sony Corp. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef SPU_PRIV1_MMIO_H +#define SPU_PRIV1_MMIO_H + +struct device_node *spu_devnode(struct spu *spu); + +#endif /* SPU_PRIV1_MMIO_H */ diff --git a/arch/powerpc/platforms/cell/spu_syscalls.c b/arch/powerpc/platforms/cell/spu_syscalls.c index 261b507a901..5e6e0bad6db 100644 --- a/arch/powerpc/platforms/cell/spu_syscalls.c +++ b/arch/powerpc/platforms/cell/spu_syscalls.c @@ -2,6 +2,7 @@ * SPU file system -- system call stubs * * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 + * (C) Copyright 2006-2007, IBM Corporation * * Author: Arnd Bergmann <arndb@de.ibm.com> * @@ -20,69 +21,158 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #include <linux/file.h> +#include <linux/fs.h> #include <linux/module.h> #include <linux/syscalls.h> +#include <linux/rcupdate.h> +#include <linux/binfmts.h> #include <asm/spu.h> -struct spufs_calls spufs_calls = { - .owner = NULL, -}; +/* protected by rcu */ +static struct spufs_calls *spufs_calls; -/* These stub syscalls are needed to have the actual implementation - * within a loadable module. 
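[Editorial aside, not part of the patch.] For reference, this is roughly how the user-visible side of the spu_create/spu_run syscalls reworked in the hunk that follows is used: spu_create returns an open fd for a new spufs context directory, and with SPU_CREATE_AFFINITY_SPU the new fourth argument names the context this one should be placed next to. The program below is an illustrative sketch only; real applications normally go through libspe2, the mount point and paths are arbitrary, the SPU_CREATE_AFFINITY_SPU constant comes from the kernel's <asm/spu.h>, and loading a program into local store before spu_run is omitted.

#include <sys/syscall.h>
#include <unistd.h>
#include <asm/spu.h>		/* SPU_CREATE_AFFINITY_SPU */

int main(void)
{
	unsigned int npc = 0, status = 0;	/* entry point / stop status */
	int first, second;

	/* neighbor_fd (4th arg) is ignored without SPU_CREATE_AFFINITY_SPU */
	first = syscall(__NR_spu_create, "/spu/ctx0", 0, 0755, 0);

	/* ask the scheduler to keep this context adjacent to the first */
	second = syscall(__NR_spu_create, "/spu/ctx1",
			 SPU_CREATE_AFFINITY_SPU, 0755, first);

	/* ... load SPE programs through the contexts' mem files ... */

	syscall(__NR_spu_run, second, &npc, &status);
	return 0;
}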
When spufs is built into the kernel, - * this file is not used and the syscalls directly enter the fs code */ +#ifdef CONFIG_SPU_FS_MODULE -asmlinkage long sys_spu_create(const char __user *name, - unsigned int flags, mode_t mode) +static inline struct spufs_calls *spufs_calls_get(void) +{ + struct spufs_calls *calls = NULL; + + rcu_read_lock(); + calls = rcu_dereference(spufs_calls); + if (calls && !try_module_get(calls->owner)) + calls = NULL; + rcu_read_unlock(); + + return calls; +} + +static inline void spufs_calls_put(struct spufs_calls *calls) +{ + BUG_ON(calls != spufs_calls); + + /* we don't need to rcu this, as we hold a reference to the module */ + module_put(spufs_calls->owner); +} + +#else /* !defined CONFIG_SPU_FS_MODULE */ + +static inline struct spufs_calls *spufs_calls_get(void) +{ + return spufs_calls; +} + +static inline void spufs_calls_put(struct spufs_calls *calls) { } + +#endif /* CONFIG_SPU_FS_MODULE */ + +SYSCALL_DEFINE4(spu_create, const char __user *, name, unsigned int, flags, + umode_t, mode, int, neighbor_fd) { long ret; - struct module *owner = spufs_calls.owner; + struct spufs_calls *calls; - ret = -ENOSYS; - if (owner && try_module_get(owner)) { - ret = spufs_calls.create_thread(name, flags, mode); - module_put(owner); - } + calls = spufs_calls_get(); + if (!calls) + return -ENOSYS; + + if (flags & SPU_CREATE_AFFINITY_SPU) { + struct fd neighbor = fdget(neighbor_fd); + ret = -EBADF; + if (neighbor.file) { + ret = calls->create_thread(name, flags, mode, neighbor.file); + fdput(neighbor); + } + } else + ret = calls->create_thread(name, flags, mode, NULL); + + spufs_calls_put(calls); return ret; } asmlinkage long sys_spu_run(int fd, __u32 __user *unpc, __u32 __user *ustatus) { long ret; - struct file *filp; - int fput_needed; - struct module *owner = spufs_calls.owner; + struct fd arg; + struct spufs_calls *calls; - ret = -ENOSYS; - if (owner && try_module_get(owner)) { - ret = -EBADF; - filp = fget_light(fd, &fput_needed); - if (filp) { - ret = spufs_calls.spu_run(filp, unpc, ustatus); - fput_light(filp, fput_needed); - } - module_put(owner); + calls = spufs_calls_get(); + if (!calls) + return -ENOSYS; + + ret = -EBADF; + arg = fdget(fd); + if (arg.file) { + ret = calls->spu_run(arg.file, unpc, ustatus); + fdput(arg); } + + spufs_calls_put(calls); + return ret; +} + +#ifdef CONFIG_COREDUMP +int elf_coredump_extra_notes_size(void) +{ + struct spufs_calls *calls; + int ret; + + calls = spufs_calls_get(); + if (!calls) + return 0; + + ret = calls->coredump_extra_notes_size(); + + spufs_calls_put(calls); + return ret; } +int elf_coredump_extra_notes_write(struct coredump_params *cprm) +{ + struct spufs_calls *calls; + int ret; + + calls = spufs_calls_get(); + if (!calls) + return 0; + + ret = calls->coredump_extra_notes_write(cprm); + + spufs_calls_put(calls); + + return ret; +} +#endif + +void notify_spus_active(void) +{ + struct spufs_calls *calls; + + calls = spufs_calls_get(); + if (!calls) + return; + + calls->notify_spus_active(); + spufs_calls_put(calls); + + return; +} + int register_spu_syscalls(struct spufs_calls *calls) { - if (spufs_calls.owner) + if (spufs_calls) return -EBUSY; - spufs_calls.create_thread = calls->create_thread; - spufs_calls.spu_run = calls->spu_run; - smp_mb(); - spufs_calls.owner = calls->owner; + rcu_assign_pointer(spufs_calls, calls); return 0; } EXPORT_SYMBOL_GPL(register_spu_syscalls); void unregister_spu_syscalls(struct spufs_calls *calls) { - BUG_ON(spufs_calls.owner != calls->owner); - spufs_calls.owner = NULL; + 
BUG_ON(spufs_calls->owner != calls->owner); + RCU_INIT_POINTER(spufs_calls, NULL); + synchronize_rcu(); } EXPORT_SYMBOL_GPL(unregister_spu_syscalls); diff --git a/arch/powerpc/platforms/cell/spufs/.gitignore b/arch/powerpc/platforms/cell/spufs/.gitignore new file mode 100644 index 00000000000..a09ee8d84d6 --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/.gitignore @@ -0,0 +1,2 @@ +spu_save_dump.h +spu_restore_dump.h diff --git a/arch/powerpc/platforms/cell/spufs/Makefile b/arch/powerpc/platforms/cell/spufs/Makefile index a7cddf40e3d..52a7d2596d3 100644 --- a/arch/powerpc/platforms/cell/spufs/Makefile +++ b/arch/powerpc/platforms/cell/spufs/Makefile @@ -1,6 +1,12 @@ + obj-$(CONFIG_SPU_FS) += spufs.o -spufs-y += inode.o file.o context.o switch.o syscalls.o -spufs-y += sched.o backing_ops.o hw_ops.o run.o +spufs-y += inode.o file.o context.o syscalls.o +spufs-y += sched.o backing_ops.o hw_ops.o run.o gang.o +spufs-y += switch.o fault.o lscsa_alloc.o +spufs-$(CONFIG_COREDUMP) += coredump.o + +# magic for the trace events +CFLAGS_sched.o := -I$(src) # Rules to build switch.o with the help of SPU tool chain SPU_CROSS := spu- @@ -8,11 +14,12 @@ SPU_CC := $(SPU_CROSS)gcc SPU_AS := $(SPU_CROSS)gcc SPU_LD := $(SPU_CROSS)ld SPU_OBJCOPY := $(SPU_CROSS)objcopy -SPU_CFLAGS := -O2 -Wall -I$(srctree)/include -I$(objtree)/include2 -SPU_AFLAGS := -c -D__ASSEMBLY__ -I$(srctree)/include -I$(objtree)/include2 +SPU_CFLAGS := -O2 -Wall -I$(srctree)/include -D__KERNEL__ +SPU_AFLAGS := -c -D__ASSEMBLY__ -I$(srctree)/include -D__KERNEL__ SPU_LDFLAGS := -N -Ttext=0x0 $(obj)/switch.o: $(obj)/spu_save_dump.h $(obj)/spu_restore_dump.h +clean-files := spu_save_dump.h spu_restore_dump.h # Compile SPU files cmd_spu_cc = $(SPU_CC) $(SPU_CFLAGS) -c -o $@ $< @@ -45,7 +52,8 @@ cmd_hexdump = ( \ echo " * Hex-dump auto generated from $*.c." ; \ echo " * Do not edit!" ; \ echo " */" ; \ - echo "static unsigned int $*_code[] __page_aligned = {" ; \ + echo "static unsigned int $*_code[] " \ + "__attribute__((__aligned__(128))) = {" ; \ hexdump -v -e '"0x" 4/1 "%02x" "," "\n"' $< ; \ echo "};" ; \ ) > $@ diff --git a/arch/powerpc/platforms/cell/spufs/backing_ops.c b/arch/powerpc/platforms/cell/spufs/backing_ops.c index a5c489a53c6..6e8a9ef8590 100644 --- a/arch/powerpc/platforms/cell/spufs/backing_ops.c +++ b/arch/powerpc/platforms/cell/spufs/backing_ops.c @@ -21,15 +21,12 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ -#include <linux/config.h> -#include <linux/module.h> #include <linux/errno.h> #include <linux/sched.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/vmalloc.h> #include <linux/smp.h> -#include <linux/smp_lock.h> #include <linux/stddef.h> #include <linux/unistd.h> #include <linux/poll.h> @@ -37,6 +34,7 @@ #include <asm/io.h> #include <asm/spu.h> #include <asm/spu_csa.h> +#include <asm/spu_info.h> #include <asm/mmu_context.h> #include "spufs.h" @@ -107,16 +105,20 @@ static unsigned int spu_backing_mbox_stat_poll(struct spu_context *ctx, if (stat & 0xff0000) ret |= POLLIN | POLLRDNORM; else { - ctx->csa.priv1.int_stat_class0_RW &= ~0x1; - ctx->csa.priv1.int_mask_class2_RW |= 0x1; + ctx->csa.priv1.int_stat_class2_RW &= + ~CLASS2_MAILBOX_INTR; + ctx->csa.priv1.int_mask_class2_RW |= + CLASS2_ENABLE_MAILBOX_INTR; } } if (events & (POLLOUT | POLLWRNORM)) { if (stat & 0x00ff00) ret = POLLOUT | POLLWRNORM; else { - ctx->csa.priv1.int_stat_class0_RW &= ~0x10; - ctx->csa.priv1.int_mask_class2_RW |= 0x10; + ctx->csa.priv1.int_stat_class2_RW &= + ~CLASS2_MAILBOX_THRESHOLD_INTR; + ctx->csa.priv1.int_mask_class2_RW |= + CLASS2_ENABLE_MAILBOX_THRESHOLD_INTR; } } spin_unlock_irq(&ctx->csa.register_lock); @@ -140,7 +142,7 @@ static int spu_backing_ibox_read(struct spu_context *ctx, u32 * data) ret = 4; } else { /* make sure we get woken up by the interrupt */ - ctx->csa.priv1.int_mask_class2_RW |= 0x1UL; + ctx->csa.priv1.int_mask_class2_RW |= CLASS2_ENABLE_MAILBOX_INTR; ret = 0; } spin_unlock(&ctx->csa.register_lock); @@ -163,13 +165,15 @@ static int spu_backing_wbox_write(struct spu_context *ctx, u32 data) BUG_ON(avail != (4 - slot)); ctx->csa.spu_mailbox_data[slot] = data; ctx->csa.spu_chnlcnt_RW[29] = ++slot; - ctx->csa.prob.mb_stat_R = (((4 - slot) & 0xff) << 8); + ctx->csa.prob.mb_stat_R &= ~(0x00ff00); + ctx->csa.prob.mb_stat_R |= (((4 - slot) & 0xff) << 8); gen_spu_event(ctx, MFC_SPU_MAILBOX_WRITTEN_EVENT); ret = 4; } else { /* make sure we get woken up by the interrupt when space becomes available */ - ctx->csa.priv1.int_mask_class2_RW |= 0x10; + ctx->csa.priv1.int_mask_class2_RW |= + CLASS2_ENABLE_MAILBOX_THRESHOLD_INTR; ret = 0; } spin_unlock(&ctx->csa.register_lock); @@ -268,11 +272,27 @@ static char *spu_backing_get_ls(struct spu_context *ctx) return ctx->csa.lscsa->ls; } +static void spu_backing_privcntl_write(struct spu_context *ctx, u64 val) +{ + ctx->csa.priv2.spu_privcntl_RW = val; +} + +static u32 spu_backing_runcntl_read(struct spu_context *ctx) +{ + return ctx->csa.prob.spu_runcntl_RW; +} + static void spu_backing_runcntl_write(struct spu_context *ctx, u32 val) { spin_lock(&ctx->csa.register_lock); ctx->csa.prob.spu_runcntl_RW = val; if (val & SPU_RUNCNTL_RUNNABLE) { + ctx->csa.prob.spu_status_R &= + ~SPU_STATUS_STOPPED_BY_STOP & + ~SPU_STATUS_STOPPED_BY_HALT & + ~SPU_STATUS_SINGLE_STEP & + ~SPU_STATUS_INVALID_INSTR & + ~SPU_STATUS_INVALID_CH; ctx->csa.prob.spu_status_R |= SPU_STATUS_RUNNING; } else { ctx->csa.prob.spu_status_R &= ~SPU_STATUS_RUNNING; @@ -285,6 +305,82 @@ static void spu_backing_runcntl_stop(struct spu_context *ctx) spu_backing_runcntl_write(ctx, SPU_RUNCNTL_STOP); } +static void spu_backing_master_start(struct spu_context *ctx) +{ + struct spu_state *csa = &ctx->csa; + u64 sr1; + + spin_lock(&csa->register_lock); + sr1 = csa->priv1.mfc_sr1_RW | MFC_STATE1_MASTER_RUN_CONTROL_MASK; + csa->priv1.mfc_sr1_RW = sr1; + spin_unlock(&csa->register_lock); +} + +static void spu_backing_master_stop(struct spu_context *ctx) +{ + struct spu_state *csa = 
&ctx->csa; + u64 sr1; + + spin_lock(&csa->register_lock); + sr1 = csa->priv1.mfc_sr1_RW & ~MFC_STATE1_MASTER_RUN_CONTROL_MASK; + csa->priv1.mfc_sr1_RW = sr1; + spin_unlock(&csa->register_lock); +} + +static int spu_backing_set_mfc_query(struct spu_context * ctx, u32 mask, + u32 mode) +{ + struct spu_problem_collapsed *prob = &ctx->csa.prob; + int ret; + + spin_lock(&ctx->csa.register_lock); + ret = -EAGAIN; + if (prob->dma_querytype_RW) + goto out; + ret = 0; + /* FIXME: what are the side-effects of this? */ + prob->dma_querymask_RW = mask; + prob->dma_querytype_RW = mode; + /* In the current implementation, the SPU context is always + * acquired in runnable state when new bits are added to the + * mask (tagwait), so it's sufficient just to mask + * dma_tagstatus_R with the 'mask' parameter here. + */ + ctx->csa.prob.dma_tagstatus_R &= mask; +out: + spin_unlock(&ctx->csa.register_lock); + + return ret; +} + +static u32 spu_backing_read_mfc_tagstatus(struct spu_context * ctx) +{ + return ctx->csa.prob.dma_tagstatus_R; +} + +static u32 spu_backing_get_mfc_free_elements(struct spu_context *ctx) +{ + return ctx->csa.prob.dma_qstatus_R; +} + +static int spu_backing_send_mfc_command(struct spu_context *ctx, + struct mfc_dma_command *cmd) +{ + int ret; + + spin_lock(&ctx->csa.register_lock); + ret = -EAGAIN; + /* FIXME: set up priv2->puq */ + spin_unlock(&ctx->csa.register_lock); + + return ret; +} + +static void spu_backing_restart_dma(struct spu_context *ctx) +{ + ctx->csa.priv2.mfc_control_RW |= MFC_CNTL_RESTART_DMA_COMMAND; +} + struct spu_context_ops spu_backing_ops = { .mbox_read = spu_backing_mbox_read, .mbox_stat_read = spu_backing_mbox_stat_read, @@ -303,6 +399,15 @@ struct spu_context_ops spu_backing_ops = { .npc_write = spu_backing_npc_write, .status_read = spu_backing_status_read, .get_ls = spu_backing_get_ls, + .privcntl_write = spu_backing_privcntl_write, + .runcntl_read = spu_backing_runcntl_read, .runcntl_write = spu_backing_runcntl_write, .runcntl_stop = spu_backing_runcntl_stop, + .master_start = spu_backing_master_start, + .master_stop = spu_backing_master_stop, + .set_mfc_query = spu_backing_set_mfc_query, + .read_mfc_tagstatus = spu_backing_read_mfc_tagstatus, + .get_mfc_free_elements = spu_backing_get_mfc_free_elements, + .send_mfc_command = spu_backing_send_mfc_command, + .restart_dma = spu_backing_restart_dma, }; diff --git a/arch/powerpc/platforms/cell/spufs/context.c b/arch/powerpc/platforms/cell/spufs/context.c index 336f238102f..9c6790d17ed 100644 --- a/arch/powerpc/platforms/cell/spufs/context.c +++ b/arch/powerpc/platforms/cell/spufs/context.c @@ -23,37 +23,54 @@ #include <linux/fs.h> #include <linux/mm.h> #include <linux/slab.h> +#include <linux/atomic.h> +#include <linux/sched.h> #include <asm/spu.h> #include <asm/spu_csa.h> #include "spufs.h" +#include "sputrace.h" -struct spu_context *alloc_spu_context(struct address_space *local_store) + +atomic_t nr_spu_contexts = ATOMIC_INIT(0); + +struct spu_context *alloc_spu_context(struct spu_gang *gang) { struct spu_context *ctx; - ctx = kmalloc(sizeof *ctx, GFP_KERNEL); + struct timespec ts; + + ctx = kzalloc(sizeof *ctx, GFP_KERNEL); if (!ctx) goto out; /* Binding to physical processor deferred * until spu_activate(). 
*/ - spu_init_csa(&ctx->csa); - if (!ctx->csa.lscsa) { + if (spu_init_csa(&ctx->csa)) goto out_free; - } spin_lock_init(&ctx->mmio_lock); + mutex_init(&ctx->mapping_lock); kref_init(&ctx->kref); - init_rwsem(&ctx->state_sema); - init_MUTEX(&ctx->run_sema); + mutex_init(&ctx->state_mutex); + mutex_init(&ctx->run_mutex); init_waitqueue_head(&ctx->ibox_wq); init_waitqueue_head(&ctx->wbox_wq); init_waitqueue_head(&ctx->stop_wq); - ctx->ibox_fasync = NULL; - ctx->wbox_fasync = NULL; + init_waitqueue_head(&ctx->mfc_wq); + init_waitqueue_head(&ctx->run_wq); ctx->state = SPU_STATE_SAVED; - ctx->local_store = local_store; - ctx->spu = NULL; ctx->ops = &spu_backing_ops; ctx->owner = get_task_mm(current); + INIT_LIST_HEAD(&ctx->rq); + INIT_LIST_HEAD(&ctx->aff_list); + if (gang) + spu_gang_add_ctx(gang, ctx); + + __spu_update_sched_info(ctx); + spu_set_timeslice(ctx); + ctx->stats.util_state = SPU_UTIL_IDLE_LOADED; + ktime_get_ts(&ts); + ctx->stats.tstamp = timespec_to_ns(&ts); + + atomic_inc(&nr_spu_contexts); goto out; out_free: kfree(ctx); @@ -66,12 +83,18 @@ void destroy_spu_context(struct kref *kref) { struct spu_context *ctx; ctx = container_of(kref, struct spu_context, kref); - down_write(&ctx->state_sema); + spu_context_nospu_trace(destroy_spu_context__enter, ctx); + mutex_lock(&ctx->state_mutex); spu_deactivate(ctx); - ctx->ibox_fasync = NULL; - ctx->wbox_fasync = NULL; - up_write(&ctx->state_sema); + mutex_unlock(&ctx->state_mutex); spu_fini_csa(&ctx->csa); + if (ctx->gang) + spu_gang_remove_ctx(ctx->gang, ctx); + if (ctx->prof_priv_kref) + kref_put(ctx->prof_priv_kref, ctx->prof_priv_release); + BUG_ON(!list_empty(&ctx->rq)); + atomic_dec(&nr_spu_contexts); + kfree(ctx->switch_log); kfree(ctx); } @@ -90,78 +113,76 @@ int put_spu_context(struct spu_context *ctx) void spu_forget(struct spu_context *ctx) { struct mm_struct *mm; - spu_acquire_saved(ctx); + + /* + * This is basically an open-coded spu_acquire_saved, except that + * we don't acquire the state mutex interruptible, and we don't + * want this context to be rescheduled on release. 
+ */ + mutex_lock(&ctx->state_mutex); + if (ctx->state != SPU_STATE_SAVED) + spu_deactivate(ctx); + mm = ctx->owner; ctx->owner = NULL; mmput(mm); spu_release(ctx); } -void spu_acquire(struct spu_context *ctx) -{ - down_read(&ctx->state_sema); -} - -void spu_release(struct spu_context *ctx) -{ - up_read(&ctx->state_sema); -} - void spu_unmap_mappings(struct spu_context *ctx) { - unmap_mapping_range(ctx->local_store, 0, LS_SIZE, 1); + mutex_lock(&ctx->mapping_lock); + if (ctx->local_store) + unmap_mapping_range(ctx->local_store, 0, LS_SIZE, 1); + if (ctx->mfc) + unmap_mapping_range(ctx->mfc, 0, SPUFS_MFC_MAP_SIZE, 1); + if (ctx->cntl) + unmap_mapping_range(ctx->cntl, 0, SPUFS_CNTL_MAP_SIZE, 1); + if (ctx->signal1) + unmap_mapping_range(ctx->signal1, 0, SPUFS_SIGNAL_MAP_SIZE, 1); + if (ctx->signal2) + unmap_mapping_range(ctx->signal2, 0, SPUFS_SIGNAL_MAP_SIZE, 1); + if (ctx->mss) + unmap_mapping_range(ctx->mss, 0, SPUFS_MSS_MAP_SIZE, 1); + if (ctx->psmap) + unmap_mapping_range(ctx->psmap, 0, SPUFS_PS_MAP_SIZE, 1); + mutex_unlock(&ctx->mapping_lock); } -int spu_acquire_runnable(struct spu_context *ctx) +/** + * spu_acquire_saved - lock spu contex and make sure it is in saved state + * @ctx: spu contex to lock + */ +int spu_acquire_saved(struct spu_context *ctx) { - int ret = 0; + int ret; - down_read(&ctx->state_sema); - if (ctx->state == SPU_STATE_RUNNABLE) { - ctx->spu->prio = current->prio; - return 0; - } - up_read(&ctx->state_sema); + spu_context_nospu_trace(spu_acquire_saved__enter, ctx); - down_write(&ctx->state_sema); - /* ctx is about to be freed, can't acquire any more */ - if (!ctx->owner) { - ret = -EINVAL; - goto out; - } + ret = spu_acquire(ctx); + if (ret) + return ret; - if (ctx->state == SPU_STATE_SAVED) { - ret = spu_activate(ctx, 0); - if (ret) - goto out; - ctx->state = SPU_STATE_RUNNABLE; + if (ctx->state != SPU_STATE_SAVED) { + set_bit(SPU_SCHED_WAS_ACTIVE, &ctx->sched_flags); + spu_deactivate(ctx); } - downgrade_write(&ctx->state_sema); - /* On success, we return holding the lock */ - - return ret; -out: - /* Release here, to simplify calling code. */ - up_write(&ctx->state_sema); - - return ret; + return 0; } -void spu_acquire_saved(struct spu_context *ctx) +/** + * spu_release_saved - unlock spu context and return it to the runqueue + * @ctx: context to unlock + */ +void spu_release_saved(struct spu_context *ctx) { - down_read(&ctx->state_sema); - - if (ctx->state == SPU_STATE_SAVED) - return; + BUG_ON(ctx->state != SPU_STATE_SAVED); - up_read(&ctx->state_sema); - down_write(&ctx->state_sema); + if (test_and_clear_bit(SPU_SCHED_WAS_ACTIVE, &ctx->sched_flags) && + test_bit(SPU_SCHED_SPU_RUN, &ctx->sched_flags)) + spu_activate(ctx, 0); - if (ctx->state == SPU_STATE_RUNNABLE) { - spu_deactivate(ctx); - ctx->state = SPU_STATE_SAVED; - } - - downgrade_write(&ctx->state_sema); + spu_release(ctx); } + diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c new file mode 100644 index 00000000000..be6212ddbf0 --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/coredump.c @@ -0,0 +1,211 @@ +/* + * SPU core dump code + * + * (C) Copyright 2006 IBM Corp. + * + * Author: Dwayne Grant McConnell <decimal@us.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <linux/elf.h> +#include <linux/file.h> +#include <linux/fdtable.h> +#include <linux/fs.h> +#include <linux/gfp.h> +#include <linux/list.h> +#include <linux/syscalls.h> +#include <linux/coredump.h> +#include <linux/binfmts.h> + +#include <asm/uaccess.h> + +#include "spufs.h" + +static ssize_t do_coredump_read(int num, struct spu_context *ctx, void *buffer, + size_t size, loff_t *off) +{ + u64 data; + int ret; + + if (spufs_coredump_read[num].read) + return spufs_coredump_read[num].read(ctx, buffer, size, off); + + data = spufs_coredump_read[num].get(ctx); + ret = snprintf(buffer, size, "0x%.16llx", data); + if (ret >= size) + return size; + return ++ret; /* count trailing NULL */ +} + +static int spufs_ctx_note_size(struct spu_context *ctx, int dfd) +{ + int i, sz, total = 0; + char *name; + char fullname[80]; + + for (i = 0; spufs_coredump_read[i].name != NULL; i++) { + name = spufs_coredump_read[i].name; + sz = spufs_coredump_read[i].size; + + sprintf(fullname, "SPU/%d/%s", dfd, name); + + total += sizeof(struct elf_note); + total += roundup(strlen(fullname) + 1, 4); + total += roundup(sz, 4); + } + + return total; +} + +static int match_context(const void *v, struct file *file, unsigned fd) +{ + struct spu_context *ctx; + if (file->f_op != &spufs_context_fops) + return 0; + ctx = SPUFS_I(file_inode(file))->i_ctx; + if (ctx->flags & SPU_CREATE_NOSCHED) + return 0; + return fd + 1; +} + +/* + * The additional architecture-specific notes for Cell are various + * context files in the spu context. + * + * This function iterates over all open file descriptors and sees + * if they are a directory in spufs. In that case we use spufs + * internal functionality to dump them without needing to actually + * open the files. + */ +/* + * descriptor table is not shared, so files can't change or go away. 
+ */ +static struct spu_context *coredump_next_context(int *fd) +{ + struct file *file; + int n = iterate_fd(current->files, *fd, match_context, NULL); + if (!n) + return NULL; + *fd = n - 1; + file = fcheck(*fd); + return SPUFS_I(file_inode(file))->i_ctx; +} + +int spufs_coredump_extra_notes_size(void) +{ + struct spu_context *ctx; + int size = 0, rc, fd; + + fd = 0; + while ((ctx = coredump_next_context(&fd)) != NULL) { + rc = spu_acquire_saved(ctx); + if (rc) + break; + rc = spufs_ctx_note_size(ctx, fd); + spu_release_saved(ctx); + if (rc < 0) + break; + + size += rc; + + /* start searching the next fd next time */ + fd++; + } + + return size; +} + +static int spufs_arch_write_note(struct spu_context *ctx, int i, + struct coredump_params *cprm, int dfd) +{ + loff_t pos = 0; + int sz, rc, total = 0; + const int bufsz = PAGE_SIZE; + char *name; + char fullname[80], *buf; + struct elf_note en; + + buf = (void *)get_zeroed_page(GFP_KERNEL); + if (!buf) + return -ENOMEM; + + name = spufs_coredump_read[i].name; + sz = spufs_coredump_read[i].size; + + sprintf(fullname, "SPU/%d/%s", dfd, name); + en.n_namesz = strlen(fullname) + 1; + en.n_descsz = sz; + en.n_type = NT_SPU; + + if (!dump_emit(cprm, &en, sizeof(en))) + goto Eio; + + if (!dump_emit(cprm, fullname, en.n_namesz)) + goto Eio; + + if (!dump_align(cprm, 4)) + goto Eio; + + do { + rc = do_coredump_read(i, ctx, buf, bufsz, &pos); + if (rc > 0) { + if (!dump_emit(cprm, buf, rc)) + goto Eio; + total += rc; + } + } while (rc == bufsz && total < sz); + + if (rc < 0) + goto out; + + if (!dump_skip(cprm, + roundup(cprm->written - total + sz, 4) - cprm->written)) + goto Eio; +out: + free_page((unsigned long)buf); + return rc; +Eio: + free_page((unsigned long)buf); + return -EIO; +} + +int spufs_coredump_extra_notes_write(struct coredump_params *cprm) +{ + struct spu_context *ctx; + int fd, j, rc; + + fd = 0; + while ((ctx = coredump_next_context(&fd)) != NULL) { + rc = spu_acquire_saved(ctx); + if (rc) + return rc; + + for (j = 0; spufs_coredump_read[j].name != NULL; j++) { + rc = spufs_arch_write_note(ctx, j, cprm, fd); + if (rc) { + spu_release_saved(ctx); + return rc; + } + } + + spu_release_saved(ctx); + + /* start searching the next fd next time */ + fd++; + } + + return 0; +} diff --git a/arch/powerpc/platforms/cell/spufs/fault.c b/arch/powerpc/platforms/cell/spufs/fault.c new file mode 100644 index 00000000000..8cb6260cc80 --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/fault.c @@ -0,0 +1,191 @@ +/* + * Low-level SPU handling + * + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 + * + * Author: Arnd Bergmann <arndb@de.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#include <linux/sched.h> +#include <linux/mm.h> + +#include <asm/spu.h> +#include <asm/spu_csa.h> + +#include "spufs.h" + +/** + * Handle an SPE event, depending on context SPU_CREATE_EVENTS_ENABLED flag. 
+ * + * If the context was created with events, we just set the return event. + * Otherwise, send an appropriate signal to the process. + */ +static void spufs_handle_event(struct spu_context *ctx, + unsigned long ea, int type) +{ + siginfo_t info; + + if (ctx->flags & SPU_CREATE_EVENTS_ENABLED) { + ctx->event_return |= type; + wake_up_all(&ctx->stop_wq); + return; + } + + memset(&info, 0, sizeof(info)); + + switch (type) { + case SPE_EVENT_INVALID_DMA: + info.si_signo = SIGBUS; + info.si_code = BUS_OBJERR; + break; + case SPE_EVENT_SPE_DATA_STORAGE: + info.si_signo = SIGSEGV; + info.si_addr = (void __user *)ea; + info.si_code = SEGV_ACCERR; + ctx->ops->restart_dma(ctx); + break; + case SPE_EVENT_DMA_ALIGNMENT: + info.si_signo = SIGBUS; + /* DAR isn't set for an alignment fault :( */ + info.si_code = BUS_ADRALN; + break; + case SPE_EVENT_SPE_ERROR: + info.si_signo = SIGILL; + info.si_addr = (void __user *)(unsigned long) + ctx->ops->npc_read(ctx) - 4; + info.si_code = ILL_ILLOPC; + break; + } + + if (info.si_signo) + force_sig_info(info.si_signo, &info, current); +} + +int spufs_handle_class0(struct spu_context *ctx) +{ + unsigned long stat = ctx->csa.class_0_pending & CLASS0_INTR_MASK; + + if (likely(!stat)) + return 0; + + if (stat & CLASS0_DMA_ALIGNMENT_INTR) + spufs_handle_event(ctx, ctx->csa.class_0_dar, + SPE_EVENT_DMA_ALIGNMENT); + + if (stat & CLASS0_INVALID_DMA_COMMAND_INTR) + spufs_handle_event(ctx, ctx->csa.class_0_dar, + SPE_EVENT_INVALID_DMA); + + if (stat & CLASS0_SPU_ERROR_INTR) + spufs_handle_event(ctx, ctx->csa.class_0_dar, + SPE_EVENT_SPE_ERROR); + + ctx->csa.class_0_pending = 0; + + return -EIO; +} + +/* + * bottom half handler for page faults, we can't do this from + * interrupt context, since we might need to sleep. + * we also need to give up the mutex so we can get scheduled + * out while waiting for the backing store. + * + * TODO: try calling hash_page from the interrupt handler first + * in order to speed up the easy case. + */ +int spufs_handle_class1(struct spu_context *ctx) +{ + u64 ea, dsisr, access; + unsigned long flags; + unsigned flt = 0; + int ret; + + /* + * dar and dsisr get passed from the registers + * to the spu_context, to this function, but not + * back to the spu if it gets scheduled again. + * + * if we don't handle the fault for a saved context + * in time, we can still expect to get the same fault + * the immediately after the context restore. + */ + ea = ctx->csa.class_1_dar; + dsisr = ctx->csa.class_1_dsisr; + + if (!(dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED))) + return 0; + + spuctx_switch_state(ctx, SPU_UTIL_IOWAIT); + + pr_debug("ctx %p: ea %016llx, dsisr %016llx state %d\n", ctx, ea, + dsisr, ctx->state); + + ctx->stats.hash_flt++; + if (ctx->state == SPU_STATE_RUNNABLE) + ctx->spu->stats.hash_flt++; + + /* we must not hold the lock when entering spu_handle_mm_fault */ + spu_release(ctx); + + access = (_PAGE_PRESENT | _PAGE_USER); + access |= (dsisr & MFC_DSISR_ACCESS_PUT) ? _PAGE_RW : 0UL; + local_irq_save(flags); + ret = hash_page(ea, access, 0x300); + local_irq_restore(flags); + + /* hashing failed, so try the actual fault handler */ + if (ret) + ret = spu_handle_mm_fault(current->mm, ea, dsisr, &flt); + + /* + * This is nasty: we need the state_mutex for all the bookkeeping even + * if the syscall was interrupted by a signal. ewww. 
+ */ + mutex_lock(&ctx->state_mutex); + + /* + * Clear dsisr under ctxt lock after handling the fault, so that + * time slicing will not preempt the context while the page fault + * handler is running. Context switch code removes mappings. + */ + ctx->csa.class_1_dar = ctx->csa.class_1_dsisr = 0; + + /* + * If we handled the fault successfully and are in runnable + * state, restart the DMA. + * In case of unhandled error report the problem to user space. + */ + if (!ret) { + if (flt & VM_FAULT_MAJOR) + ctx->stats.maj_flt++; + else + ctx->stats.min_flt++; + if (ctx->state == SPU_STATE_RUNNABLE) { + if (flt & VM_FAULT_MAJOR) + ctx->spu->stats.maj_flt++; + else + ctx->spu->stats.min_flt++; + } + + if (ctx->spu) + ctx->ops->restart_dma(ctx); + } else + spufs_handle_event(ctx, ea, SPE_EVENT_SPE_DATA_STORAGE); + + spuctx_switch_state(ctx, SPU_UTIL_SYSTEM); + return ret; +} diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c index dfa649c9b95..90986923a53 100644 --- a/arch/powerpc/platforms/cell/spufs/file.c +++ b/arch/powerpc/platforms/cell/spufs/file.c @@ -20,127 +20,504 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +#undef DEBUG + #include <linux/fs.h> #include <linux/ioctl.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/pagemap.h> #include <linux/poll.h> #include <linux/ptrace.h> +#include <linux/seq_file.h> +#include <linux/slab.h> #include <asm/io.h> -#include <asm/semaphore.h> +#include <asm/time.h> #include <asm/spu.h> +#include <asm/spu_info.h> #include <asm/uaccess.h> #include "spufs.h" +#include "sputrace.h" + +#define SPUFS_MMAP_4K (PAGE_SIZE == 0x1000) + +/* Simple attribute files */ +struct spufs_attr { + int (*get)(void *, u64 *); + int (*set)(void *, u64); + char get_buf[24]; /* enough to store a u64 and "\n\0" */ + char set_buf[24]; + void *data; + const char *fmt; /* format for read operation */ + struct mutex mutex; /* protects access to these buffers */ +}; + +static int spufs_attr_open(struct inode *inode, struct file *file, + int (*get)(void *, u64 *), int (*set)(void *, u64), + const char *fmt) +{ + struct spufs_attr *attr; + + attr = kmalloc(sizeof(*attr), GFP_KERNEL); + if (!attr) + return -ENOMEM; + + attr->get = get; + attr->set = set; + attr->data = inode->i_private; + attr->fmt = fmt; + mutex_init(&attr->mutex); + file->private_data = attr; + + return nonseekable_open(inode, file); +} + +static int spufs_attr_release(struct inode *inode, struct file *file) +{ + kfree(file->private_data); + return 0; +} + +static ssize_t spufs_attr_read(struct file *file, char __user *buf, + size_t len, loff_t *ppos) +{ + struct spufs_attr *attr; + size_t size; + ssize_t ret; + + attr = file->private_data; + if (!attr->get) + return -EACCES; + + ret = mutex_lock_interruptible(&attr->mutex); + if (ret) + return ret; + + if (*ppos) { /* continued read */ + size = strlen(attr->get_buf); + } else { /* first read */ + u64 val; + ret = attr->get(attr->data, &val); + if (ret) + goto out; + + size = scnprintf(attr->get_buf, sizeof(attr->get_buf), + attr->fmt, (unsigned long long)val); + } + + ret = simple_read_from_buffer(buf, len, ppos, attr->get_buf, size); +out: + mutex_unlock(&attr->mutex); + return ret; +} + +static ssize_t spufs_attr_write(struct file *file, const char __user *buf, + size_t len, loff_t *ppos) +{ + struct spufs_attr *attr; + u64 val; + size_t size; + ssize_t ret; + + attr = file->private_data; + if (!attr->set) + return -EACCES; + + ret = mutex_lock_interruptible(&attr->mutex); + if 
(ret) + return ret; + + ret = -EFAULT; + size = min(sizeof(attr->set_buf) - 1, len); + if (copy_from_user(attr->set_buf, buf, size)) + goto out; + + ret = len; /* claim we got the whole input */ + attr->set_buf[size] = '\0'; + val = simple_strtol(attr->set_buf, NULL, 0); + attr->set(attr->data, val); +out: + mutex_unlock(&attr->mutex); + return ret; +} + +#define DEFINE_SPUFS_SIMPLE_ATTRIBUTE(__fops, __get, __set, __fmt) \ +static int __fops ## _open(struct inode *inode, struct file *file) \ +{ \ + __simple_attr_check_format(__fmt, 0ull); \ + return spufs_attr_open(inode, file, __get, __set, __fmt); \ +} \ +static const struct file_operations __fops = { \ + .open = __fops ## _open, \ + .release = spufs_attr_release, \ + .read = spufs_attr_read, \ + .write = spufs_attr_write, \ + .llseek = generic_file_llseek, \ +}; static int spufs_mem_open(struct inode *inode, struct file *file) { struct spufs_inode_info *i = SPUFS_I(inode); - file->private_data = i->i_ctx; - file->f_mapping = i->i_ctx->local_store; + struct spu_context *ctx = i->i_ctx; + + mutex_lock(&ctx->mapping_lock); + file->private_data = ctx; + if (!i->i_openers++) + ctx->local_store = inode->i_mapping; + mutex_unlock(&ctx->mapping_lock); return 0; } +static int +spufs_mem_release(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + struct spu_context *ctx = i->i_ctx; + + mutex_lock(&ctx->mapping_lock); + if (!--i->i_openers) + ctx->local_store = NULL; + mutex_unlock(&ctx->mapping_lock); + return 0; +} + +static ssize_t +__spufs_mem_read(struct spu_context *ctx, char __user *buffer, + size_t size, loff_t *pos) +{ + char *local_store = ctx->ops->get_ls(ctx); + return simple_read_from_buffer(buffer, size, pos, local_store, + LS_SIZE); +} + static ssize_t spufs_mem_read(struct file *file, char __user *buffer, size_t size, loff_t *pos) { struct spu_context *ctx = file->private_data; - char *local_store; - int ret; - - spu_acquire(ctx); - - local_store = ctx->ops->get_ls(ctx); - ret = simple_read_from_buffer(buffer, size, pos, local_store, LS_SIZE); + ssize_t ret; + ret = spu_acquire(ctx); + if (ret) + return ret; + ret = __spufs_mem_read(ctx, buffer, size, pos); spu_release(ctx); + return ret; } static ssize_t spufs_mem_write(struct file *file, const char __user *buffer, - size_t size, loff_t *pos) + size_t size, loff_t *ppos) { struct spu_context *ctx = file->private_data; char *local_store; + loff_t pos = *ppos; int ret; - size = min_t(ssize_t, LS_SIZE - *pos, size); - if (size <= 0) + if (pos > LS_SIZE) return -EFBIG; - *pos += size; - spu_acquire(ctx); + ret = spu_acquire(ctx); + if (ret) + return ret; local_store = ctx->ops->get_ls(ctx); - ret = copy_from_user(local_store + *pos - size, - buffer, size) ? 
-EFAULT : size; - + size = simple_write_to_buffer(local_store, LS_SIZE, ppos, buffer, size); spu_release(ctx); - return ret; + + return size; } -#ifdef CONFIG_SPARSEMEM -static struct page * -spufs_mem_mmap_nopage(struct vm_area_struct *vma, - unsigned long address, int *type) +static int +spufs_mem_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { - struct page *page = NOPAGE_SIGBUS; + struct spu_context *ctx = vma->vm_file->private_data; + unsigned long address = (unsigned long)vmf->virtual_address; + unsigned long pfn, offset; - struct spu_context *ctx = vma->vm_file->private_data; - unsigned long offset = address - vma->vm_start; - offset += vma->vm_pgoff << PAGE_SHIFT; +#ifdef CONFIG_SPU_FS_64K_LS + struct spu_state *csa = &ctx->csa; + int psize; - spu_acquire(ctx); + /* Check what page size we are using */ + psize = get_slice_psize(vma->vm_mm, address); - if (ctx->state == SPU_STATE_SAVED) - page = vmalloc_to_page(ctx->csa.lscsa->ls + offset); - else - page = pfn_to_page((ctx->spu->local_store_phys + offset) - >> PAGE_SHIFT); + /* Some sanity checking */ + BUG_ON(csa->use_big_pages != (psize == MMU_PAGE_64K)); + + /* Wow, 64K, cool, we need to align the address though */ + if (csa->use_big_pages) { + BUG_ON(vma->vm_start & 0xffff); + address &= ~0xfffful; + } +#endif /* CONFIG_SPU_FS_64K_LS */ + + offset = vmf->pgoff << PAGE_SHIFT; + if (offset >= LS_SIZE) + return VM_FAULT_SIGBUS; + + pr_debug("spufs_mem_mmap_fault address=0x%lx, offset=0x%lx\n", + address, offset); + + if (spu_acquire(ctx)) + return VM_FAULT_NOPAGE; + + if (ctx->state == SPU_STATE_SAVED) { + vma->vm_page_prot = pgprot_cached(vma->vm_page_prot); + pfn = vmalloc_to_pfn(ctx->csa.lscsa->ls + offset); + } else { + vma->vm_page_prot = pgprot_noncached_wc(vma->vm_page_prot); + pfn = (ctx->spu->local_store_phys + offset) >> PAGE_SHIFT; + } + vm_insert_pfn(vma, address, pfn); spu_release(ctx); - if (type) - *type = VM_FAULT_MINOR; + return VM_FAULT_NOPAGE; +} + +static int spufs_mem_mmap_access(struct vm_area_struct *vma, + unsigned long address, + void *buf, int len, int write) +{ + struct spu_context *ctx = vma->vm_file->private_data; + unsigned long offset = address - vma->vm_start; + char *local_store; - page_cache_get(page); - return page; + if (write && !(vma->vm_flags & VM_WRITE)) + return -EACCES; + if (spu_acquire(ctx)) + return -EINTR; + if ((offset + len) > vma->vm_end) + len = vma->vm_end - offset; + local_store = ctx->ops->get_ls(ctx); + if (write) + memcpy_toio(local_store + offset, buf, len); + else + memcpy_fromio(buf, local_store + offset, len); + spu_release(ctx); + return len; } -static struct vm_operations_struct spufs_mem_mmap_vmops = { - .nopage = spufs_mem_mmap_nopage, +static const struct vm_operations_struct spufs_mem_mmap_vmops = { + .fault = spufs_mem_mmap_fault, + .access = spufs_mem_mmap_access, }; -static int -spufs_mem_mmap(struct file *file, struct vm_area_struct *vma) +static int spufs_mem_mmap(struct file *file, struct vm_area_struct *vma) { +#ifdef CONFIG_SPU_FS_64K_LS + struct spu_context *ctx = file->private_data; + struct spu_state *csa = &ctx->csa; + + /* Sanity check VMA alignment */ + if (csa->use_big_pages) { + pr_debug("spufs_mem_mmap 64K, start=0x%lx, end=0x%lx," + " pgoff=0x%lx\n", vma->vm_start, vma->vm_end, + vma->vm_pgoff); + if (vma->vm_start & 0xffff) + return -EINVAL; + if (vma->vm_pgoff & 0xf) + return -EINVAL; + } +#endif /* CONFIG_SPU_FS_64K_LS */ + if (!(vma->vm_flags & VM_SHARED)) return -EINVAL; - /* FIXME: */ - vma->vm_page_prot = 
__pgprot(pgprot_val(vma->vm_page_prot) - | _PAGE_NO_CACHE); + vma->vm_flags |= VM_IO | VM_PFNMAP; + vma->vm_page_prot = pgprot_noncached_wc(vma->vm_page_prot); vma->vm_ops = &spufs_mem_mmap_vmops; return 0; } -#endif -static struct file_operations spufs_mem_fops = { - .open = spufs_mem_open, - .read = spufs_mem_read, - .write = spufs_mem_write, - .llseek = generic_file_llseek, -#ifdef CONFIG_SPARSEMEM - .mmap = spufs_mem_mmap, +#ifdef CONFIG_SPU_FS_64K_LS +static unsigned long spufs_get_unmapped_area(struct file *file, + unsigned long addr, unsigned long len, unsigned long pgoff, + unsigned long flags) +{ + struct spu_context *ctx = file->private_data; + struct spu_state *csa = &ctx->csa; + + /* If not using big pages, fallback to normal MM g_u_a */ + if (!csa->use_big_pages) + return current->mm->get_unmapped_area(file, addr, len, + pgoff, flags); + + /* Else, try to obtain a 64K pages slice */ + return slice_get_unmapped_area(addr, len, flags, + MMU_PAGE_64K, 1); +} +#endif /* CONFIG_SPU_FS_64K_LS */ + +static const struct file_operations spufs_mem_fops = { + .open = spufs_mem_open, + .release = spufs_mem_release, + .read = spufs_mem_read, + .write = spufs_mem_write, + .llseek = generic_file_llseek, + .mmap = spufs_mem_mmap, +#ifdef CONFIG_SPU_FS_64K_LS + .get_unmapped_area = spufs_get_unmapped_area, #endif }; +static int spufs_ps_fault(struct vm_area_struct *vma, + struct vm_fault *vmf, + unsigned long ps_offs, + unsigned long ps_size) +{ + struct spu_context *ctx = vma->vm_file->private_data; + unsigned long area, offset = vmf->pgoff << PAGE_SHIFT; + int ret = 0; + + spu_context_nospu_trace(spufs_ps_fault__enter, ctx); + + if (offset >= ps_size) + return VM_FAULT_SIGBUS; + + if (fatal_signal_pending(current)) + return VM_FAULT_SIGBUS; + + /* + * Because we release the mmap_sem, the context may be destroyed while + * we're in spu_wait. Grab an extra reference so it isn't destroyed + * in the meantime. + */ + get_spu_context(ctx); + + /* + * We have to wait for context to be loaded before we have + * pages to hand out to the user, but we don't want to wait + * with the mmap_sem held. + * It is possible to drop the mmap_sem here, but then we need + * to return VM_FAULT_NOPAGE because the mappings may have + * hanged. + */ + if (spu_acquire(ctx)) + goto refault; + + if (ctx->state == SPU_STATE_SAVED) { + up_read(¤t->mm->mmap_sem); + spu_context_nospu_trace(spufs_ps_fault__sleep, ctx); + ret = spufs_wait(ctx->run_wq, ctx->state == SPU_STATE_RUNNABLE); + spu_context_trace(spufs_ps_fault__wake, ctx, ctx->spu); + down_read(¤t->mm->mmap_sem); + } else { + area = ctx->spu->problem_phys + ps_offs; + vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, + (area + offset) >> PAGE_SHIFT); + spu_context_trace(spufs_ps_fault__insert, ctx, ctx->spu); + } + + if (!ret) + spu_release(ctx); + +refault: + put_spu_context(ctx); + return VM_FAULT_NOPAGE; +} + +#if SPUFS_MMAP_4K +static int spufs_cntl_mmap_fault(struct vm_area_struct *vma, + struct vm_fault *vmf) +{ + return spufs_ps_fault(vma, vmf, 0x4000, SPUFS_CNTL_MAP_SIZE); +} + +static const struct vm_operations_struct spufs_cntl_mmap_vmops = { + .fault = spufs_cntl_mmap_fault, +}; + +/* + * mmap support for problem state control area [0x4000 - 0x4fff]. 
+ */ +static int spufs_cntl_mmap(struct file *file, struct vm_area_struct *vma) +{ + if (!(vma->vm_flags & VM_SHARED)) + return -EINVAL; + + vma->vm_flags |= VM_IO | VM_PFNMAP; + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + vma->vm_ops = &spufs_cntl_mmap_vmops; + return 0; +} +#else /* SPUFS_MMAP_4K */ +#define spufs_cntl_mmap NULL +#endif /* !SPUFS_MMAP_4K */ + +static int spufs_cntl_get(void *data, u64 *val) +{ + struct spu_context *ctx = data; + int ret; + + ret = spu_acquire(ctx); + if (ret) + return ret; + *val = ctx->ops->status_read(ctx); + spu_release(ctx); + + return 0; +} + +static int spufs_cntl_set(void *data, u64 val) +{ + struct spu_context *ctx = data; + int ret; + + ret = spu_acquire(ctx); + if (ret) + return ret; + ctx->ops->runcntl_write(ctx, val); + spu_release(ctx); + + return 0; +} + +static int spufs_cntl_open(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + struct spu_context *ctx = i->i_ctx; + + mutex_lock(&ctx->mapping_lock); + file->private_data = ctx; + if (!i->i_openers++) + ctx->cntl = inode->i_mapping; + mutex_unlock(&ctx->mapping_lock); + return simple_attr_open(inode, file, spufs_cntl_get, + spufs_cntl_set, "0x%08lx"); +} + +static int +spufs_cntl_release(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + struct spu_context *ctx = i->i_ctx; + + simple_attr_release(inode, file); + + mutex_lock(&ctx->mapping_lock); + if (!--i->i_openers) + ctx->cntl = NULL; + mutex_unlock(&ctx->mapping_lock); + return 0; +} + +static const struct file_operations spufs_cntl_fops = { + .open = spufs_cntl_open, + .release = spufs_cntl_release, + .read = simple_attr_read, + .write = simple_attr_write, + .llseek = generic_file_llseek, + .mmap = spufs_cntl_mmap, +}; + static int spufs_regs_open(struct inode *inode, struct file *file) { @@ -150,19 +527,31 @@ spufs_regs_open(struct inode *inode, struct file *file) } static ssize_t +__spufs_regs_read(struct spu_context *ctx, char __user *buffer, + size_t size, loff_t *pos) +{ + struct spu_lscsa *lscsa = ctx->csa.lscsa; + return simple_read_from_buffer(buffer, size, pos, + lscsa->gprs, sizeof lscsa->gprs); +} + +static ssize_t spufs_regs_read(struct file *file, char __user *buffer, size_t size, loff_t *pos) { - struct spu_context *ctx = file->private_data; - struct spu_lscsa *lscsa = ctx->csa.lscsa; int ret; + struct spu_context *ctx = file->private_data; - spu_acquire_saved(ctx); - - ret = simple_read_from_buffer(buffer, size, pos, - lscsa->gprs, sizeof lscsa->gprs); + /* pre-check for file position: if we'd return EOF, there's no point + * causing a deschedule */ + if (*pos >= sizeof(ctx->csa.lscsa->gprs)) + return 0; - spu_release(ctx); + ret = spu_acquire_saved(ctx); + if (ret) + return ret; + ret = __spufs_regs_read(ctx, buffer, size, pos); + spu_release_saved(ctx); return ret; } @@ -174,21 +563,21 @@ spufs_regs_write(struct file *file, const char __user *buffer, struct spu_lscsa *lscsa = ctx->csa.lscsa; int ret; - size = min_t(ssize_t, sizeof lscsa->gprs - *pos, size); - if (size <= 0) + if (*pos >= sizeof(lscsa->gprs)) return -EFBIG; - *pos += size; - spu_acquire_saved(ctx); + ret = spu_acquire_saved(ctx); + if (ret) + return ret; - ret = copy_from_user(lscsa->gprs + *pos - size, - buffer, size) ? 
-EFAULT : size; + size = simple_write_to_buffer(lscsa->gprs, sizeof(lscsa->gprs), pos, + buffer, size); - spu_release(ctx); - return ret; + spu_release_saved(ctx); + return size; } -static struct file_operations spufs_regs_fops = { +static const struct file_operations spufs_regs_fops = { .open = spufs_regs_open, .read = spufs_regs_read, .write = spufs_regs_write, @@ -196,19 +585,26 @@ static struct file_operations spufs_regs_fops = { }; static ssize_t +__spufs_fpcr_read(struct spu_context *ctx, char __user * buffer, + size_t size, loff_t * pos) +{ + struct spu_lscsa *lscsa = ctx->csa.lscsa; + return simple_read_from_buffer(buffer, size, pos, + &lscsa->fpcr, sizeof(lscsa->fpcr)); +} + +static ssize_t spufs_fpcr_read(struct file *file, char __user * buffer, size_t size, loff_t * pos) { - struct spu_context *ctx = file->private_data; - struct spu_lscsa *lscsa = ctx->csa.lscsa; int ret; + struct spu_context *ctx = file->private_data; - spu_acquire_saved(ctx); - - ret = simple_read_from_buffer(buffer, size, pos, - &lscsa->fpcr, sizeof(lscsa->fpcr)); - - spu_release(ctx); + ret = spu_acquire_saved(ctx); + if (ret) + return ret; + ret = __spufs_fpcr_read(ctx, buffer, size, pos); + spu_release_saved(ctx); return ret; } @@ -220,21 +616,21 @@ spufs_fpcr_write(struct file *file, const char __user * buffer, struct spu_lscsa *lscsa = ctx->csa.lscsa; int ret; - size = min_t(ssize_t, sizeof(lscsa->fpcr) - *pos, size); - if (size <= 0) + if (*pos >= sizeof(lscsa->fpcr)) return -EFBIG; - *pos += size; - spu_acquire_saved(ctx); + ret = spu_acquire_saved(ctx); + if (ret) + return ret; - ret = copy_from_user((char *)&lscsa->fpcr + *pos - size, - buffer, size) ? -EFAULT : size; + size = simple_write_to_buffer(&lscsa->fpcr, sizeof(lscsa->fpcr), pos, + buffer, size); - spu_release(ctx); - return ret; + spu_release_saved(ctx); + return size; } -static struct file_operations spufs_fpcr_fops = { +static const struct file_operations spufs_fpcr_fops = { .open = spufs_regs_open, .read = spufs_fpcr_read, .write = spufs_fpcr_write, @@ -250,44 +646,78 @@ static int spufs_pipe_open(struct inode *inode, struct file *file) return nonseekable_open(inode, file); } +/* + * Read as many bytes from the mailbox as possible, until + * one of the conditions becomes true: + * + * - no more data available in the mailbox + * - end of the user provided buffer + * - end of the mapped area + */ static ssize_t spufs_mbox_read(struct file *file, char __user *buf, size_t len, loff_t *pos) { struct spu_context *ctx = file->private_data; - u32 mbox_data; - int ret; + u32 mbox_data, __user *udata; + ssize_t count; if (len < 4) return -EINVAL; - spu_acquire(ctx); - ret = ctx->ops->mbox_read(ctx, &mbox_data); + if (!access_ok(VERIFY_WRITE, buf, len)) + return -EFAULT; + + udata = (void __user *)buf; + + count = spu_acquire(ctx); + if (count) + return count; + + for (count = 0; (count + 4) <= len; count += 4, udata++) { + int ret; + ret = ctx->ops->mbox_read(ctx, &mbox_data); + if (ret == 0) + break; + + /* + * at the end of the mapped area, we can fault + * but still need to return the data we have + * read successfully so far. 
+ */ + ret = __put_user(mbox_data, udata); + if (ret) { + if (!count) + count = -EFAULT; + break; + } + } spu_release(ctx); - if (!ret) - return -EAGAIN; + if (!count) + count = -EAGAIN; - if (copy_to_user(buf, &mbox_data, sizeof mbox_data)) - return -EFAULT; - - return 4; + return count; } -static struct file_operations spufs_mbox_fops = { +static const struct file_operations spufs_mbox_fops = { .open = spufs_pipe_open, .read = spufs_mbox_read, + .llseek = no_llseek, }; static ssize_t spufs_mbox_stat_read(struct file *file, char __user *buf, size_t len, loff_t *pos) { struct spu_context *ctx = file->private_data; + ssize_t ret; u32 mbox_stat; if (len < 4) return -EINVAL; - spu_acquire(ctx); + ret = spu_acquire(ctx); + if (ret) + return ret; mbox_stat = ctx->ops->mbox_stat_read(ctx) & 0xff; @@ -299,9 +729,10 @@ static ssize_t spufs_mbox_stat_read(struct file *file, char __user *buf, return 4; } -static struct file_operations spufs_mbox_stat_fops = { +static const struct file_operations spufs_mbox_stat_fops = { .open = spufs_pipe_open, .read = spufs_mbox_stat_read, + .llseek = no_llseek, }; /* low-level ibox access function */ @@ -322,40 +753,81 @@ void spufs_ibox_callback(struct spu *spu) { struct spu_context *ctx = spu->ctx; + if (!ctx) + return; + wake_up_all(&ctx->ibox_wq); kill_fasync(&ctx->ibox_fasync, SIGIO, POLLIN); } +/* + * Read as many bytes from the interrupt mailbox as possible, until + * one of the conditions becomes true: + * + * - no more data available in the mailbox + * - end of the user provided buffer + * - end of the mapped area + * + * If the file is opened without O_NONBLOCK, we wait here until + * any data is available, but return when we have been able to + * read something. + */ static ssize_t spufs_ibox_read(struct file *file, char __user *buf, size_t len, loff_t *pos) { struct spu_context *ctx = file->private_data; - u32 ibox_data; - ssize_t ret; + u32 ibox_data, __user *udata; + ssize_t count; if (len < 4) return -EINVAL; - spu_acquire(ctx); + if (!access_ok(VERIFY_WRITE, buf, len)) + return -EFAULT; - ret = 0; + udata = (void __user *)buf; + + count = spu_acquire(ctx); + if (count) + goto out; + + /* wait only for the first element */ + count = 0; if (file->f_flags & O_NONBLOCK) { - if (!spu_ibox_read(ctx, &ibox_data)) - ret = -EAGAIN; + if (!spu_ibox_read(ctx, &ibox_data)) { + count = -EAGAIN; + goto out_unlock; + } } else { - ret = spufs_wait(ctx->ibox_wq, spu_ibox_read(ctx, &ibox_data)); + count = spufs_wait(ctx->ibox_wq, spu_ibox_read(ctx, &ibox_data)); + if (count) + goto out; } - spu_release(ctx); - - if (ret) - return ret; - - ret = 4; - if (copy_to_user(buf, &ibox_data, sizeof ibox_data)) - ret = -EFAULT; + /* if we can't write at all, return -EFAULT */ + count = __put_user(ibox_data, udata); + if (count) + goto out_unlock; + + for (count = 4, udata++; (count + 4) <= len; count += 4, udata++) { + int ret; + ret = ctx->ops->ibox_read(ctx, &ibox_data); + if (ret == 0) + break; + /* + * at the end of the mapped area, we can fault + * but still need to return the data we have + * read successfully so far. 
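 *
 * The user-space view, as a hypothetical sketch (descriptor name and
 * error handling assumed):
 *
 *	uint32_t event;
 *	ssize_t n = read(ibox_fd, &event, sizeof(event));
 *
 * Without O_NONBLOCK the read() blocks until the SPE has written at least
 * one interrupt-mailbox entry; with O_NONBLOCK it fails with EAGAIN
 * instead, and in either case n is a multiple of four bytes.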
+ */ + ret = __put_user(ibox_data, udata); + if (ret) + break; + } - return ret; +out_unlock: + spu_release(ctx); +out: + return count; } static unsigned int spufs_ibox_poll(struct file *file, poll_table *wait) @@ -365,30 +837,38 @@ static unsigned int spufs_ibox_poll(struct file *file, poll_table *wait) poll_wait(file, &ctx->ibox_wq, wait); - spu_acquire(ctx); + /* + * For now keep this uninterruptible and also ignore the rule + * that poll should not sleep. Will be fixed later. + */ + mutex_lock(&ctx->state_mutex); mask = ctx->ops->mbox_stat_poll(ctx, POLLIN | POLLRDNORM); spu_release(ctx); return mask; } -static struct file_operations spufs_ibox_fops = { +static const struct file_operations spufs_ibox_fops = { .open = spufs_pipe_open, .read = spufs_ibox_read, .poll = spufs_ibox_poll, .fasync = spufs_ibox_fasync, + .llseek = no_llseek, }; static ssize_t spufs_ibox_stat_read(struct file *file, char __user *buf, size_t len, loff_t *pos) { struct spu_context *ctx = file->private_data; + ssize_t ret; u32 ibox_stat; if (len < 4) return -EINVAL; - spu_acquire(ctx); + ret = spu_acquire(ctx); + if (ret) + return ret; ibox_stat = (ctx->ops->mbox_stat_read(ctx) >> 16) & 0xff; spu_release(ctx); @@ -398,9 +878,10 @@ static ssize_t spufs_ibox_stat_read(struct file *file, char __user *buf, return 4; } -static struct file_operations spufs_ibox_stat_fops = { +static const struct file_operations spufs_ibox_stat_fops = { .open = spufs_pipe_open, .read = spufs_ibox_stat_read, + .llseek = no_llseek, }; /* low-level mailbox write */ @@ -424,36 +905,79 @@ void spufs_wbox_callback(struct spu *spu) { struct spu_context *ctx = spu->ctx; + if (!ctx) + return; + wake_up_all(&ctx->wbox_wq); kill_fasync(&ctx->wbox_fasync, SIGIO, POLLOUT); } +/* + * Write as many bytes to the interrupt mailbox as possible, until + * one of the conditions becomes true: + * + * - the mailbox is full + * - end of the user provided buffer + * - end of the mapped area + * + * If the file is opened without O_NONBLOCK, we wait here until + * space is availabyl, but return when we have been able to + * write something. + */ static ssize_t spufs_wbox_write(struct file *file, const char __user *buf, size_t len, loff_t *pos) { struct spu_context *ctx = file->private_data; - u32 wbox_data; - int ret; + u32 wbox_data, __user *udata; + ssize_t count; if (len < 4) return -EINVAL; - if (copy_from_user(&wbox_data, buf, sizeof wbox_data)) + udata = (void __user *)buf; + if (!access_ok(VERIFY_READ, buf, len)) return -EFAULT; - spu_acquire(ctx); + if (__get_user(wbox_data, udata)) + return -EFAULT; - ret = 0; + count = spu_acquire(ctx); + if (count) + goto out; + + /* + * make sure we can at least write one element, by waiting + * in case of !O_NONBLOCK + */ + count = 0; if (file->f_flags & O_NONBLOCK) { - if (!spu_wbox_write(ctx, wbox_data)) - ret = -EAGAIN; + if (!spu_wbox_write(ctx, wbox_data)) { + count = -EAGAIN; + goto out_unlock; + } } else { - ret = spufs_wait(ctx->wbox_wq, spu_wbox_write(ctx, wbox_data)); + count = spufs_wait(ctx->wbox_wq, spu_wbox_write(ctx, wbox_data)); + if (count) + goto out; } - spu_release(ctx); - return ret ? 
ret : sizeof wbox_data; + /* write as much as possible */ + for (count = 4, udata++; (count + 4) <= len; count += 4, udata++) { + int ret; + ret = __get_user(wbox_data, udata); + if (ret) + break; + + ret = spu_wbox_write(ctx, wbox_data); + if (ret == 0) + break; + } + +out_unlock: + spu_release(ctx); +out: + return count; } static unsigned int spufs_wbox_poll(struct file *file, poll_table *wait) @@ -463,30 +987,38 @@ static unsigned int spufs_wbox_poll(struct file *file, poll_table *wait) poll_wait(file, &ctx->wbox_wq, wait); - spu_acquire(ctx); + /* + * For now keep this uninterruptible and also ignore the rule + * that poll should not sleep. Will be fixed later. + */ + mutex_lock(&ctx->state_mutex); mask = ctx->ops->mbox_stat_poll(ctx, POLLOUT | POLLWRNORM); spu_release(ctx); return mask; } -static struct file_operations spufs_wbox_fops = { +static const struct file_operations spufs_wbox_fops = { .open = spufs_pipe_open, .write = spufs_wbox_write, .poll = spufs_wbox_poll, .fasync = spufs_wbox_fasync, + .llseek = no_llseek, }; static ssize_t spufs_wbox_stat_read(struct file *file, char __user *buf, size_t len, loff_t *pos) { struct spu_context *ctx = file->private_data; + ssize_t ret; u32 wbox_stat; if (len < 4) return -EINVAL; - spu_acquire(ctx); + ret = spu_acquire(ctx); + if (ret) + return ret; wbox_stat = (ctx->ops->mbox_stat_read(ctx) >> 8) & 0xff; spu_release(ctx); @@ -496,34 +1028,82 @@ static ssize_t spufs_wbox_stat_read(struct file *file, char __user *buf, return 4; } -static struct file_operations spufs_wbox_stat_fops = { +static const struct file_operations spufs_wbox_stat_fops = { .open = spufs_pipe_open, .read = spufs_wbox_stat_read, + .llseek = no_llseek, }; -static ssize_t spufs_signal1_read(struct file *file, char __user *buf, +static int spufs_signal1_open(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + struct spu_context *ctx = i->i_ctx; + + mutex_lock(&ctx->mapping_lock); + file->private_data = ctx; + if (!i->i_openers++) + ctx->signal1 = inode->i_mapping; + mutex_unlock(&ctx->mapping_lock); + return nonseekable_open(inode, file); +} + +static int +spufs_signal1_release(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + struct spu_context *ctx = i->i_ctx; + + mutex_lock(&ctx->mapping_lock); + if (!--i->i_openers) + ctx->signal1 = NULL; + mutex_unlock(&ctx->mapping_lock); + return 0; +} + +static ssize_t __spufs_signal1_read(struct spu_context *ctx, char __user *buf, size_t len, loff_t *pos) { - struct spu_context *ctx = file->private_data; + int ret = 0; u32 data; if (len < 4) return -EINVAL; - spu_acquire(ctx); - data = ctx->ops->signal1_read(ctx); - spu_release(ctx); + if (ctx->csa.spu_chnlcnt_RW[3]) { + data = ctx->csa.spu_chnldata_RW[3]; + ret = 4; + } + + if (!ret) + goto out; if (copy_to_user(buf, &data, 4)) return -EFAULT; - return 4; +out: + return ret; +} + +static ssize_t spufs_signal1_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + int ret; + struct spu_context *ctx = file->private_data; + + ret = spu_acquire_saved(ctx); + if (ret) + return ret; + ret = __spufs_signal1_read(ctx, buf, len, pos); + spu_release_saved(ctx); + + return ret; } static ssize_t spufs_signal1_write(struct file *file, const char __user *buf, size_t len, loff_t *pos) { struct spu_context *ctx; + ssize_t ret; u32 data; ctx = file->private_data; @@ -534,44 +1114,133 @@ static ssize_t spufs_signal1_write(struct file *file, const char __user *buf, if (copy_from_user(&data, buf, 4)) 
return -EFAULT; - spu_acquire(ctx); + ret = spu_acquire(ctx); + if (ret) + return ret; ctx->ops->signal1_write(ctx, data); spu_release(ctx); return 4; } -static struct file_operations spufs_signal1_fops = { - .open = spufs_pipe_open, +static int +spufs_signal1_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ +#if SPUFS_SIGNAL_MAP_SIZE == 0x1000 + return spufs_ps_fault(vma, vmf, 0x14000, SPUFS_SIGNAL_MAP_SIZE); +#elif SPUFS_SIGNAL_MAP_SIZE == 0x10000 + /* For 64k pages, both signal1 and signal2 can be used to mmap the whole + * signal 1 and 2 area + */ + return spufs_ps_fault(vma, vmf, 0x10000, SPUFS_SIGNAL_MAP_SIZE); +#else +#error unsupported page size +#endif +} + +static const struct vm_operations_struct spufs_signal1_mmap_vmops = { + .fault = spufs_signal1_mmap_fault, +}; + +static int spufs_signal1_mmap(struct file *file, struct vm_area_struct *vma) +{ + if (!(vma->vm_flags & VM_SHARED)) + return -EINVAL; + + vma->vm_flags |= VM_IO | VM_PFNMAP; + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + vma->vm_ops = &spufs_signal1_mmap_vmops; + return 0; +} + +static const struct file_operations spufs_signal1_fops = { + .open = spufs_signal1_open, + .release = spufs_signal1_release, .read = spufs_signal1_read, .write = spufs_signal1_write, + .mmap = spufs_signal1_mmap, + .llseek = no_llseek, }; -static ssize_t spufs_signal2_read(struct file *file, char __user *buf, +static const struct file_operations spufs_signal1_nosched_fops = { + .open = spufs_signal1_open, + .release = spufs_signal1_release, + .write = spufs_signal1_write, + .mmap = spufs_signal1_mmap, + .llseek = no_llseek, +}; + +static int spufs_signal2_open(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + struct spu_context *ctx = i->i_ctx; + + mutex_lock(&ctx->mapping_lock); + file->private_data = ctx; + if (!i->i_openers++) + ctx->signal2 = inode->i_mapping; + mutex_unlock(&ctx->mapping_lock); + return nonseekable_open(inode, file); +} + +static int +spufs_signal2_release(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + struct spu_context *ctx = i->i_ctx; + + mutex_lock(&ctx->mapping_lock); + if (!--i->i_openers) + ctx->signal2 = NULL; + mutex_unlock(&ctx->mapping_lock); + return 0; +} + +static ssize_t __spufs_signal2_read(struct spu_context *ctx, char __user *buf, size_t len, loff_t *pos) { - struct spu_context *ctx; + int ret = 0; u32 data; - ctx = file->private_data; - if (len < 4) return -EINVAL; - spu_acquire(ctx); - data = ctx->ops->signal2_read(ctx); - spu_release(ctx); + if (ctx->csa.spu_chnlcnt_RW[4]) { + data = ctx->csa.spu_chnldata_RW[4]; + ret = 4; + } + + if (!ret) + goto out; if (copy_to_user(buf, &data, 4)) return -EFAULT; - return 4; +out: + return ret; +} + +static ssize_t spufs_signal2_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx = file->private_data; + int ret; + + ret = spu_acquire_saved(ctx); + if (ret) + return ret; + ret = __spufs_signal2_read(ctx, buf, len, pos); + spu_release_saved(ctx); + + return ret; } static ssize_t spufs_signal2_write(struct file *file, const char __user *buf, size_t len, loff_t *pos) { struct spu_context *ctx; + ssize_t ret; u32 data; ctx = file->private_data; @@ -582,213 +1251,1523 @@ static ssize_t spufs_signal2_write(struct file *file, const char __user *buf, if (copy_from_user(&data, buf, 4)) return -EFAULT; - spu_acquire(ctx); + ret = spu_acquire(ctx); + if (ret) + return ret; ctx->ops->signal2_write(ctx, data); 
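	/*
	 * User-space side of this path, as a hypothetical sketch
	 * (descriptor name assumed): a 4-byte write() delivers the value
	 * to the SPE signal-notification 2 register, and the same file
	 * can also be mmap()ed with MAP_SHARED for direct problem-state
	 * access:
	 *
	 *	uint32_t v = 1;
	 *	write(signal2_fd, &v, sizeof(v));
	 */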
spu_release(ctx); return 4; } -static struct file_operations spufs_signal2_fops = { - .open = spufs_pipe_open, +#if SPUFS_MMAP_4K +static int +spufs_signal2_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ +#if SPUFS_SIGNAL_MAP_SIZE == 0x1000 + return spufs_ps_fault(vma, vmf, 0x1c000, SPUFS_SIGNAL_MAP_SIZE); +#elif SPUFS_SIGNAL_MAP_SIZE == 0x10000 + /* For 64k pages, both signal1 and signal2 can be used to mmap the whole + * signal 1 and 2 area + */ + return spufs_ps_fault(vma, vmf, 0x10000, SPUFS_SIGNAL_MAP_SIZE); +#else +#error unsupported page size +#endif +} + +static const struct vm_operations_struct spufs_signal2_mmap_vmops = { + .fault = spufs_signal2_mmap_fault, +}; + +static int spufs_signal2_mmap(struct file *file, struct vm_area_struct *vma) +{ + if (!(vma->vm_flags & VM_SHARED)) + return -EINVAL; + + vma->vm_flags |= VM_IO | VM_PFNMAP; + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + vma->vm_ops = &spufs_signal2_mmap_vmops; + return 0; +} +#else /* SPUFS_MMAP_4K */ +#define spufs_signal2_mmap NULL +#endif /* !SPUFS_MMAP_4K */ + +static const struct file_operations spufs_signal2_fops = { + .open = spufs_signal2_open, + .release = spufs_signal2_release, .read = spufs_signal2_read, .write = spufs_signal2_write, + .mmap = spufs_signal2_mmap, + .llseek = no_llseek, +}; + +static const struct file_operations spufs_signal2_nosched_fops = { + .open = spufs_signal2_open, + .release = spufs_signal2_release, + .write = spufs_signal2_write, + .mmap = spufs_signal2_mmap, + .llseek = no_llseek, }; -static void spufs_signal1_type_set(void *data, u64 val) +/* + * This is a wrapper around DEFINE_SIMPLE_ATTRIBUTE which does the + * work of acquiring (or not) the SPU context before calling through + * to the actual get routine. The set routine is called directly. 
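 *
 * For example, the declaration further down,
 *
 *	DEFINE_SPUFS_ATTRIBUTE(spufs_npc_ops, spufs_npc_get, spufs_npc_set,
 *			       "0x%llx\n", SPU_ATTR_ACQUIRE);
 *
 * expands to a __spufs_npc_get() helper that takes and drops the context
 * lock around spufs_npc_get(), while spufs_npc_set() is used as-is and
 * does its own spu_acquire()/spu_release().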
+ */ +#define SPU_ATTR_NOACQUIRE 0 +#define SPU_ATTR_ACQUIRE 1 +#define SPU_ATTR_ACQUIRE_SAVED 2 + +#define DEFINE_SPUFS_ATTRIBUTE(__name, __get, __set, __fmt, __acquire) \ +static int __##__get(void *data, u64 *val) \ +{ \ + struct spu_context *ctx = data; \ + int ret = 0; \ + \ + if (__acquire == SPU_ATTR_ACQUIRE) { \ + ret = spu_acquire(ctx); \ + if (ret) \ + return ret; \ + *val = __get(ctx); \ + spu_release(ctx); \ + } else if (__acquire == SPU_ATTR_ACQUIRE_SAVED) { \ + ret = spu_acquire_saved(ctx); \ + if (ret) \ + return ret; \ + *val = __get(ctx); \ + spu_release_saved(ctx); \ + } else \ + *val = __get(ctx); \ + \ + return 0; \ +} \ +DEFINE_SPUFS_SIMPLE_ATTRIBUTE(__name, __##__get, __set, __fmt); + +static int spufs_signal1_type_set(void *data, u64 val) { struct spu_context *ctx = data; + int ret; - spu_acquire(ctx); + ret = spu_acquire(ctx); + if (ret) + return ret; ctx->ops->signal1_type_set(ctx, val); spu_release(ctx); + + return 0; } -static u64 spufs_signal1_type_get(void *data) +static u64 spufs_signal1_type_get(struct spu_context *ctx) +{ + return ctx->ops->signal1_type_get(ctx); +} +DEFINE_SPUFS_ATTRIBUTE(spufs_signal1_type, spufs_signal1_type_get, + spufs_signal1_type_set, "%llu\n", SPU_ATTR_ACQUIRE); + + +static int spufs_signal2_type_set(void *data, u64 val) { struct spu_context *ctx = data; - u64 ret; + int ret; - spu_acquire(ctx); - ret = ctx->ops->signal1_type_get(ctx); + ret = spu_acquire(ctx); + if (ret) + return ret; + ctx->ops->signal2_type_set(ctx, val); spu_release(ctx); - return ret; + return 0; } -DEFINE_SIMPLE_ATTRIBUTE(spufs_signal1_type, spufs_signal1_type_get, - spufs_signal1_type_set, "%llu"); -static void spufs_signal2_type_set(void *data, u64 val) +static u64 spufs_signal2_type_get(struct spu_context *ctx) { - struct spu_context *ctx = data; + return ctx->ops->signal2_type_get(ctx); +} +DEFINE_SPUFS_ATTRIBUTE(spufs_signal2_type, spufs_signal2_type_get, + spufs_signal2_type_set, "%llu\n", SPU_ATTR_ACQUIRE); - spu_acquire(ctx); - ctx->ops->signal2_type_set(ctx, val); - spu_release(ctx); +#if SPUFS_MMAP_4K +static int +spufs_mss_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + return spufs_ps_fault(vma, vmf, 0x0000, SPUFS_MSS_MAP_SIZE); } -static u64 spufs_signal2_type_get(void *data) +static const struct vm_operations_struct spufs_mss_mmap_vmops = { + .fault = spufs_mss_mmap_fault, +}; + +/* + * mmap support for problem state MFC DMA area [0x0000 - 0x0fff]. 
+ */ +static int spufs_mss_mmap(struct file *file, struct vm_area_struct *vma) { - struct spu_context *ctx = data; - u64 ret; + if (!(vma->vm_flags & VM_SHARED)) + return -EINVAL; + + vma->vm_flags |= VM_IO | VM_PFNMAP; + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + vma->vm_ops = &spufs_mss_mmap_vmops; + return 0; +} +#else /* SPUFS_MMAP_4K */ +#define spufs_mss_mmap NULL +#endif /* !SPUFS_MMAP_4K */ + +static int spufs_mss_open(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + struct spu_context *ctx = i->i_ctx; + + file->private_data = i->i_ctx; + + mutex_lock(&ctx->mapping_lock); + if (!i->i_openers++) + ctx->mss = inode->i_mapping; + mutex_unlock(&ctx->mapping_lock); + return nonseekable_open(inode, file); +} - spu_acquire(ctx); - ret = ctx->ops->signal2_type_get(ctx); +static int +spufs_mss_release(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + struct spu_context *ctx = i->i_ctx; + + mutex_lock(&ctx->mapping_lock); + if (!--i->i_openers) + ctx->mss = NULL; + mutex_unlock(&ctx->mapping_lock); + return 0; +} + +static const struct file_operations spufs_mss_fops = { + .open = spufs_mss_open, + .release = spufs_mss_release, + .mmap = spufs_mss_mmap, + .llseek = no_llseek, +}; + +static int +spufs_psmap_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + return spufs_ps_fault(vma, vmf, 0x0000, SPUFS_PS_MAP_SIZE); +} + +static const struct vm_operations_struct spufs_psmap_mmap_vmops = { + .fault = spufs_psmap_mmap_fault, +}; + +/* + * mmap support for full problem state area [0x00000 - 0x1ffff]. + */ +static int spufs_psmap_mmap(struct file *file, struct vm_area_struct *vma) +{ + if (!(vma->vm_flags & VM_SHARED)) + return -EINVAL; + + vma->vm_flags |= VM_IO | VM_PFNMAP; + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + vma->vm_ops = &spufs_psmap_mmap_vmops; + return 0; +} + +static int spufs_psmap_open(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + struct spu_context *ctx = i->i_ctx; + + mutex_lock(&ctx->mapping_lock); + file->private_data = i->i_ctx; + if (!i->i_openers++) + ctx->psmap = inode->i_mapping; + mutex_unlock(&ctx->mapping_lock); + return nonseekable_open(inode, file); +} + +static int +spufs_psmap_release(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + struct spu_context *ctx = i->i_ctx; + + mutex_lock(&ctx->mapping_lock); + if (!--i->i_openers) + ctx->psmap = NULL; + mutex_unlock(&ctx->mapping_lock); + return 0; +} + +static const struct file_operations spufs_psmap_fops = { + .open = spufs_psmap_open, + .release = spufs_psmap_release, + .mmap = spufs_psmap_mmap, + .llseek = no_llseek, +}; + + +#if SPUFS_MMAP_4K +static int +spufs_mfc_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + return spufs_ps_fault(vma, vmf, 0x3000, SPUFS_MFC_MAP_SIZE); +} + +static const struct vm_operations_struct spufs_mfc_mmap_vmops = { + .fault = spufs_mfc_mmap_fault, +}; + +/* + * mmap support for problem state MFC DMA area [0x0000 - 0x0fff]. 
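 *
 * Besides mmap(), the "mfc" file also accepts DMA requests as plain
 * writes; a hypothetical user-space sketch (the names mfc_fd, ls_offset
 * and buf are placeholders, fields as validated by
 * spufs_check_valid_dma() below, lsa/ea 16-byte aligned, error handling
 * omitted):
 *
 *	struct mfc_dma_command cmd = {
 *		.lsa = ls_offset, .ea = (uint64_t)(unsigned long)buf,
 *		.size = 16384, .tag = 5, .cmd = MFC_GET_CMD,
 *	};
 *	uint32_t status;
 *	write(mfc_fd, &cmd, sizeof(cmd));
 *	read(mfc_fd, &status, sizeof(status));
 *
 * The write queues the transfer; the 4-byte read blocks until a tag
 * group being waited on (here, tag 5) completes and returns its status
 * mask.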
+ */ +static int spufs_mfc_mmap(struct file *file, struct vm_area_struct *vma) +{ + if (!(vma->vm_flags & VM_SHARED)) + return -EINVAL; + + vma->vm_flags |= VM_IO | VM_PFNMAP; + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + vma->vm_ops = &spufs_mfc_mmap_vmops; + return 0; +} +#else /* SPUFS_MMAP_4K */ +#define spufs_mfc_mmap NULL +#endif /* !SPUFS_MMAP_4K */ + +static int spufs_mfc_open(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + struct spu_context *ctx = i->i_ctx; + + /* we don't want to deal with DMA into other processes */ + if (ctx->owner != current->mm) + return -EINVAL; + + if (atomic_read(&inode->i_count) != 1) + return -EBUSY; + + mutex_lock(&ctx->mapping_lock); + file->private_data = ctx; + if (!i->i_openers++) + ctx->mfc = inode->i_mapping; + mutex_unlock(&ctx->mapping_lock); + return nonseekable_open(inode, file); +} + +static int +spufs_mfc_release(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + struct spu_context *ctx = i->i_ctx; + + mutex_lock(&ctx->mapping_lock); + if (!--i->i_openers) + ctx->mfc = NULL; + mutex_unlock(&ctx->mapping_lock); + return 0; +} + +/* interrupt-level mfc callback function. */ +void spufs_mfc_callback(struct spu *spu) +{ + struct spu_context *ctx = spu->ctx; + + if (!ctx) + return; + + wake_up_all(&ctx->mfc_wq); + + pr_debug("%s %s\n", __func__, spu->name); + if (ctx->mfc_fasync) { + u32 free_elements, tagstatus; + unsigned int mask; + + /* no need for spu_acquire in interrupt context */ + free_elements = ctx->ops->get_mfc_free_elements(ctx); + tagstatus = ctx->ops->read_mfc_tagstatus(ctx); + + mask = 0; + if (free_elements & 0xffff) + mask |= POLLOUT; + if (tagstatus & ctx->tagwait) + mask |= POLLIN; + + kill_fasync(&ctx->mfc_fasync, SIGIO, mask); + } +} + +static int spufs_read_mfc_tagstatus(struct spu_context *ctx, u32 *status) +{ + /* See if there is one tag group is complete */ + /* FIXME we need locking around tagwait */ + *status = ctx->ops->read_mfc_tagstatus(ctx) & ctx->tagwait; + ctx->tagwait &= ~*status; + if (*status) + return 1; + + /* enable interrupt waiting for any tag group, + may silently fail if interrupts are already enabled */ + ctx->ops->set_mfc_query(ctx, ctx->tagwait, 1); + return 0; +} + +static ssize_t spufs_mfc_read(struct file *file, char __user *buffer, + size_t size, loff_t *pos) +{ + struct spu_context *ctx = file->private_data; + int ret = -EINVAL; + u32 status; + + if (size != 4) + goto out; + + ret = spu_acquire(ctx); + if (ret) + return ret; + + ret = -EINVAL; + if (file->f_flags & O_NONBLOCK) { + status = ctx->ops->read_mfc_tagstatus(ctx); + if (!(status & ctx->tagwait)) + ret = -EAGAIN; + else + /* XXX(hch): shouldn't we clear ret here? 
*/ + ctx->tagwait &= ~status; + } else { + ret = spufs_wait(ctx->mfc_wq, + spufs_read_mfc_tagstatus(ctx, &status)); + if (ret) + goto out; + } spu_release(ctx); + ret = 4; + if (copy_to_user(buffer, &status, 4)) + ret = -EFAULT; + +out: return ret; } -DEFINE_SIMPLE_ATTRIBUTE(spufs_signal2_type, spufs_signal2_type_get, - spufs_signal2_type_set, "%llu"); -static void spufs_npc_set(void *data, u64 val) +static int spufs_check_valid_dma(struct mfc_dma_command *cmd) { - struct spu_context *ctx = data; - spu_acquire(ctx); - ctx->ops->npc_write(ctx, val); - spu_release(ctx); + pr_debug("queueing DMA %x %llx %x %x %x\n", cmd->lsa, + cmd->ea, cmd->size, cmd->tag, cmd->cmd); + + switch (cmd->cmd) { + case MFC_PUT_CMD: + case MFC_PUTF_CMD: + case MFC_PUTB_CMD: + case MFC_GET_CMD: + case MFC_GETF_CMD: + case MFC_GETB_CMD: + break; + default: + pr_debug("invalid DMA opcode %x\n", cmd->cmd); + return -EIO; + } + + if ((cmd->lsa & 0xf) != (cmd->ea &0xf)) { + pr_debug("invalid DMA alignment, ea %llx lsa %x\n", + cmd->ea, cmd->lsa); + return -EIO; + } + + switch (cmd->size & 0xf) { + case 1: + break; + case 2: + if (cmd->lsa & 1) + goto error; + break; + case 4: + if (cmd->lsa & 3) + goto error; + break; + case 8: + if (cmd->lsa & 7) + goto error; + break; + case 0: + if (cmd->lsa & 15) + goto error; + break; + error: + default: + pr_debug("invalid DMA alignment %x for size %x\n", + cmd->lsa & 0xf, cmd->size); + return -EIO; + } + + if (cmd->size > 16 * 1024) { + pr_debug("invalid DMA size %x\n", cmd->size); + return -EIO; + } + + if (cmd->tag & 0xfff0) { + /* we reserve the higher tag numbers for kernel use */ + pr_debug("invalid DMA tag\n"); + return -EIO; + } + + if (cmd->class) { + /* not supported in this version */ + pr_debug("invalid DMA class\n"); + return -EIO; + } + + return 0; } -static u64 spufs_npc_get(void *data) +static int spu_send_mfc_command(struct spu_context *ctx, + struct mfc_dma_command cmd, + int *error) { - struct spu_context *ctx = data; - u64 ret; - spu_acquire(ctx); - ret = ctx->ops->npc_read(ctx); + *error = ctx->ops->send_mfc_command(ctx, &cmd); + if (*error == -EAGAIN) { + /* wait for any tag group to complete + so we have space for the new command */ + ctx->ops->set_mfc_query(ctx, ctx->tagwait, 1); + /* try again, because the queue might be + empty again */ + *error = ctx->ops->send_mfc_command(ctx, &cmd); + if (*error == -EAGAIN) + return 0; + } + return 1; +} + +static ssize_t spufs_mfc_write(struct file *file, const char __user *buffer, + size_t size, loff_t *pos) +{ + struct spu_context *ctx = file->private_data; + struct mfc_dma_command cmd; + int ret = -EINVAL; + + if (size != sizeof cmd) + goto out; + + ret = -EFAULT; + if (copy_from_user(&cmd, buffer, sizeof cmd)) + goto out; + + ret = spufs_check_valid_dma(&cmd); + if (ret) + goto out; + + ret = spu_acquire(ctx); + if (ret) + goto out; + + ret = spufs_wait(ctx->run_wq, ctx->state == SPU_STATE_RUNNABLE); + if (ret) + goto out; + + if (file->f_flags & O_NONBLOCK) { + ret = ctx->ops->send_mfc_command(ctx, &cmd); + } else { + int status; + ret = spufs_wait(ctx->mfc_wq, + spu_send_mfc_command(ctx, cmd, &status)); + if (ret) + goto out; + if (status) + ret = status; + } + + if (ret) + goto out_unlock; + + ctx->tagwait |= 1 << cmd.tag; + ret = size; + +out_unlock: spu_release(ctx); +out: return ret; } -DEFINE_SIMPLE_ATTRIBUTE(spufs_npc_ops, spufs_npc_get, spufs_npc_set, "%llx\n") -static void spufs_decr_set(void *data, u64 val) +static unsigned int spufs_mfc_poll(struct file *file,poll_table *wait) { - struct spu_context 
*ctx = data; - struct spu_lscsa *lscsa = ctx->csa.lscsa; - spu_acquire_saved(ctx); - lscsa->decr.slot[0] = (u32) val; + struct spu_context *ctx = file->private_data; + u32 free_elements, tagstatus; + unsigned int mask; + + poll_wait(file, &ctx->mfc_wq, wait); + + /* + * For now keep this uninterruptible and also ignore the rule + * that poll should not sleep. Will be fixed later. + */ + mutex_lock(&ctx->state_mutex); + ctx->ops->set_mfc_query(ctx, ctx->tagwait, 2); + free_elements = ctx->ops->get_mfc_free_elements(ctx); + tagstatus = ctx->ops->read_mfc_tagstatus(ctx); spu_release(ctx); + + mask = 0; + if (free_elements & 0xffff) + mask |= POLLOUT | POLLWRNORM; + if (tagstatus & ctx->tagwait) + mask |= POLLIN | POLLRDNORM; + + pr_debug("%s: free %d tagstatus %d tagwait %d\n", __func__, + free_elements, tagstatus, ctx->tagwait); + + return mask; } -static u64 spufs_decr_get(void *data) +static int spufs_mfc_flush(struct file *file, fl_owner_t id) { - struct spu_context *ctx = data; - struct spu_lscsa *lscsa = ctx->csa.lscsa; - u64 ret; - spu_acquire_saved(ctx); - ret = lscsa->decr.slot[0]; + struct spu_context *ctx = file->private_data; + int ret; + + ret = spu_acquire(ctx); + if (ret) + goto out; +#if 0 +/* this currently hangs */ + ret = spufs_wait(ctx->mfc_wq, + ctx->ops->set_mfc_query(ctx, ctx->tagwait, 2)); + if (ret) + goto out; + ret = spufs_wait(ctx->mfc_wq, + ctx->ops->read_mfc_tagstatus(ctx) == ctx->tagwait); + if (ret) + goto out; +#else + ret = 0; +#endif spu_release(ctx); +out: return ret; } -DEFINE_SIMPLE_ATTRIBUTE(spufs_decr_ops, spufs_decr_get, spufs_decr_set, - "%llx\n") -static void spufs_decr_status_set(void *data, u64 val) +static int spufs_mfc_fsync(struct file *file, loff_t start, loff_t end, int datasync) +{ + struct inode *inode = file_inode(file); + int err = filemap_write_and_wait_range(inode->i_mapping, start, end); + if (!err) { + mutex_lock(&inode->i_mutex); + err = spufs_mfc_flush(file, NULL); + mutex_unlock(&inode->i_mutex); + } + return err; +} + +static int spufs_mfc_fasync(int fd, struct file *file, int on) +{ + struct spu_context *ctx = file->private_data; + + return fasync_helper(fd, file, on, &ctx->mfc_fasync); +} + +static const struct file_operations spufs_mfc_fops = { + .open = spufs_mfc_open, + .release = spufs_mfc_release, + .read = spufs_mfc_read, + .write = spufs_mfc_write, + .poll = spufs_mfc_poll, + .flush = spufs_mfc_flush, + .fsync = spufs_mfc_fsync, + .fasync = spufs_mfc_fasync, + .mmap = spufs_mfc_mmap, + .llseek = no_llseek, +}; + +static int spufs_npc_set(void *data, u64 val) { struct spu_context *ctx = data; - struct spu_lscsa *lscsa = ctx->csa.lscsa; - spu_acquire_saved(ctx); - lscsa->decr_status.slot[0] = (u32) val; + int ret; + + ret = spu_acquire(ctx); + if (ret) + return ret; + ctx->ops->npc_write(ctx, val); spu_release(ctx); + + return 0; } -static u64 spufs_decr_status_get(void *data) +static u64 spufs_npc_get(struct spu_context *ctx) +{ + return ctx->ops->npc_read(ctx); +} +DEFINE_SPUFS_ATTRIBUTE(spufs_npc_ops, spufs_npc_get, spufs_npc_set, + "0x%llx\n", SPU_ATTR_ACQUIRE); + +static int spufs_decr_set(void *data, u64 val) { struct spu_context *ctx = data; struct spu_lscsa *lscsa = ctx->csa.lscsa; - u64 ret; - spu_acquire_saved(ctx); - ret = lscsa->decr_status.slot[0]; - spu_release(ctx); - return ret; + int ret; + + ret = spu_acquire_saved(ctx); + if (ret) + return ret; + lscsa->decr.slot[0] = (u32) val; + spu_release_saved(ctx); + + return 0; } -DEFINE_SIMPLE_ATTRIBUTE(spufs_decr_status_ops, spufs_decr_status_get, - 
spufs_decr_status_set, "%llx\n") -static void spufs_spu_tag_mask_set(void *data, u64 val) +static u64 spufs_decr_get(struct spu_context *ctx) { - struct spu_context *ctx = data; struct spu_lscsa *lscsa = ctx->csa.lscsa; - spu_acquire_saved(ctx); - lscsa->tag_mask.slot[0] = (u32) val; - spu_release(ctx); + return lscsa->decr.slot[0]; } +DEFINE_SPUFS_ATTRIBUTE(spufs_decr_ops, spufs_decr_get, spufs_decr_set, + "0x%llx\n", SPU_ATTR_ACQUIRE_SAVED); -static u64 spufs_spu_tag_mask_get(void *data) +static int spufs_decr_status_set(void *data, u64 val) { struct spu_context *ctx = data; - struct spu_lscsa *lscsa = ctx->csa.lscsa; - u64 ret; - spu_acquire_saved(ctx); - ret = lscsa->tag_mask.slot[0]; - spu_release(ctx); - return ret; + int ret; + + ret = spu_acquire_saved(ctx); + if (ret) + return ret; + if (val) + ctx->csa.priv2.mfc_control_RW |= MFC_CNTL_DECREMENTER_RUNNING; + else + ctx->csa.priv2.mfc_control_RW &= ~MFC_CNTL_DECREMENTER_RUNNING; + spu_release_saved(ctx); + + return 0; +} + +static u64 spufs_decr_status_get(struct spu_context *ctx) +{ + if (ctx->csa.priv2.mfc_control_RW & MFC_CNTL_DECREMENTER_RUNNING) + return SPU_DECR_STATUS_RUNNING; + else + return 0; } -DEFINE_SIMPLE_ATTRIBUTE(spufs_spu_tag_mask_ops, spufs_spu_tag_mask_get, - spufs_spu_tag_mask_set, "%llx\n") +DEFINE_SPUFS_ATTRIBUTE(spufs_decr_status_ops, spufs_decr_status_get, + spufs_decr_status_set, "0x%llx\n", + SPU_ATTR_ACQUIRE_SAVED); -static void spufs_event_mask_set(void *data, u64 val) +static int spufs_event_mask_set(void *data, u64 val) { struct spu_context *ctx = data; struct spu_lscsa *lscsa = ctx->csa.lscsa; - spu_acquire_saved(ctx); + int ret; + + ret = spu_acquire_saved(ctx); + if (ret) + return ret; lscsa->event_mask.slot[0] = (u32) val; - spu_release(ctx); + spu_release_saved(ctx); + + return 0; } -static u64 spufs_event_mask_get(void *data) +static u64 spufs_event_mask_get(struct spu_context *ctx) { - struct spu_context *ctx = data; struct spu_lscsa *lscsa = ctx->csa.lscsa; - u64 ret; - spu_acquire_saved(ctx); - ret = lscsa->event_mask.slot[0]; - spu_release(ctx); - return ret; + return lscsa->event_mask.slot[0]; +} + +DEFINE_SPUFS_ATTRIBUTE(spufs_event_mask_ops, spufs_event_mask_get, + spufs_event_mask_set, "0x%llx\n", + SPU_ATTR_ACQUIRE_SAVED); + +static u64 spufs_event_status_get(struct spu_context *ctx) +{ + struct spu_state *state = &ctx->csa; + u64 stat; + stat = state->spu_chnlcnt_RW[0]; + if (stat) + return state->spu_chnldata_RW[0]; + return 0; } -DEFINE_SIMPLE_ATTRIBUTE(spufs_event_mask_ops, spufs_event_mask_get, - spufs_event_mask_set, "%llx\n") +DEFINE_SPUFS_ATTRIBUTE(spufs_event_status_ops, spufs_event_status_get, + NULL, "0x%llx\n", SPU_ATTR_ACQUIRE_SAVED) -static void spufs_srr0_set(void *data, u64 val) +static int spufs_srr0_set(void *data, u64 val) { struct spu_context *ctx = data; struct spu_lscsa *lscsa = ctx->csa.lscsa; - spu_acquire_saved(ctx); + int ret; + + ret = spu_acquire_saved(ctx); + if (ret) + return ret; lscsa->srr0.slot[0] = (u32) val; - spu_release(ctx); + spu_release_saved(ctx); + + return 0; } -static u64 spufs_srr0_get(void *data) +static u64 spufs_srr0_get(struct spu_context *ctx) { - struct spu_context *ctx = data; struct spu_lscsa *lscsa = ctx->csa.lscsa; - u64 ret; - spu_acquire_saved(ctx); - ret = lscsa->srr0.slot[0]; - spu_release(ctx); + return lscsa->srr0.slot[0]; +} +DEFINE_SPUFS_ATTRIBUTE(spufs_srr0_ops, spufs_srr0_get, spufs_srr0_set, + "0x%llx\n", SPU_ATTR_ACQUIRE_SAVED) + +static u64 spufs_id_get(struct spu_context *ctx) +{ + u64 num; + + if (ctx->state == 
SPU_STATE_RUNNABLE) + num = ctx->spu->number; + else + num = (unsigned int)-1; + + return num; +} +DEFINE_SPUFS_ATTRIBUTE(spufs_id_ops, spufs_id_get, NULL, "0x%llx\n", + SPU_ATTR_ACQUIRE) + +static u64 spufs_object_id_get(struct spu_context *ctx) +{ + /* FIXME: Should there really be no locking here? */ + return ctx->object_id; +} + +static int spufs_object_id_set(void *data, u64 id) +{ + struct spu_context *ctx = data; + ctx->object_id = id; + + return 0; +} + +DEFINE_SPUFS_ATTRIBUTE(spufs_object_id_ops, spufs_object_id_get, + spufs_object_id_set, "0x%llx\n", SPU_ATTR_NOACQUIRE); + +static u64 spufs_lslr_get(struct spu_context *ctx) +{ + return ctx->csa.priv2.spu_lslr_RW; +} +DEFINE_SPUFS_ATTRIBUTE(spufs_lslr_ops, spufs_lslr_get, NULL, "0x%llx\n", + SPU_ATTR_ACQUIRE_SAVED); + +static int spufs_info_open(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + struct spu_context *ctx = i->i_ctx; + file->private_data = ctx; + return 0; +} + +static int spufs_caps_show(struct seq_file *s, void *private) +{ + struct spu_context *ctx = s->private; + + if (!(ctx->flags & SPU_CREATE_NOSCHED)) + seq_puts(s, "sched\n"); + if (!(ctx->flags & SPU_CREATE_ISOLATE)) + seq_puts(s, "step\n"); + return 0; +} + +static int spufs_caps_open(struct inode *inode, struct file *file) +{ + return single_open(file, spufs_caps_show, SPUFS_I(inode)->i_ctx); +} + +static const struct file_operations spufs_caps_fops = { + .open = spufs_caps_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static ssize_t __spufs_mbox_info_read(struct spu_context *ctx, + char __user *buf, size_t len, loff_t *pos) +{ + u32 data; + + /* EOF if there's no entry in the mbox */ + if (!(ctx->csa.prob.mb_stat_R & 0x0000ff)) + return 0; + + data = ctx->csa.prob.pu_mb_R; + + return simple_read_from_buffer(buf, len, pos, &data, sizeof data); +} + +static ssize_t spufs_mbox_info_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + int ret; + struct spu_context *ctx = file->private_data; + + if (!access_ok(VERIFY_WRITE, buf, len)) + return -EFAULT; + + ret = spu_acquire_saved(ctx); + if (ret) + return ret; + spin_lock(&ctx->csa.register_lock); + ret = __spufs_mbox_info_read(ctx, buf, len, pos); + spin_unlock(&ctx->csa.register_lock); + spu_release_saved(ctx); + return ret; } -DEFINE_SIMPLE_ATTRIBUTE(spufs_srr0_ops, spufs_srr0_get, spufs_srr0_set, - "%llx\n") -struct tree_descr spufs_dir_contents[] = { - { "mem", &spufs_mem_fops, 0666, }, - { "regs", &spufs_regs_fops, 0666, }, +static const struct file_operations spufs_mbox_info_fops = { + .open = spufs_info_open, + .read = spufs_mbox_info_read, + .llseek = generic_file_llseek, +}; + +static ssize_t __spufs_ibox_info_read(struct spu_context *ctx, + char __user *buf, size_t len, loff_t *pos) +{ + u32 data; + + /* EOF if there's no entry in the ibox */ + if (!(ctx->csa.prob.mb_stat_R & 0xff0000)) + return 0; + + data = ctx->csa.priv2.puint_mb_R; + + return simple_read_from_buffer(buf, len, pos, &data, sizeof data); +} + +static ssize_t spufs_ibox_info_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx = file->private_data; + int ret; + + if (!access_ok(VERIFY_WRITE, buf, len)) + return -EFAULT; + + ret = spu_acquire_saved(ctx); + if (ret) + return ret; + spin_lock(&ctx->csa.register_lock); + ret = __spufs_ibox_info_read(ctx, buf, len, pos); + spin_unlock(&ctx->csa.register_lock); + spu_release_saved(ctx); + + return ret; +} + +static const struct file_operations 
spufs_ibox_info_fops = { + .open = spufs_info_open, + .read = spufs_ibox_info_read, + .llseek = generic_file_llseek, +}; + +static ssize_t __spufs_wbox_info_read(struct spu_context *ctx, + char __user *buf, size_t len, loff_t *pos) +{ + int i, cnt; + u32 data[4]; + u32 wbox_stat; + + wbox_stat = ctx->csa.prob.mb_stat_R; + cnt = 4 - ((wbox_stat & 0x00ff00) >> 8); + for (i = 0; i < cnt; i++) { + data[i] = ctx->csa.spu_mailbox_data[i]; + } + + return simple_read_from_buffer(buf, len, pos, &data, + cnt * sizeof(u32)); +} + +static ssize_t spufs_wbox_info_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx = file->private_data; + int ret; + + if (!access_ok(VERIFY_WRITE, buf, len)) + return -EFAULT; + + ret = spu_acquire_saved(ctx); + if (ret) + return ret; + spin_lock(&ctx->csa.register_lock); + ret = __spufs_wbox_info_read(ctx, buf, len, pos); + spin_unlock(&ctx->csa.register_lock); + spu_release_saved(ctx); + + return ret; +} + +static const struct file_operations spufs_wbox_info_fops = { + .open = spufs_info_open, + .read = spufs_wbox_info_read, + .llseek = generic_file_llseek, +}; + +static ssize_t __spufs_dma_info_read(struct spu_context *ctx, + char __user *buf, size_t len, loff_t *pos) +{ + struct spu_dma_info info; + struct mfc_cq_sr *qp, *spuqp; + int i; + + info.dma_info_type = ctx->csa.priv2.spu_tag_status_query_RW; + info.dma_info_mask = ctx->csa.lscsa->tag_mask.slot[0]; + info.dma_info_status = ctx->csa.spu_chnldata_RW[24]; + info.dma_info_stall_and_notify = ctx->csa.spu_chnldata_RW[25]; + info.dma_info_atomic_command_status = ctx->csa.spu_chnldata_RW[27]; + for (i = 0; i < 16; i++) { + qp = &info.dma_info_command_data[i]; + spuqp = &ctx->csa.priv2.spuq[i]; + + qp->mfc_cq_data0_RW = spuqp->mfc_cq_data0_RW; + qp->mfc_cq_data1_RW = spuqp->mfc_cq_data1_RW; + qp->mfc_cq_data2_RW = spuqp->mfc_cq_data2_RW; + qp->mfc_cq_data3_RW = spuqp->mfc_cq_data3_RW; + } + + return simple_read_from_buffer(buf, len, pos, &info, + sizeof info); +} + +static ssize_t spufs_dma_info_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx = file->private_data; + int ret; + + if (!access_ok(VERIFY_WRITE, buf, len)) + return -EFAULT; + + ret = spu_acquire_saved(ctx); + if (ret) + return ret; + spin_lock(&ctx->csa.register_lock); + ret = __spufs_dma_info_read(ctx, buf, len, pos); + spin_unlock(&ctx->csa.register_lock); + spu_release_saved(ctx); + + return ret; +} + +static const struct file_operations spufs_dma_info_fops = { + .open = spufs_info_open, + .read = spufs_dma_info_read, + .llseek = no_llseek, +}; + +static ssize_t __spufs_proxydma_info_read(struct spu_context *ctx, + char __user *buf, size_t len, loff_t *pos) +{ + struct spu_proxydma_info info; + struct mfc_cq_sr *qp, *puqp; + int ret = sizeof info; + int i; + + if (len < ret) + return -EINVAL; + + if (!access_ok(VERIFY_WRITE, buf, len)) + return -EFAULT; + + info.proxydma_info_type = ctx->csa.prob.dma_querytype_RW; + info.proxydma_info_mask = ctx->csa.prob.dma_querymask_RW; + info.proxydma_info_status = ctx->csa.prob.dma_tagstatus_R; + for (i = 0; i < 8; i++) { + qp = &info.proxydma_info_command_data[i]; + puqp = &ctx->csa.priv2.puq[i]; + + qp->mfc_cq_data0_RW = puqp->mfc_cq_data0_RW; + qp->mfc_cq_data1_RW = puqp->mfc_cq_data1_RW; + qp->mfc_cq_data2_RW = puqp->mfc_cq_data2_RW; + qp->mfc_cq_data3_RW = puqp->mfc_cq_data3_RW; + } + + return simple_read_from_buffer(buf, len, pos, &info, + sizeof info); +} + +static ssize_t spufs_proxydma_info_read(struct file 
*file, char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx = file->private_data; + int ret; + + ret = spu_acquire_saved(ctx); + if (ret) + return ret; + spin_lock(&ctx->csa.register_lock); + ret = __spufs_proxydma_info_read(ctx, buf, len, pos); + spin_unlock(&ctx->csa.register_lock); + spu_release_saved(ctx); + + return ret; +} + +static const struct file_operations spufs_proxydma_info_fops = { + .open = spufs_info_open, + .read = spufs_proxydma_info_read, + .llseek = no_llseek, +}; + +static int spufs_show_tid(struct seq_file *s, void *private) +{ + struct spu_context *ctx = s->private; + + seq_printf(s, "%d\n", ctx->tid); + return 0; +} + +static int spufs_tid_open(struct inode *inode, struct file *file) +{ + return single_open(file, spufs_show_tid, SPUFS_I(inode)->i_ctx); +} + +static const struct file_operations spufs_tid_fops = { + .open = spufs_tid_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static const char *ctx_state_names[] = { + "user", "system", "iowait", "loaded" +}; + +static unsigned long long spufs_acct_time(struct spu_context *ctx, + enum spu_utilization_state state) +{ + struct timespec ts; + unsigned long long time = ctx->stats.times[state]; + + /* + * In general, utilization statistics are updated by the controlling + * thread as the spu context moves through various well defined + * state transitions, but if the context is lazily loaded its + * utilization statistics are not updated as the controlling thread + * is not tightly coupled with the execution of the spu context. We + * calculate and apply the time delta from the last recorded state + * of the spu context. + */ + if (ctx->spu && ctx->stats.util_state == state) { + ktime_get_ts(&ts); + time += timespec_to_ns(&ts) - ctx->stats.tstamp; + } + + return time / NSEC_PER_MSEC; +} + +static unsigned long long spufs_slb_flts(struct spu_context *ctx) +{ + unsigned long long slb_flts = ctx->stats.slb_flt; + + if (ctx->state == SPU_STATE_RUNNABLE) { + slb_flts += (ctx->spu->stats.slb_flt - + ctx->stats.slb_flt_base); + } + + return slb_flts; +} + +static unsigned long long spufs_class2_intrs(struct spu_context *ctx) +{ + unsigned long long class2_intrs = ctx->stats.class2_intr; + + if (ctx->state == SPU_STATE_RUNNABLE) { + class2_intrs += (ctx->spu->stats.class2_intr - + ctx->stats.class2_intr_base); + } + + return class2_intrs; +} + + +static int spufs_show_stat(struct seq_file *s, void *private) +{ + struct spu_context *ctx = s->private; + int ret; + + ret = spu_acquire(ctx); + if (ret) + return ret; + + seq_printf(s, "%s %llu %llu %llu %llu " + "%llu %llu %llu %llu %llu %llu %llu %llu\n", + ctx_state_names[ctx->stats.util_state], + spufs_acct_time(ctx, SPU_UTIL_USER), + spufs_acct_time(ctx, SPU_UTIL_SYSTEM), + spufs_acct_time(ctx, SPU_UTIL_IOWAIT), + spufs_acct_time(ctx, SPU_UTIL_IDLE_LOADED), + ctx->stats.vol_ctx_switch, + ctx->stats.invol_ctx_switch, + spufs_slb_flts(ctx), + ctx->stats.hash_flt, + ctx->stats.min_flt, + ctx->stats.maj_flt, + spufs_class2_intrs(ctx), + ctx->stats.libassist); + spu_release(ctx); + return 0; +} + +static int spufs_stat_open(struct inode *inode, struct file *file) +{ + return single_open(file, spufs_show_stat, SPUFS_I(inode)->i_ctx); +} + +static const struct file_operations spufs_stat_fops = { + .open = spufs_stat_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static inline int spufs_switch_log_used(struct spu_context *ctx) +{ + return (ctx->switch_log->head - ctx->switch_log->tail) % + 
SWITCH_LOG_BUFSIZE; +} + +static inline int spufs_switch_log_avail(struct spu_context *ctx) +{ + return SWITCH_LOG_BUFSIZE - spufs_switch_log_used(ctx); +} + +static int spufs_switch_log_open(struct inode *inode, struct file *file) +{ + struct spu_context *ctx = SPUFS_I(inode)->i_ctx; + int rc; + + rc = spu_acquire(ctx); + if (rc) + return rc; + + if (ctx->switch_log) { + rc = -EBUSY; + goto out; + } + + ctx->switch_log = kmalloc(sizeof(struct switch_log) + + SWITCH_LOG_BUFSIZE * sizeof(struct switch_log_entry), + GFP_KERNEL); + + if (!ctx->switch_log) { + rc = -ENOMEM; + goto out; + } + + ctx->switch_log->head = ctx->switch_log->tail = 0; + init_waitqueue_head(&ctx->switch_log->wait); + rc = 0; + +out: + spu_release(ctx); + return rc; +} + +static int spufs_switch_log_release(struct inode *inode, struct file *file) +{ + struct spu_context *ctx = SPUFS_I(inode)->i_ctx; + int rc; + + rc = spu_acquire(ctx); + if (rc) + return rc; + + kfree(ctx->switch_log); + ctx->switch_log = NULL; + spu_release(ctx); + + return 0; +} + +static int switch_log_sprint(struct spu_context *ctx, char *tbuf, int n) +{ + struct switch_log_entry *p; + + p = ctx->switch_log->log + ctx->switch_log->tail % SWITCH_LOG_BUFSIZE; + + return snprintf(tbuf, n, "%u.%09u %d %u %u %llu\n", + (unsigned int) p->tstamp.tv_sec, + (unsigned int) p->tstamp.tv_nsec, + p->spu_id, + (unsigned int) p->type, + (unsigned int) p->val, + (unsigned long long) p->timebase); +} + +static ssize_t spufs_switch_log_read(struct file *file, char __user *buf, + size_t len, loff_t *ppos) +{ + struct inode *inode = file_inode(file); + struct spu_context *ctx = SPUFS_I(inode)->i_ctx; + int error = 0, cnt = 0; + + if (!buf) + return -EINVAL; + + error = spu_acquire(ctx); + if (error) + return error; + + while (cnt < len) { + char tbuf[128]; + int width; + + if (spufs_switch_log_used(ctx) == 0) { + if (cnt > 0) { + /* If there's data ready to go, we can + * just return straight away */ + break; + + } else if (file->f_flags & O_NONBLOCK) { + error = -EAGAIN; + break; + + } else { + /* spufs_wait will drop the mutex and + * re-acquire, but since we're in read(), the + * file cannot be _released (and so + * ctx->switch_log is stable). + */ + error = spufs_wait(ctx->switch_log->wait, + spufs_switch_log_used(ctx) > 0); + + /* On error, spufs_wait returns without the + * state mutex held */ + if (error) + return error; + + /* We may have had entries read from underneath + * us while we dropped the mutex in spufs_wait, + * so re-check */ + if (spufs_switch_log_used(ctx) == 0) + continue; + } + } + + width = switch_log_sprint(ctx, tbuf, sizeof(tbuf)); + if (width < len) + ctx->switch_log->tail = + (ctx->switch_log->tail + 1) % + SWITCH_LOG_BUFSIZE; + else + /* If the record is greater than space available return + * partial buffer (so far) */ + break; + + error = copy_to_user(buf + cnt, tbuf, width); + if (error) + break; + cnt += width; + } + + spu_release(ctx); + + return cnt == 0 ? 
error : cnt; +} + +static unsigned int spufs_switch_log_poll(struct file *file, poll_table *wait) +{ + struct inode *inode = file_inode(file); + struct spu_context *ctx = SPUFS_I(inode)->i_ctx; + unsigned int mask = 0; + int rc; + + poll_wait(file, &ctx->switch_log->wait, wait); + + rc = spu_acquire(ctx); + if (rc) + return rc; + + if (spufs_switch_log_used(ctx) > 0) + mask |= POLLIN; + + spu_release(ctx); + + return mask; +} + +static const struct file_operations spufs_switch_log_fops = { + .open = spufs_switch_log_open, + .read = spufs_switch_log_read, + .poll = spufs_switch_log_poll, + .release = spufs_switch_log_release, + .llseek = no_llseek, +}; + +/** + * Log a context switch event to a switch log reader. + * + * Must be called with ctx->state_mutex held. + */ +void spu_switch_log_notify(struct spu *spu, struct spu_context *ctx, + u32 type, u32 val) +{ + if (!ctx->switch_log) + return; + + if (spufs_switch_log_avail(ctx) > 1) { + struct switch_log_entry *p; + + p = ctx->switch_log->log + ctx->switch_log->head; + ktime_get_ts(&p->tstamp); + p->timebase = get_tb(); + p->spu_id = spu ? spu->number : -1; + p->type = type; + p->val = val; + + ctx->switch_log->head = + (ctx->switch_log->head + 1) % SWITCH_LOG_BUFSIZE; + } + + wake_up(&ctx->switch_log->wait); +} + +static int spufs_show_ctx(struct seq_file *s, void *private) +{ + struct spu_context *ctx = s->private; + u64 mfc_control_RW; + + mutex_lock(&ctx->state_mutex); + if (ctx->spu) { + struct spu *spu = ctx->spu; + struct spu_priv2 __iomem *priv2 = spu->priv2; + + spin_lock_irq(&spu->register_lock); + mfc_control_RW = in_be64(&priv2->mfc_control_RW); + spin_unlock_irq(&spu->register_lock); + } else { + struct spu_state *csa = &ctx->csa; + + mfc_control_RW = csa->priv2.mfc_control_RW; + } + + seq_printf(s, "%c flgs(%lx) sflgs(%lx) pri(%d) ts(%d) spu(%02d)" + " %c %llx %llx %llx %llx %x %x\n", + ctx->state == SPU_STATE_SAVED ? 'S' : 'R', + ctx->flags, + ctx->sched_flags, + ctx->prio, + ctx->time_slice, + ctx->spu ? ctx->spu->number : -1, + !list_empty(&ctx->rq) ? 
'q' : ' ', + ctx->csa.class_0_pending, + ctx->csa.class_0_dar, + ctx->csa.class_1_dsisr, + mfc_control_RW, + ctx->ops->runcntl_read(ctx), + ctx->ops->status_read(ctx)); + + mutex_unlock(&ctx->state_mutex); + + return 0; +} + +static int spufs_ctx_open(struct inode *inode, struct file *file) +{ + return single_open(file, spufs_show_ctx, SPUFS_I(inode)->i_ctx); +} + +static const struct file_operations spufs_ctx_fops = { + .open = spufs_ctx_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +const struct spufs_tree_descr spufs_dir_contents[] = { + { "capabilities", &spufs_caps_fops, 0444, }, + { "mem", &spufs_mem_fops, 0666, LS_SIZE, }, + { "regs", &spufs_regs_fops, 0666, sizeof(struct spu_reg128[128]), }, { "mbox", &spufs_mbox_fops, 0444, }, { "ibox", &spufs_ibox_fops, 0444, }, { "wbox", &spufs_wbox_fops, 0222, }, - { "mbox_stat", &spufs_mbox_stat_fops, 0444, }, - { "ibox_stat", &spufs_ibox_stat_fops, 0444, }, - { "wbox_stat", &spufs_wbox_stat_fops, 0444, }, + { "mbox_stat", &spufs_mbox_stat_fops, 0444, sizeof(u32), }, + { "ibox_stat", &spufs_ibox_stat_fops, 0444, sizeof(u32), }, + { "wbox_stat", &spufs_wbox_stat_fops, 0444, sizeof(u32), }, { "signal1", &spufs_signal1_fops, 0666, }, { "signal2", &spufs_signal2_fops, 0666, }, { "signal1_type", &spufs_signal1_type, 0666, }, { "signal2_type", &spufs_signal2_type, 0666, }, + { "cntl", &spufs_cntl_fops, 0666, }, + { "fpcr", &spufs_fpcr_fops, 0666, sizeof(struct spu_reg128), }, + { "lslr", &spufs_lslr_ops, 0444, }, + { "mfc", &spufs_mfc_fops, 0666, }, + { "mss", &spufs_mss_fops, 0666, }, { "npc", &spufs_npc_ops, 0666, }, - { "fpcr", &spufs_fpcr_fops, 0666, }, + { "srr0", &spufs_srr0_ops, 0666, }, { "decr", &spufs_decr_ops, 0666, }, { "decr_status", &spufs_decr_status_ops, 0666, }, - { "spu_tag_mask", &spufs_spu_tag_mask_ops, 0666, }, { "event_mask", &spufs_event_mask_ops, 0666, }, - { "srr0", &spufs_srr0_ops, 0666, }, + { "event_status", &spufs_event_status_ops, 0444, }, + { "psmap", &spufs_psmap_fops, 0666, SPUFS_PS_MAP_SIZE, }, + { "phys-id", &spufs_id_ops, 0666, }, + { "object-id", &spufs_object_id_ops, 0666, }, + { "mbox_info", &spufs_mbox_info_fops, 0444, sizeof(u32), }, + { "ibox_info", &spufs_ibox_info_fops, 0444, sizeof(u32), }, + { "wbox_info", &spufs_wbox_info_fops, 0444, sizeof(u32), }, + { "dma_info", &spufs_dma_info_fops, 0444, + sizeof(struct spu_dma_info), }, + { "proxydma_info", &spufs_proxydma_info_fops, 0444, + sizeof(struct spu_proxydma_info)}, + { "tid", &spufs_tid_fops, 0444, }, + { "stat", &spufs_stat_fops, 0444, }, + { "switch_log", &spufs_switch_log_fops, 0444 }, {}, }; + +const struct spufs_tree_descr spufs_dir_nosched_contents[] = { + { "capabilities", &spufs_caps_fops, 0444, }, + { "mem", &spufs_mem_fops, 0666, LS_SIZE, }, + { "mbox", &spufs_mbox_fops, 0444, }, + { "ibox", &spufs_ibox_fops, 0444, }, + { "wbox", &spufs_wbox_fops, 0222, }, + { "mbox_stat", &spufs_mbox_stat_fops, 0444, sizeof(u32), }, + { "ibox_stat", &spufs_ibox_stat_fops, 0444, sizeof(u32), }, + { "wbox_stat", &spufs_wbox_stat_fops, 0444, sizeof(u32), }, + { "signal1", &spufs_signal1_nosched_fops, 0222, }, + { "signal2", &spufs_signal2_nosched_fops, 0222, }, + { "signal1_type", &spufs_signal1_type, 0666, }, + { "signal2_type", &spufs_signal2_type, 0666, }, + { "mss", &spufs_mss_fops, 0666, }, + { "mfc", &spufs_mfc_fops, 0666, }, + { "cntl", &spufs_cntl_fops, 0666, }, + { "npc", &spufs_npc_ops, 0666, }, + { "psmap", &spufs_psmap_fops, 0666, SPUFS_PS_MAP_SIZE, }, + { "phys-id", &spufs_id_ops, 0666, }, + { "object-id", 
&spufs_object_id_ops, 0666, }, + { "tid", &spufs_tid_fops, 0444, }, + { "stat", &spufs_stat_fops, 0444, }, + {}, +}; + +const struct spufs_tree_descr spufs_dir_debug_contents[] = { + { ".ctx", &spufs_ctx_fops, 0444, }, + {}, +}; + +const struct spufs_coredump_reader spufs_coredump_read[] = { + { "regs", __spufs_regs_read, NULL, sizeof(struct spu_reg128[128])}, + { "fpcr", __spufs_fpcr_read, NULL, sizeof(struct spu_reg128) }, + { "lslr", NULL, spufs_lslr_get, 19 }, + { "decr", NULL, spufs_decr_get, 19 }, + { "decr_status", NULL, spufs_decr_status_get, 19 }, + { "mem", __spufs_mem_read, NULL, LS_SIZE, }, + { "signal1", __spufs_signal1_read, NULL, sizeof(u32) }, + { "signal1_type", NULL, spufs_signal1_type_get, 19 }, + { "signal2", __spufs_signal2_read, NULL, sizeof(u32) }, + { "signal2_type", NULL, spufs_signal2_type_get, 19 }, + { "event_mask", NULL, spufs_event_mask_get, 19 }, + { "event_status", NULL, spufs_event_status_get, 19 }, + { "mbox_info", __spufs_mbox_info_read, NULL, sizeof(u32) }, + { "ibox_info", __spufs_ibox_info_read, NULL, sizeof(u32) }, + { "wbox_info", __spufs_wbox_info_read, NULL, 4 * sizeof(u32)}, + { "dma_info", __spufs_dma_info_read, NULL, sizeof(struct spu_dma_info)}, + { "proxydma_info", __spufs_proxydma_info_read, + NULL, sizeof(struct spu_proxydma_info)}, + { "object-id", NULL, spufs_object_id_get, 19 }, + { "npc", NULL, spufs_npc_get, 19 }, + { NULL }, +}; diff --git a/arch/powerpc/platforms/cell/spufs/gang.c b/arch/powerpc/platforms/cell/spufs/gang.c new file mode 100644 index 00000000000..71a44325302 --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/gang.c @@ -0,0 +1,87 @@ +/* + * SPU file system + * + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 + * + * Author: Arnd Bergmann <arndb@de.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#include <linux/list.h> +#include <linux/slab.h> + +#include "spufs.h" + +struct spu_gang *alloc_spu_gang(void) +{ + struct spu_gang *gang; + + gang = kzalloc(sizeof *gang, GFP_KERNEL); + if (!gang) + goto out; + + kref_init(&gang->kref); + mutex_init(&gang->mutex); + mutex_init(&gang->aff_mutex); + INIT_LIST_HEAD(&gang->list); + INIT_LIST_HEAD(&gang->aff_list_head); + +out: + return gang; +} + +static void destroy_spu_gang(struct kref *kref) +{ + struct spu_gang *gang; + gang = container_of(kref, struct spu_gang, kref); + WARN_ON(gang->contexts || !list_empty(&gang->list)); + kfree(gang); +} + +struct spu_gang *get_spu_gang(struct spu_gang *gang) +{ + kref_get(&gang->kref); + return gang; +} + +int put_spu_gang(struct spu_gang *gang) +{ + return kref_put(&gang->kref, &destroy_spu_gang); +} + +void spu_gang_add_ctx(struct spu_gang *gang, struct spu_context *ctx) +{ + mutex_lock(&gang->mutex); + ctx->gang = get_spu_gang(gang); + list_add(&ctx->gang_list, &gang->list); + gang->contexts++; + mutex_unlock(&gang->mutex); +} + +void spu_gang_remove_ctx(struct spu_gang *gang, struct spu_context *ctx) +{ + mutex_lock(&gang->mutex); + WARN_ON(ctx->gang != gang); + if (!list_empty(&ctx->aff_list)) { + list_del_init(&ctx->aff_list); + gang->aff_flags &= ~AFF_OFFSETS_SET; + } + list_del_init(&ctx->gang_list); + gang->contexts--; + mutex_unlock(&gang->mutex); + + put_spu_gang(gang); +} diff --git a/arch/powerpc/platforms/cell/spufs/hw_ops.c b/arch/powerpc/platforms/cell/spufs/hw_ops.c index 5445719bff7..8655c4cbefc 100644 --- a/arch/powerpc/platforms/cell/spufs/hw_ops.c +++ b/arch/powerpc/platforms/cell/spufs/hw_ops.c @@ -18,20 +18,18 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ -#include <linux/config.h> -#include <linux/module.h> #include <linux/errno.h> #include <linux/sched.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/poll.h> #include <linux/smp.h> -#include <linux/smp_lock.h> #include <linux/stddef.h> #include <linux/unistd.h> #include <asm/io.h> #include <asm/spu.h> +#include <asm/spu_priv1.h> #include <asm/spu_csa.h> #include <asm/mmu_context.h> #include "spufs.h" @@ -77,16 +75,18 @@ static unsigned int spu_hw_mbox_stat_poll(struct spu_context *ctx, if (stat & 0xff0000) ret |= POLLIN | POLLRDNORM; else { - spu_int_stat_clear(spu, 2, 0x1); - spu_int_mask_or(spu, 2, 0x1); + spu_int_stat_clear(spu, 2, CLASS2_MAILBOX_INTR); + spu_int_mask_or(spu, 2, CLASS2_ENABLE_MAILBOX_INTR); } } if (events & (POLLOUT | POLLWRNORM)) { if (stat & 0x00ff00) ret = POLLOUT | POLLWRNORM; else { - spu_int_stat_clear(spu, 2, 0x10); - spu_int_mask_or(spu, 2, 0x10); + spu_int_stat_clear(spu, 2, + CLASS2_MAILBOX_THRESHOLD_INTR); + spu_int_mask_or(spu, 2, + CLASS2_ENABLE_MAILBOX_THRESHOLD_INTR); } } spin_unlock_irq(&spu->register_lock); @@ -107,7 +107,7 @@ static int spu_hw_ibox_read(struct spu_context *ctx, u32 * data) ret = 4; } else { /* make sure we get woken up by the interrupt */ - spu_int_mask_or(spu, 2, 0x1); + spu_int_mask_or(spu, 2, CLASS2_ENABLE_MAILBOX_INTR); ret = 0; } spin_unlock_irq(&spu->register_lock); @@ -128,28 +128,18 @@ static int spu_hw_wbox_write(struct spu_context *ctx, u32 data) } else { /* make sure we get woken up by the interrupt when space becomes available */ - spu_int_mask_or(spu, 2, 0x10); + spu_int_mask_or(spu, 2, CLASS2_ENABLE_MAILBOX_THRESHOLD_INTR); ret = 0; } spin_unlock_irq(&spu->register_lock); return ret; } -static u32 spu_hw_signal1_read(struct spu_context *ctx) -{ - return in_be32(&ctx->spu->problem->signal_notify1); -} - static void 
spu_hw_signal1_write(struct spu_context *ctx, u32 data) { out_be32(&ctx->spu->problem->signal_notify1, data); } -static u32 spu_hw_signal2_read(struct spu_context *ctx) -{ - return in_be32(&ctx->spu->problem->signal_notify1); -} - static void spu_hw_signal2_write(struct spu_context *ctx, u32 data) { out_be32(&ctx->spu->problem->signal_notify2, data); @@ -217,10 +207,24 @@ static char *spu_hw_get_ls(struct spu_context *ctx) return ctx->spu->local_store; } +static void spu_hw_privcntl_write(struct spu_context *ctx, u64 val) +{ + out_be64(&ctx->spu->priv2->spu_privcntl_RW, val); +} + +static u32 spu_hw_runcntl_read(struct spu_context *ctx) +{ + return in_be32(&ctx->spu->problem->spu_runcntl_RW); +} + static void spu_hw_runcntl_write(struct spu_context *ctx, u32 val) { - eieio(); + spin_lock_irq(&ctx->spu->register_lock); + if (val & SPU_RUNCNTL_ISOLATE) + spu_hw_privcntl_write(ctx, + SPU_PRIVCNT_LOAD_REQUEST_ENABLE_MASK); out_be32(&ctx->spu->problem->spu_runcntl_RW, val); + spin_unlock_irq(&ctx->spu->register_lock); } static void spu_hw_runcntl_stop(struct spu_context *ctx) @@ -232,15 +236,96 @@ static void spu_hw_runcntl_stop(struct spu_context *ctx) spin_unlock_irq(&ctx->spu->register_lock); } +static void spu_hw_master_start(struct spu_context *ctx) +{ + struct spu *spu = ctx->spu; + u64 sr1; + + spin_lock_irq(&spu->register_lock); + sr1 = spu_mfc_sr1_get(spu) | MFC_STATE1_MASTER_RUN_CONTROL_MASK; + spu_mfc_sr1_set(spu, sr1); + spin_unlock_irq(&spu->register_lock); +} + +static void spu_hw_master_stop(struct spu_context *ctx) +{ + struct spu *spu = ctx->spu; + u64 sr1; + + spin_lock_irq(&spu->register_lock); + sr1 = spu_mfc_sr1_get(spu) & ~MFC_STATE1_MASTER_RUN_CONTROL_MASK; + spu_mfc_sr1_set(spu, sr1); + spin_unlock_irq(&spu->register_lock); +} + +static int spu_hw_set_mfc_query(struct spu_context * ctx, u32 mask, u32 mode) +{ + struct spu_problem __iomem *prob = ctx->spu->problem; + int ret; + + spin_lock_irq(&ctx->spu->register_lock); + ret = -EAGAIN; + if (in_be32(&prob->dma_querytype_RW)) + goto out; + ret = 0; + out_be32(&prob->dma_querymask_RW, mask); + out_be32(&prob->dma_querytype_RW, mode); +out: + spin_unlock_irq(&ctx->spu->register_lock); + return ret; +} + +static u32 spu_hw_read_mfc_tagstatus(struct spu_context * ctx) +{ + return in_be32(&ctx->spu->problem->dma_tagstatus_R); +} + +static u32 spu_hw_get_mfc_free_elements(struct spu_context *ctx) +{ + return in_be32(&ctx->spu->problem->dma_qstatus_R); +} + +static int spu_hw_send_mfc_command(struct spu_context *ctx, + struct mfc_dma_command *cmd) +{ + u32 status; + struct spu_problem __iomem *prob = ctx->spu->problem; + + spin_lock_irq(&ctx->spu->register_lock); + out_be32(&prob->mfc_lsa_W, cmd->lsa); + out_be64(&prob->mfc_ea_W, cmd->ea); + out_be32(&prob->mfc_union_W.by32.mfc_size_tag32, + cmd->size << 16 | cmd->tag); + out_be32(&prob->mfc_union_W.by32.mfc_class_cmd32, + cmd->class << 16 | cmd->cmd); + status = in_be32(&prob->mfc_union_W.by32.mfc_class_cmd32); + spin_unlock_irq(&ctx->spu->register_lock); + + switch (status & 0xffff) { + case 0: + return 0; + case 2: + return -EAGAIN; + default: + return -EINVAL; + } +} + +static void spu_hw_restart_dma(struct spu_context *ctx) +{ + struct spu_priv2 __iomem *priv2 = ctx->spu->priv2; + + if (!test_bit(SPU_CONTEXT_SWITCH_PENDING, &ctx->spu->flags)) + out_be64(&priv2->mfc_control_RW, MFC_CNTL_RESTART_DMA_COMMAND); +} + struct spu_context_ops spu_hw_ops = { .mbox_read = spu_hw_mbox_read, .mbox_stat_read = spu_hw_mbox_stat_read, .mbox_stat_poll = spu_hw_mbox_stat_poll, .ibox_read = 
spu_hw_ibox_read, .wbox_write = spu_hw_wbox_write, - .signal1_read = spu_hw_signal1_read, .signal1_write = spu_hw_signal1_write, - .signal2_read = spu_hw_signal2_read, .signal2_write = spu_hw_signal2_write, .signal1_type_set = spu_hw_signal1_type_set, .signal1_type_get = spu_hw_signal1_type_get, @@ -250,6 +335,15 @@ struct spu_context_ops spu_hw_ops = { .npc_write = spu_hw_npc_write, .status_read = spu_hw_status_read, .get_ls = spu_hw_get_ls, + .privcntl_write = spu_hw_privcntl_write, + .runcntl_read = spu_hw_runcntl_read, .runcntl_write = spu_hw_runcntl_write, .runcntl_stop = spu_hw_runcntl_stop, + .master_start = spu_hw_master_start, + .master_stop = spu_hw_master_stop, + .set_mfc_query = spu_hw_set_mfc_query, + .read_mfc_tagstatus = spu_hw_read_mfc_tagstatus, + .get_mfc_free_elements = spu_hw_get_mfc_free_elements, + .send_mfc_command = spu_hw_send_mfc_command, + .restart_dma = spu_hw_restart_dma, }; diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index b3962c3a034..87ba7cf99cd 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -1,3 +1,4 @@ + /* * SPU file system * @@ -22,6 +23,7 @@ #include <linux/file.h> #include <linux/fs.h> +#include <linux/fsnotify.h> #include <linux/backing-dev.h> #include <linux/init.h> #include <linux/ioctl.h> @@ -33,45 +35,63 @@ #include <linux/slab.h> #include <linux/parser.h> -#include <asm/io.h> -#include <asm/semaphore.h> +#include <asm/prom.h> #include <asm/spu.h> +#include <asm/spu_priv1.h> #include <asm/uaccess.h> #include "spufs.h" -static kmem_cache_t *spufs_inode_cache; +struct spufs_sb_info { + int debug; +}; + +static struct kmem_cache *spufs_inode_cache; +char *isolated_loader; +static int isolated_loader_size; + +static struct spufs_sb_info *spufs_get_sb_info(struct super_block *sb) +{ + return sb->s_fs_info; +} static struct inode * spufs_alloc_inode(struct super_block *sb) { struct spufs_inode_info *ei; - ei = kmem_cache_alloc(spufs_inode_cache, SLAB_KERNEL); + ei = kmem_cache_alloc(spufs_inode_cache, GFP_KERNEL); if (!ei) return NULL; + + ei->i_gang = NULL; + ei->i_ctx = NULL; + ei->i_openers = 0; + return &ei->vfs_inode; } -static void -spufs_destroy_inode(struct inode *inode) +static void spufs_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); kmem_cache_free(spufs_inode_cache, SPUFS_I(inode)); } +static void spufs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, spufs_i_callback); +} + static void -spufs_init_once(void *p, kmem_cache_t * cachep, unsigned long flags) +spufs_init_once(void *p) { struct spufs_inode_info *ei = p; - if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) { - inode_init_once(&ei->vfs_inode); - } + inode_init_once(&ei->vfs_inode); } static struct inode * -spufs_new_inode(struct super_block *sb, int mode) +spufs_new_inode(struct super_block *sb, umode_t mode) { struct inode *inode; @@ -79,11 +99,10 @@ spufs_new_inode(struct super_block *sb, int mode) if (!inode) goto out; + inode->i_ino = get_next_ino(); inode->i_mode = mode; - inode->i_uid = current->fsuid; - inode->i_gid = current->fsgid; - inode->i_blksize = PAGE_CACHE_SIZE; - inode->i_blocks = 0; + inode->i_uid = current_fsuid(); + inode->i_gid = current_fsgid(); inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; out: return inode; @@ -97,16 +116,18 @@ spufs_setattr(struct dentry *dentry, struct iattr *attr) if ((attr->ia_valid & ATTR_SIZE) && 
(attr->ia_size != inode->i_size)) return -EINVAL; - return inode_setattr(inode, attr); + setattr_copy(inode, attr); + mark_inode_dirty(inode); + return 0; } static int spufs_new_file(struct super_block *sb, struct dentry *dentry, - struct file_operations *fops, int mode, - struct spu_context *ctx) + const struct file_operations *fops, umode_t mode, + size_t size, struct spu_context *ctx) { - static struct inode_operations spufs_file_iops = { + static const struct inode_operations spufs_file_iops = { .setattr = spufs_setattr, }; struct inode *inode; @@ -120,234 +141,470 @@ spufs_new_file(struct super_block *sb, struct dentry *dentry, ret = 0; inode->i_op = &spufs_file_iops; inode->i_fop = fops; - inode->u.generic_ip = SPUFS_I(inode)->i_ctx = get_spu_context(ctx); + inode->i_size = size; + inode->i_private = SPUFS_I(inode)->i_ctx = get_spu_context(ctx); d_add(dentry, inode); out: return ret; } static void -spufs_delete_inode(struct inode *inode) +spufs_evict_inode(struct inode *inode) { - if (SPUFS_I(inode)->i_ctx) - put_spu_context(SPUFS_I(inode)->i_ctx); + struct spufs_inode_info *ei = SPUFS_I(inode); clear_inode(inode); + if (ei->i_ctx) + put_spu_context(ei->i_ctx); + if (ei->i_gang) + put_spu_gang(ei->i_gang); } static void spufs_prune_dir(struct dentry *dir) { struct dentry *dentry, *tmp; + mutex_lock(&dir->d_inode->i_mutex); list_for_each_entry_safe(dentry, tmp, &dir->d_subdirs, d_u.d_child) { - spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); if (!(d_unhashed(dentry)) && dentry->d_inode) { - dget_locked(dentry); + dget_dlock(dentry); __d_drop(dentry); spin_unlock(&dentry->d_lock); simple_unlink(dir->d_inode, dentry); - spin_unlock(&dcache_lock); + /* XXX: what was dcache_lock protecting here? Other + * filesystems (IB, configfs) release dcache_lock + * before unlink */ dput(dentry); } else { spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); } } shrink_dcache_parent(dir); mutex_unlock(&dir->d_inode->i_mutex); } -static int spufs_rmdir(struct inode *root, struct dentry *dir_dentry) +/* Caller must hold parent->i_mutex */ +static int spufs_rmdir(struct inode *parent, struct dentry *dir) { - struct spu_context *ctx; - /* remove all entries */ - mutex_lock(&root->i_mutex); - spufs_prune_dir(dir_dentry); - mutex_unlock(&root->i_mutex); - + int res; + spufs_prune_dir(dir); + d_drop(dir); + res = simple_rmdir(parent, dir); /* We have to give up the mm_struct */ - ctx = SPUFS_I(dir_dentry->d_inode)->i_ctx; - spu_forget(ctx); - - /* XXX Do we need to hold i_mutex here ? 
*/ - return simple_rmdir(root, dir_dentry); + spu_forget(SPUFS_I(dir->d_inode)->i_ctx); + return res; } -static int spufs_fill_dir(struct dentry *dir, struct tree_descr *files, - int mode, struct spu_context *ctx) +static int spufs_fill_dir(struct dentry *dir, + const struct spufs_tree_descr *files, umode_t mode, + struct spu_context *ctx) { - struct dentry *dentry; - int ret; - while (files->name && files->name[0]) { - ret = -ENOMEM; - dentry = d_alloc_name(dir, files->name); + int ret; + struct dentry *dentry = d_alloc_name(dir, files->name); if (!dentry) - goto out; + return -ENOMEM; ret = spufs_new_file(dir->d_sb, dentry, files->ops, - files->mode & mode, ctx); + files->mode & mode, files->size, ctx); if (ret) - goto out; + return ret; files++; } return 0; -out: - spufs_prune_dir(dir); - return ret; } static int spufs_dir_close(struct inode *inode, struct file *file) { - struct inode *dir; - struct dentry *dentry; + struct spu_context *ctx; + struct inode *parent; + struct dentry *dir; int ret; - dentry = file->f_dentry; - dir = dentry->d_parent->d_inode; + dir = file->f_path.dentry; + parent = dir->d_parent->d_inode; + ctx = SPUFS_I(dir->d_inode)->i_ctx; - ret = spufs_rmdir(dir, dentry); + mutex_lock_nested(&parent->i_mutex, I_MUTEX_PARENT); + ret = spufs_rmdir(parent, dir); + mutex_unlock(&parent->i_mutex); WARN_ON(ret); return dcache_dir_close(inode, file); } -struct inode_operations spufs_dir_inode_operations = { - .lookup = simple_lookup, -}; - -struct file_operations spufs_context_fops = { +const struct file_operations spufs_context_fops = { .open = dcache_dir_open, .release = spufs_dir_close, .llseek = dcache_dir_lseek, .read = generic_read_dir, - .readdir = dcache_readdir, - .fsync = simple_sync_file, + .iterate = dcache_readdir, + .fsync = noop_fsync, }; +EXPORT_SYMBOL_GPL(spufs_context_fops); static int -spufs_mkdir(struct inode *dir, struct dentry *dentry, int mode) +spufs_mkdir(struct inode *dir, struct dentry *dentry, unsigned int flags, + umode_t mode) { int ret; struct inode *inode; struct spu_context *ctx; - ret = -ENOSPC; inode = spufs_new_inode(dir->i_sb, mode | S_IFDIR); if (!inode) - goto out; + return -ENOSPC; if (dir->i_mode & S_ISGID) { inode->i_gid = dir->i_gid; inode->i_mode &= S_ISGID; } - ctx = alloc_spu_context(inode->i_mapping); + ctx = alloc_spu_context(SPUFS_I(dir)->i_gang); /* XXX gang */ SPUFS_I(inode)->i_ctx = ctx; - if (!ctx) - goto out_iput; + if (!ctx) { + iput(inode); + return -ENOSPC; + } - inode->i_op = &spufs_dir_inode_operations; + ctx->flags = flags; + inode->i_op = &simple_dir_inode_operations; inode->i_fop = &simple_dir_operations; - ret = spufs_fill_dir(dentry, spufs_dir_contents, mode, ctx); - if (ret) - goto out_free_ctx; - d_instantiate(dentry, inode); + mutex_lock(&inode->i_mutex); + dget(dentry); - dir->i_nlink++; - dentry->d_inode->i_nlink++; - goto out; + inc_nlink(dir); + inc_nlink(inode); + + d_instantiate(dentry, inode); + + if (flags & SPU_CREATE_NOSCHED) + ret = spufs_fill_dir(dentry, spufs_dir_nosched_contents, + mode, ctx); + else + ret = spufs_fill_dir(dentry, spufs_dir_contents, mode, ctx); + + if (!ret && spufs_get_sb_info(dir->i_sb)->debug) + ret = spufs_fill_dir(dentry, spufs_dir_debug_contents, + mode, ctx); + + if (ret) + spufs_rmdir(dir, dentry); + + mutex_unlock(&inode->i_mutex); -out_free_ctx: - put_spu_context(ctx); -out_iput: - iput(inode); -out: return ret; } -static int spufs_context_open(struct dentry *dentry, struct vfsmount *mnt) +static int spufs_context_open(struct path *path) { int ret; struct file *filp; 
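	/*
	 * Hand the freshly created context directory back to user space as
	 * an open file descriptor: the fd reserved below is what
	 * sys_spu_create() ultimately returns, and it is the fd user space
	 * later passes to sys_spu_run().  A rough user-space sketch
	 * (hypothetical path, assuming spufs is mounted on /spu):
	 *
	 *	unsigned int npc = 0, status = 0;
	 *	int fd = syscall(__NR_spu_create, "/spu/myctx", 0, 0755);
	 *	syscall(__NR_spu_run, fd, &npc, &status);
	 */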
ret = get_unused_fd(); - if (ret < 0) { - dput(dentry); - mntput(mnt); - goto out; - } + if (ret < 0) + return ret; - filp = dentry_open(dentry, mnt, O_RDONLY); + filp = dentry_open(path, O_RDONLY, current_cred()); if (IS_ERR(filp)) { put_unused_fd(ret); - ret = PTR_ERR(filp); - goto out; + return PTR_ERR(filp); } filp->f_op = &spufs_context_fops; fd_install(ret, filp); -out: return ret; } -static struct file_system_type spufs_type; +static struct spu_context * +spufs_assert_affinity(unsigned int flags, struct spu_gang *gang, + struct file *filp) +{ + struct spu_context *tmp, *neighbor, *err; + int count, node; + int aff_supp; + + aff_supp = !list_empty(&(list_entry(cbe_spu_info[0].spus.next, + struct spu, cbe_list))->aff_list); + + if (!aff_supp) + return ERR_PTR(-EINVAL); + + if (flags & SPU_CREATE_GANG) + return ERR_PTR(-EINVAL); + + if (flags & SPU_CREATE_AFFINITY_MEM && + gang->aff_ref_ctx && + gang->aff_ref_ctx->flags & SPU_CREATE_AFFINITY_MEM) + return ERR_PTR(-EEXIST); + + if (gang->aff_flags & AFF_MERGED) + return ERR_PTR(-EBUSY); + + neighbor = NULL; + if (flags & SPU_CREATE_AFFINITY_SPU) { + if (!filp || filp->f_op != &spufs_context_fops) + return ERR_PTR(-EINVAL); + + neighbor = get_spu_context( + SPUFS_I(file_inode(filp))->i_ctx); + + if (!list_empty(&neighbor->aff_list) && !(neighbor->aff_head) && + !list_is_last(&neighbor->aff_list, &gang->aff_list_head) && + !list_entry(neighbor->aff_list.next, struct spu_context, + aff_list)->aff_head) { + err = ERR_PTR(-EEXIST); + goto out_put_neighbor; + } + + if (gang != neighbor->gang) { + err = ERR_PTR(-EINVAL); + goto out_put_neighbor; + } + + count = 1; + list_for_each_entry(tmp, &gang->aff_list_head, aff_list) + count++; + if (list_empty(&neighbor->aff_list)) + count++; + + for (node = 0; node < MAX_NUMNODES; node++) { + if ((cbe_spu_info[node].n_spus - atomic_read( + &cbe_spu_info[node].reserved_spus)) >= count) + break; + } + + if (node == MAX_NUMNODES) { + err = ERR_PTR(-EEXIST); + goto out_put_neighbor; + } + } + + return neighbor; + +out_put_neighbor: + put_spu_context(neighbor); + return err; +} + +static void +spufs_set_affinity(unsigned int flags, struct spu_context *ctx, + struct spu_context *neighbor) +{ + if (flags & SPU_CREATE_AFFINITY_MEM) + ctx->gang->aff_ref_ctx = ctx; + + if (flags & SPU_CREATE_AFFINITY_SPU) { + if (list_empty(&neighbor->aff_list)) { + list_add_tail(&neighbor->aff_list, + &ctx->gang->aff_list_head); + neighbor->aff_head = 1; + } + + if (list_is_last(&neighbor->aff_list, &ctx->gang->aff_list_head) + || list_entry(neighbor->aff_list.next, struct spu_context, + aff_list)->aff_head) { + list_add(&ctx->aff_list, &neighbor->aff_list); + } else { + list_add_tail(&ctx->aff_list, &neighbor->aff_list); + if (neighbor->aff_head) { + neighbor->aff_head = 0; + ctx->aff_head = 1; + } + } + + if (!ctx->gang->aff_ref_ctx) + ctx->gang->aff_ref_ctx = ctx; + } +} -long spufs_create_thread(struct nameidata *nd, - unsigned int flags, mode_t mode) +static int +spufs_create_context(struct inode *inode, struct dentry *dentry, + struct vfsmount *mnt, int flags, umode_t mode, + struct file *aff_filp) { - struct dentry *dentry; int ret; + int affinity; + struct spu_gang *gang; + struct spu_context *neighbor; + struct path path = {.mnt = mnt, .dentry = dentry}; - /* need to be at the root of spufs */ - ret = -EINVAL; - if (nd->dentry->d_sb->s_type != &spufs_type || - nd->dentry != nd->dentry->d_sb->s_root) - goto out; + if ((flags & SPU_CREATE_NOSCHED) && + !capable(CAP_SYS_NICE)) + return -EPERM; - dentry = lookup_create(nd, 
1); - ret = PTR_ERR(dentry); - if (IS_ERR(dentry)) - goto out_dir; + if ((flags & (SPU_CREATE_NOSCHED | SPU_CREATE_ISOLATE)) + == SPU_CREATE_ISOLATE) + return -EINVAL; - ret = -EEXIST; - if (dentry->d_inode) - goto out_dput; + if ((flags & SPU_CREATE_ISOLATE) && !isolated_loader) + return -ENODEV; + + gang = NULL; + neighbor = NULL; + affinity = flags & (SPU_CREATE_AFFINITY_MEM | SPU_CREATE_AFFINITY_SPU); + if (affinity) { + gang = SPUFS_I(inode)->i_gang; + if (!gang) + return -EINVAL; + mutex_lock(&gang->aff_mutex); + neighbor = spufs_assert_affinity(flags, gang, aff_filp); + if (IS_ERR(neighbor)) { + ret = PTR_ERR(neighbor); + goto out_aff_unlock; + } + } - mode &= ~current->fs->umask; - ret = spufs_mkdir(nd->dentry->d_inode, dentry, mode & S_IRWXUGO); + ret = spufs_mkdir(inode, dentry, flags, mode & S_IRWXUGO); if (ret) - goto out_dput; + goto out_aff_unlock; + + if (affinity) { + spufs_set_affinity(flags, SPUFS_I(dentry->d_inode)->i_ctx, + neighbor); + if (neighbor) + put_spu_context(neighbor); + } + + ret = spufs_context_open(&path); + if (ret < 0) + WARN_ON(spufs_rmdir(inode, dentry)); + +out_aff_unlock: + if (affinity) + mutex_unlock(&gang->aff_mutex); + return ret; +} + +static int +spufs_mkgang(struct inode *dir, struct dentry *dentry, umode_t mode) +{ + int ret; + struct inode *inode; + struct spu_gang *gang; + + ret = -ENOSPC; + inode = spufs_new_inode(dir->i_sb, mode | S_IFDIR); + if (!inode) + goto out; + + ret = 0; + if (dir->i_mode & S_ISGID) { + inode->i_gid = dir->i_gid; + inode->i_mode &= S_ISGID; + } + gang = alloc_spu_gang(); + SPUFS_I(inode)->i_ctx = NULL; + SPUFS_I(inode)->i_gang = gang; + if (!gang) + goto out_iput; + + inode->i_op = &simple_dir_inode_operations; + inode->i_fop = &simple_dir_operations; + + d_instantiate(dentry, inode); + inc_nlink(dir); + inc_nlink(dentry->d_inode); + return ret; + +out_iput: + iput(inode); +out: + return ret; +} + +static int spufs_gang_open(struct path *path) +{ + int ret; + struct file *filp; + + ret = get_unused_fd(); + if (ret < 0) + return ret; /* * get references for dget and mntget, will be released * in error path of *_open(). 
*/ - ret = spufs_context_open(dget(dentry), mntget(nd->mnt)); - if (ret < 0) - spufs_rmdir(nd->dentry->d_inode, dentry); + filp = dentry_open(path, O_RDONLY, current_cred()); + if (IS_ERR(filp)) { + put_unused_fd(ret); + return PTR_ERR(filp); + } + + filp->f_op = &simple_dir_operations; + fd_install(ret, filp); + return ret; +} + +static int spufs_create_gang(struct inode *inode, + struct dentry *dentry, + struct vfsmount *mnt, umode_t mode) +{ + struct path path = {.mnt = mnt, .dentry = dentry}; + int ret; + + ret = spufs_mkgang(inode, dentry, mode & S_IRWXUGO); + if (!ret) { + ret = spufs_gang_open(&path); + if (ret < 0) { + int err = simple_rmdir(inode, dentry); + WARN_ON(err); + } + } + return ret; +} + + +static struct file_system_type spufs_type; + +long spufs_create(struct path *path, struct dentry *dentry, + unsigned int flags, umode_t mode, struct file *filp) +{ + struct inode *dir = path->dentry->d_inode; + int ret; + + /* check if we are on spufs */ + if (path->dentry->d_sb->s_type != &spufs_type) + return -EINVAL; + + /* don't accept undefined flags */ + if (flags & (~SPU_CREATE_FLAG_ALL)) + return -EINVAL; + + /* only threads can be underneath a gang */ + if (path->dentry != path->dentry->d_sb->s_root) + if ((flags & SPU_CREATE_GANG) || !SPUFS_I(dir)->i_gang) + return -EINVAL; + + mode &= ~current_umask(); + + if (flags & SPU_CREATE_GANG) + ret = spufs_create_gang(dir, dentry, path->mnt, mode); + else + ret = spufs_create_context(dir, dentry, path->mnt, flags, mode, + filp); + if (ret >= 0) + fsnotify_mkdir(dir, dentry); -out_dput: - dput(dentry); -out_dir: - mutex_unlock(&nd->dentry->d_inode->i_mutex); -out: return ret; } /* File system initialization */ enum { - Opt_uid, Opt_gid, Opt_err, + Opt_uid, Opt_gid, Opt_mode, Opt_debug, Opt_err, }; -static match_table_t spufs_tokens = { - { Opt_uid, "uid=%d" }, - { Opt_gid, "gid=%d" }, - { Opt_err, NULL }, +static const match_table_t spufs_tokens = { + { Opt_uid, "uid=%d" }, + { Opt_gid, "gid=%d" }, + { Opt_mode, "mode=%o" }, + { Opt_debug, "debug" }, + { Opt_err, NULL }, }; static int -spufs_parse_options(char *options, struct inode *root) +spufs_parse_options(struct super_block *sb, char *options, struct inode *root) { char *p; substring_t args[MAX_OPT_ARGS]; @@ -363,12 +620,24 @@ spufs_parse_options(char *options, struct inode *root) case Opt_uid: if (match_int(&args[0], &option)) return 0; - root->i_uid = option; + root->i_uid = make_kuid(current_user_ns(), option); + if (!uid_valid(root->i_uid)) + return 0; break; case Opt_gid: if (match_int(&args[0], &option)) return 0; - root->i_gid = option; + root->i_gid = make_kgid(current_user_ns(), option); + if (!gid_valid(root->i_gid)) + return 0; + break; + case Opt_mode: + if (match_octal(&args[0], &option)) + return 0; + root->i_mode = option | S_IFDIR; + break; + case Opt_debug: + spufs_get_sb_info(sb)->debug = 1; break; default: return 0; @@ -377,29 +646,65 @@ spufs_parse_options(char *options, struct inode *root) return 1; } +static void spufs_exit_isolated_loader(void) +{ + free_pages((unsigned long) isolated_loader, + get_order(isolated_loader_size)); +} + +static void +spufs_init_isolated_loader(void) +{ + struct device_node *dn; + const char *loader; + int size; + + dn = of_find_node_by_path("/spu-isolation"); + if (!dn) + return; + + loader = of_get_property(dn, "loader", &size); + if (!loader) + return; + + /* the loader must be align on a 16 byte boundary */ + isolated_loader = (char *)__get_free_pages(GFP_KERNEL, get_order(size)); + if (!isolated_loader) + return; + + 
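	/*
	 * Note: the loader image comes from the "loader" property of the
	 * /spu-isolation device-tree node looked up above, and the
	 * page-aligned __get_free_pages() buffer trivially satisfies the
	 * 16-byte alignment requirement mentioned in the comment above.
	 */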
isolated_loader_size = size; + memcpy(isolated_loader, loader, size); + printk(KERN_INFO "spufs: SPU isolation mode enabled\n"); +} + static int spufs_create_root(struct super_block *sb, void *data) { struct inode *inode; int ret; + ret = -ENODEV; + if (!spu_management_ops) + goto out; + ret = -ENOMEM; inode = spufs_new_inode(sb, S_IFDIR | 0775); if (!inode) goto out; - inode->i_op = &spufs_dir_inode_operations; + inode->i_op = &simple_dir_inode_operations; inode->i_fop = &simple_dir_operations; SPUFS_I(inode)->i_ctx = NULL; + inc_nlink(inode); ret = -EINVAL; - if (!spufs_parse_options(data, inode)) + if (!spufs_parse_options(sb, data, inode)) goto out_iput; ret = -ENOMEM; - sb->s_root = d_alloc_root(inode); + sb->s_root = d_make_root(inode); if (!sb->s_root) - goto out_iput; + goto out; return 0; out_iput: @@ -411,60 +716,79 @@ out: static int spufs_fill_super(struct super_block *sb, void *data, int silent) { - static struct super_operations s_ops = { + struct spufs_sb_info *info; + static const struct super_operations s_ops = { .alloc_inode = spufs_alloc_inode, .destroy_inode = spufs_destroy_inode, .statfs = simple_statfs, - .delete_inode = spufs_delete_inode, - .drop_inode = generic_delete_inode, + .evict_inode = spufs_evict_inode, + .show_options = generic_show_options, }; + save_mount_options(sb, data); + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) + return -ENOMEM; + sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_blocksize = PAGE_CACHE_SIZE; sb->s_blocksize_bits = PAGE_CACHE_SHIFT; sb->s_magic = SPUFS_MAGIC; sb->s_op = &s_ops; + sb->s_fs_info = info; return spufs_create_root(sb, data); } -static struct super_block * -spufs_get_sb(struct file_system_type *fstype, int flags, +static struct dentry * +spufs_mount(struct file_system_type *fstype, int flags, const char *name, void *data) { - return get_sb_single(fstype, flags, data, spufs_fill_super); + return mount_single(fstype, flags, data, spufs_fill_super); } static struct file_system_type spufs_type = { .owner = THIS_MODULE, .name = "spufs", - .get_sb = spufs_get_sb, + .mount = spufs_mount, .kill_sb = kill_litter_super, }; +MODULE_ALIAS_FS("spufs"); -static int spufs_init(void) +static int __init spufs_init(void) { int ret; + + ret = -ENODEV; + if (!spu_management_ops) + goto out; + ret = -ENOMEM; spufs_inode_cache = kmem_cache_create("spufs_inode_cache", sizeof(struct spufs_inode_info), 0, - SLAB_HWCACHE_ALIGN, spufs_init_once, NULL); + SLAB_HWCACHE_ALIGN, spufs_init_once); if (!spufs_inode_cache) goto out; - if (spu_sched_init() != 0) { - kmem_cache_destroy(spufs_inode_cache); - goto out; - } - ret = register_filesystem(&spufs_type); + ret = spu_sched_init(); if (ret) goto out_cache; ret = register_spu_syscalls(&spufs_calls); if (ret) - goto out_fs; + goto out_sched; + ret = register_filesystem(&spufs_type); + if (ret) + goto out_syscalls; + + spufs_init_isolated_loader(); + return 0; -out_fs: - unregister_filesystem(&spufs_type); + +out_syscalls: + unregister_spu_syscalls(&spufs_calls); +out_sched: + spu_sched_exit(); out_cache: kmem_cache_destroy(spufs_inode_cache); out: @@ -472,9 +796,10 @@ out: } module_init(spufs_init); -static void spufs_exit(void) +static void __exit spufs_exit(void) { spu_sched_exit(); + spufs_exit_isolated_loader(); unregister_spu_syscalls(&spufs_calls); unregister_filesystem(&spufs_type); kmem_cache_destroy(spufs_inode_cache); diff --git a/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c b/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c new file mode 100644 index 00000000000..147069938cf --- 
/dev/null +++ b/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c @@ -0,0 +1,183 @@ +/* + * SPU local store allocation routines + * + * Copyright 2007 Benjamin Herrenschmidt, IBM Corp. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#undef DEBUG + +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> + +#include <asm/spu.h> +#include <asm/spu_csa.h> +#include <asm/mmu.h> + +#include "spufs.h" + +static int spu_alloc_lscsa_std(struct spu_state *csa) +{ + struct spu_lscsa *lscsa; + unsigned char *p; + + lscsa = vzalloc(sizeof(struct spu_lscsa)); + if (!lscsa) + return -ENOMEM; + csa->lscsa = lscsa; + + /* Set LS pages reserved to allow for user-space mapping. */ + for (p = lscsa->ls; p < lscsa->ls + LS_SIZE; p += PAGE_SIZE) + SetPageReserved(vmalloc_to_page(p)); + + return 0; +} + +static void spu_free_lscsa_std(struct spu_state *csa) +{ + /* Clear reserved bit before vfree. */ + unsigned char *p; + + if (csa->lscsa == NULL) + return; + + for (p = csa->lscsa->ls; p < csa->lscsa->ls + LS_SIZE; p += PAGE_SIZE) + ClearPageReserved(vmalloc_to_page(p)); + + vfree(csa->lscsa); +} + +#ifdef CONFIG_SPU_FS_64K_LS + +#define SPU_64K_PAGE_SHIFT 16 +#define SPU_64K_PAGE_ORDER (SPU_64K_PAGE_SHIFT - PAGE_SHIFT) +#define SPU_64K_PAGE_COUNT (1ul << SPU_64K_PAGE_ORDER) + +int spu_alloc_lscsa(struct spu_state *csa) +{ + struct page **pgarray; + unsigned char *p; + int i, j, n_4k; + + /* Check availability of 64K pages */ + if (!spu_64k_pages_available()) + goto fail; + + csa->use_big_pages = 1; + + pr_debug("spu_alloc_lscsa(csa=0x%p), trying to allocate 64K pages\n", + csa); + + /* First try to allocate our 64K pages. We need 5 of them + * with the current implementation. In the future, we should try + * to separate the lscsa with the actual local store image, thus + * allowing us to require only 4 64K pages per context + */ + for (i = 0; i < SPU_LSCSA_NUM_BIG_PAGES; i++) { + /* XXX This is likely to fail, we should use a special pool + * similar to what hugetlbfs does. + */ + csa->lscsa_pages[i] = alloc_pages(GFP_KERNEL, + SPU_64K_PAGE_ORDER); + if (csa->lscsa_pages[i] == NULL) + goto fail; + } + + pr_debug(" success ! creating vmap...\n"); + + /* Now we need to create a vmalloc mapping of these for the kernel + * and SPU context switch code to use. Currently, we stick to a + * normal kernel vmalloc mapping, which in our case will be 4K + */ + n_4k = SPU_64K_PAGE_COUNT * SPU_LSCSA_NUM_BIG_PAGES; + pgarray = kmalloc(sizeof(struct page *) * n_4k, GFP_KERNEL); + if (pgarray == NULL) + goto fail; + for (i = 0; i < SPU_LSCSA_NUM_BIG_PAGES; i++) + for (j = 0; j < SPU_64K_PAGE_COUNT; j++) + /* We assume all the struct page's are contiguous + * which should be hopefully the case for an order 4 + * allocation.. 
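 * For example, assuming 4K base pages (PAGE_SHIFT = 12),
 * SPU_64K_PAGE_ORDER is 16 - 12 = 4, so each of the
 * SPU_LSCSA_NUM_BIG_PAGES allocations above spans
 * SPU_64K_PAGE_COUNT = 1 << 4 = 16 consecutive 4K struct pages.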
+ */ + pgarray[i * SPU_64K_PAGE_COUNT + j] = + csa->lscsa_pages[i] + j; + csa->lscsa = vmap(pgarray, n_4k, VM_USERMAP, PAGE_KERNEL); + kfree(pgarray); + if (csa->lscsa == NULL) + goto fail; + + memset(csa->lscsa, 0, sizeof(struct spu_lscsa)); + + /* Set LS pages reserved to allow for user-space mapping. + * + * XXX isn't that a bit obsolete ? I think we should just + * make sure the page count is high enough. Anyway, won't harm + * for now + */ + for (p = csa->lscsa->ls; p < csa->lscsa->ls + LS_SIZE; p += PAGE_SIZE) + SetPageReserved(vmalloc_to_page(p)); + + pr_debug(" all good !\n"); + + return 0; +fail: + pr_debug("spufs: failed to allocate lscsa 64K pages, falling back\n"); + spu_free_lscsa(csa); + return spu_alloc_lscsa_std(csa); +} + +void spu_free_lscsa(struct spu_state *csa) +{ + unsigned char *p; + int i; + + if (!csa->use_big_pages) { + spu_free_lscsa_std(csa); + return; + } + csa->use_big_pages = 0; + + if (csa->lscsa == NULL) + goto free_pages; + + for (p = csa->lscsa->ls; p < csa->lscsa->ls + LS_SIZE; p += PAGE_SIZE) + ClearPageReserved(vmalloc_to_page(p)); + + vunmap(csa->lscsa); + csa->lscsa = NULL; + + free_pages: + + for (i = 0; i < SPU_LSCSA_NUM_BIG_PAGES; i++) + if (csa->lscsa_pages[i]) + __free_pages(csa->lscsa_pages[i], SPU_64K_PAGE_ORDER); +} + +#else /* CONFIG_SPU_FS_64K_LS */ + +int spu_alloc_lscsa(struct spu_state *csa) +{ + return spu_alloc_lscsa_std(csa); +} + +void spu_free_lscsa(struct spu_state *csa) +{ + spu_free_lscsa_std(csa); +} + +#endif /* !defined(CONFIG_SPU_FS_64K_LS) */ diff --git a/arch/powerpc/platforms/cell/spufs/run.c b/arch/powerpc/platforms/cell/spufs/run.c index 18ea8866c61..4ddf769a64e 100644 --- a/arch/powerpc/platforms/cell/spufs/run.c +++ b/arch/powerpc/platforms/cell/spufs/run.c @@ -1,131 +1,454 @@ +#define DEBUG + #include <linux/wait.h> #include <linux/ptrace.h> #include <asm/spu.h> +#include <asm/spu_priv1.h> +#include <asm/io.h> +#include <asm/unistd.h> #include "spufs.h" /* interrupt-level stop callback function. */ -void spufs_stop_callback(struct spu *spu) +void spufs_stop_callback(struct spu *spu, int irq) { struct spu_context *ctx = spu->ctx; - wake_up_all(&ctx->stop_wq); + /* + * It should be impossible to preempt a context while an exception + * is being processed, since the context switch code is specially + * coded to deal with interrupts ... But, just in case, sanity check + * the context pointer. It is OK to return doing nothing since + * the exception will be regenerated when the context is resumed. 
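 * The irq argument names the interrupt class that fired.  Class 0
 * (errors) and class 1 (MFC translation faults) carry per-class status
 * that must survive until the controlling thread runs, which is what
 * the switch below copies into the CSA; class 2 (mailbox/stop events)
 * has nothing extra to save.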
+ */ + if (ctx) { + /* Copy exception arguments into module specific structure */ + switch(irq) { + case 0 : + ctx->csa.class_0_pending = spu->class_0_pending; + ctx->csa.class_0_dar = spu->class_0_dar; + break; + case 1 : + ctx->csa.class_1_dsisr = spu->class_1_dsisr; + ctx->csa.class_1_dar = spu->class_1_dar; + break; + case 2 : + break; + } + + /* ensure that the exception status has hit memory before a + * thread waiting on the context's stop queue is woken */ + smp_wmb(); + + wake_up_all(&ctx->stop_wq); + } } -static inline int spu_stopped(struct spu_context *ctx, u32 * stat) +int spu_stopped(struct spu_context *ctx, u32 *stat) { - struct spu *spu; - u64 pte_fault; + u64 dsisr; + u32 stopped; + stopped = SPU_STATUS_INVALID_INSTR | SPU_STATUS_SINGLE_STEP | + SPU_STATUS_STOPPED_BY_HALT | SPU_STATUS_STOPPED_BY_STOP; + +top: *stat = ctx->ops->status_read(ctx); - if (ctx->state != SPU_STATE_RUNNABLE) + if (*stat & stopped) { + /* + * If the spu hasn't finished stopping, we need to + * re-read the register to get the stopped value. + */ + if (*stat & SPU_STATUS_RUNNING) + goto top; return 1; - spu = ctx->spu; - pte_fault = spu->dsisr & - (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED); - return (!(*stat & 0x1) || pte_fault || spu->class_0_pending) ? 1 : 0; + } + + if (test_bit(SPU_SCHED_NOTIFY_ACTIVE, &ctx->sched_flags)) + return 1; + + dsisr = ctx->csa.class_1_dsisr; + if (dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED)) + return 1; + + if (ctx->csa.class_0_pending) + return 1; + + return 0; } -static inline int spu_run_init(struct spu_context *ctx, u32 * npc, - u32 * status) +static int spu_setup_isolated(struct spu_context *ctx) { int ret; + u64 __iomem *mfc_cntl; + u64 sr1; + u32 status; + unsigned long timeout; + const u32 status_loading = SPU_STATUS_RUNNING + | SPU_STATUS_ISOLATED_STATE | SPU_STATUS_ISOLATED_LOAD_STATUS; - if ((ret = spu_acquire_runnable(ctx)) != 0) - return ret; - ctx->ops->npc_write(ctx, *npc); - ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_RUNNABLE); + ret = -ENODEV; + if (!isolated_loader) + goto out; + + /* + * We need to exclude userspace access to the context. + * + * To protect against memory access we invalidate all ptes + * and make sure the pagefault handlers block on the mutex. 
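 * After that the code below purges the MFC DMA queue, temporarily
 * clears the problem-state bit in SR1 so the SPE can reach the loader,
 * hands the loader address to the SPU via the two signal notification
 * registers (high then low 32 bits), starts the SPU with
 * SPU_RUNCNTL_RUNNABLE | SPU_RUNCNTL_ISOLATE, and polls the status
 * register until the isolated load finishes or a one second timeout
 * expires.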
+ */ + spu_unmap_mappings(ctx); + + mfc_cntl = &ctx->spu->priv2->mfc_control_RW; + + /* purge the MFC DMA queue to ensure no spurious accesses before we + * enter kernel mode */ + timeout = jiffies + HZ; + out_be64(mfc_cntl, MFC_CNTL_PURGE_DMA_REQUEST); + while ((in_be64(mfc_cntl) & MFC_CNTL_PURGE_DMA_STATUS_MASK) + != MFC_CNTL_PURGE_DMA_COMPLETE) { + if (time_after(jiffies, timeout)) { + printk(KERN_ERR "%s: timeout flushing MFC DMA queue\n", + __func__); + ret = -EIO; + goto out; + } + cond_resched(); + } + + /* clear purge status */ + out_be64(mfc_cntl, 0); + + /* put the SPE in kernel mode to allow access to the loader */ + sr1 = spu_mfc_sr1_get(ctx->spu); + sr1 &= ~MFC_STATE1_PROBLEM_STATE_MASK; + spu_mfc_sr1_set(ctx->spu, sr1); + + /* start the loader */ + ctx->ops->signal1_write(ctx, (unsigned long)isolated_loader >> 32); + ctx->ops->signal2_write(ctx, + (unsigned long)isolated_loader & 0xffffffff); + + ctx->ops->runcntl_write(ctx, + SPU_RUNCNTL_RUNNABLE | SPU_RUNCNTL_ISOLATE); + + ret = 0; + timeout = jiffies + HZ; + while (((status = ctx->ops->status_read(ctx)) & status_loading) == + status_loading) { + if (time_after(jiffies, timeout)) { + printk(KERN_ERR "%s: timeout waiting for loader\n", + __func__); + ret = -EIO; + goto out_drop_priv; + } + cond_resched(); + } + + if (!(status & SPU_STATUS_RUNNING)) { + /* If isolated LOAD has failed: run SPU, we will get a stop-and + * signal later. */ + pr_debug("%s: isolated LOAD failed\n", __func__); + ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_RUNNABLE); + ret = -EACCES; + goto out_drop_priv; + } + + if (!(status & SPU_STATUS_ISOLATED_STATE)) { + /* This isn't allowed by the CBEA, but check anyway */ + pr_debug("%s: SPU fell out of isolated mode?\n", __func__); + ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_STOP); + ret = -EINVAL; + goto out_drop_priv; + } + +out_drop_priv: + /* Finished accessing the loader. Drop kernel mode */ + sr1 |= MFC_STATE1_PROBLEM_STATE_MASK; + spu_mfc_sr1_set(ctx->spu, sr1); + +out: + return ret; +} + +static int spu_run_init(struct spu_context *ctx, u32 *npc) +{ + unsigned long runcntl = SPU_RUNCNTL_RUNNABLE; + int ret; + + spuctx_switch_state(ctx, SPU_UTIL_SYSTEM); + + /* + * NOSCHED is synchronous scheduling with respect to the caller. + * The caller waits for the context to be loaded. + */ + if (ctx->flags & SPU_CREATE_NOSCHED) { + if (ctx->state == SPU_STATE_SAVED) { + ret = spu_activate(ctx, 0); + if (ret) + return ret; + } + } + + /* + * Apply special setup as required. 
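 * For SPU_CREATE_ISOLATE contexts that means kicking off the isolated
 * loader (unless the SPU is already in the isolated state) and keeping
 * any run-control bits user space may have set, e.g. to request an
 * isolated exit.  For everything else it means choosing normal versus
 * single-step mode in privcntl and writing the requested NPC.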
+ */ + if (ctx->flags & SPU_CREATE_ISOLATE) { + if (!(ctx->ops->status_read(ctx) & SPU_STATUS_ISOLATED_STATE)) { + ret = spu_setup_isolated(ctx); + if (ret) + return ret; + } + + /* + * If userspace has set the runcntrl register (eg, to + * issue an isolated exit), we need to re-set it here + */ + runcntl = ctx->ops->runcntl_read(ctx) & + (SPU_RUNCNTL_RUNNABLE | SPU_RUNCNTL_ISOLATE); + if (runcntl == 0) + runcntl = SPU_RUNCNTL_RUNNABLE; + } else { + unsigned long privcntl; + + if (test_thread_flag(TIF_SINGLESTEP)) + privcntl = SPU_PRIVCNTL_MODE_SINGLE_STEP; + else + privcntl = SPU_PRIVCNTL_MODE_NORMAL; + + ctx->ops->privcntl_write(ctx, privcntl); + ctx->ops->npc_write(ctx, *npc); + } + + ctx->ops->runcntl_write(ctx, runcntl); + + if (ctx->flags & SPU_CREATE_NOSCHED) { + spuctx_switch_state(ctx, SPU_UTIL_USER); + } else { + + if (ctx->state == SPU_STATE_SAVED) { + ret = spu_activate(ctx, 0); + if (ret) + return ret; + } else { + spuctx_switch_state(ctx, SPU_UTIL_USER); + } + } + + set_bit(SPU_SCHED_SPU_RUN, &ctx->sched_flags); return 0; } -static inline int spu_run_fini(struct spu_context *ctx, u32 * npc, - u32 * status) +static int spu_run_fini(struct spu_context *ctx, u32 *npc, + u32 *status) { int ret = 0; + spu_del_from_rq(ctx); + *status = ctx->ops->status_read(ctx); *npc = ctx->ops->npc_read(ctx); + + spuctx_switch_state(ctx, SPU_UTIL_IDLE_LOADED); + clear_bit(SPU_SCHED_SPU_RUN, &ctx->sched_flags); + spu_switch_log_notify(NULL, ctx, SWITCH_LOG_EXIT, *status); spu_release(ctx); if (signal_pending(current)) ret = -ERESTARTSYS; - if (unlikely(current->ptrace & PT_PTRACED)) { - if ((*status & SPU_STATUS_STOPPED_BY_STOP) - && (*status >> SPU_STOP_STATUS_SHIFT) == 0x3fff) { - force_sig(SIGTRAP, current); - ret = -ERESTARTSYS; - } - } + return ret; } -static inline int spu_reacquire_runnable(struct spu_context *ctx, u32 *npc, - u32 *status) +/* + * SPU syscall restarting is tricky because we violate the basic + * assumption that the signal handler is running on the interrupted + * thread. Here instead, the handler runs on PowerPC user space code, + * while the syscall was called from the SPU. + * This means we can only do a very rough approximation of POSIX + * signal semantics. + */ +static int spu_handle_restartsys(struct spu_context *ctx, long *spu_ret, + unsigned int *npc) { int ret; - if ((ret = spu_run_fini(ctx, npc, status)) != 0) - return ret; - if (*status & (SPU_STATUS_STOPPED_BY_STOP | - SPU_STATUS_STOPPED_BY_HALT)) { - return *status; + switch (*spu_ret) { + case -ERESTARTSYS: + case -ERESTARTNOINTR: + /* + * Enter the regular syscall restarting for + * sys_spu_run, then restart the SPU syscall + * callback. + */ + *npc -= 8; + ret = -ERESTARTSYS; + break; + case -ERESTARTNOHAND: + case -ERESTART_RESTARTBLOCK: + /* + * Restart block is too hard for now, just return -EINTR + * to the SPU. + * ERESTARTNOHAND comes from sys_pause, we also return + * -EINTR from there. + * Assume that we need to be restarted ourselves though. 
+ */ + *spu_ret = -EINTR; + ret = -ERESTARTSYS; + break; + default: + printk(KERN_WARNING "%s: unexpected return code %ld\n", + __func__, *spu_ret); + ret = 0; } - if ((ret = spu_run_init(ctx, npc, status)) != 0) - return ret; - return 0; + return ret; } -static inline int spu_process_events(struct spu_context *ctx) +static int spu_process_callback(struct spu_context *ctx) { - struct spu *spu = ctx->spu; - u64 pte_fault = MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED; - int ret = 0; + struct spu_syscall_block s; + u32 ls_pointer, npc; + void __iomem *ls; + long spu_ret; + int ret; - if (spu->dsisr & pte_fault) - ret = spu_irq_class_1_bottom(spu); - if (spu->class_0_pending) - ret = spu_irq_class_0_bottom(spu); - if (!ret && signal_pending(current)) - ret = -ERESTARTSYS; + /* get syscall block from local store */ + npc = ctx->ops->npc_read(ctx) & ~3; + ls = (void __iomem *)ctx->ops->get_ls(ctx); + ls_pointer = in_be32(ls + npc); + if (ls_pointer > (LS_SIZE - sizeof(s))) + return -EFAULT; + memcpy_fromio(&s, ls + ls_pointer, sizeof(s)); + + /* do actual syscall without pinning the spu */ + ret = 0; + spu_ret = -ENOSYS; + npc += 4; + + if (s.nr_ret < __NR_syscalls) { + spu_release(ctx); + /* do actual system call from here */ + spu_ret = spu_sys_callback(&s); + if (spu_ret <= -ERESTARTSYS) { + ret = spu_handle_restartsys(ctx, &spu_ret, &npc); + } + mutex_lock(&ctx->state_mutex); + if (ret == -ERESTARTSYS) + return ret; + } + + /* need to re-get the ls, as it may have changed when we released the + * spu */ + ls = (void __iomem *)ctx->ops->get_ls(ctx); + + /* write result, jump over indirect pointer */ + memcpy_toio(ls + ls_pointer, &spu_ret, sizeof(spu_ret)); + ctx->ops->npc_write(ctx, npc); + ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_RUNNABLE); return ret; } -long spufs_run_spu(struct file *file, struct spu_context *ctx, - u32 * npc, u32 * status) +long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *event) { int ret; + struct spu *spu; + u32 status; - if (down_interruptible(&ctx->run_sema)) + if (mutex_lock_interruptible(&ctx->run_mutex)) return -ERESTARTSYS; - ret = spu_run_init(ctx, npc, status); + ctx->event_return = 0; + + ret = spu_acquire(ctx); if (ret) + goto out_unlock; + + spu_enable_spu(ctx); + + spu_update_sched_info(ctx); + + ret = spu_run_init(ctx, npc); + if (ret) { + spu_release(ctx); goto out; + } do { - ret = spufs_wait(ctx->stop_wq, spu_stopped(ctx, status)); - if (unlikely(ret)) + ret = spufs_wait(ctx->stop_wq, spu_stopped(ctx, &status)); + if (unlikely(ret)) { + /* + * This is nasty: we need the state_mutex for all the + * bookkeeping even if the syscall was interrupted by + * a signal. ewww. 
+ */ + mutex_lock(&ctx->state_mutex); break; - if (unlikely(ctx->state != SPU_STATE_RUNNABLE)) { - ret = spu_reacquire_runnable(ctx, npc, status); + } + spu = ctx->spu; + if (unlikely(test_and_clear_bit(SPU_SCHED_NOTIFY_ACTIVE, + &ctx->sched_flags))) { + if (!(status & SPU_STATUS_STOPPED_BY_STOP)) { + spu_switch_notify(spu, ctx); + continue; + } + } + + spuctx_switch_state(ctx, SPU_UTIL_SYSTEM); + + if ((status & SPU_STATUS_STOPPED_BY_STOP) && + (status >> SPU_STOP_STATUS_SHIFT == 0x2104)) { + ret = spu_process_callback(ctx); if (ret) - goto out; - continue; + break; + status &= ~SPU_STATUS_STOPPED_BY_STOP; } - ret = spu_process_events(ctx); + ret = spufs_handle_class1(ctx); + if (ret) + break; - } while (!ret && !(*status & (SPU_STATUS_STOPPED_BY_STOP | - SPU_STATUS_STOPPED_BY_HALT))); + ret = spufs_handle_class0(ctx); + if (ret) + break; - ctx->ops->runcntl_stop(ctx); - ret = spu_run_fini(ctx, npc, status); - if (!ret) - ret = *status; + if (signal_pending(current)) + ret = -ERESTARTSYS; + } while (!ret && !(status & (SPU_STATUS_STOPPED_BY_STOP | + SPU_STATUS_STOPPED_BY_HALT | + SPU_STATUS_SINGLE_STEP))); + + spu_disable_spu(ctx); + ret = spu_run_fini(ctx, npc, &status); spu_yield(ctx); + if ((status & SPU_STATUS_STOPPED_BY_STOP) && + (((status >> SPU_STOP_STATUS_SHIFT) & 0x3f00) == 0x2100)) + ctx->stats.libassist++; + + if ((ret == 0) || + ((ret == -ERESTARTSYS) && + ((status & SPU_STATUS_STOPPED_BY_HALT) || + (status & SPU_STATUS_SINGLE_STEP) || + ((status & SPU_STATUS_STOPPED_BY_STOP) && + (status >> SPU_STOP_STATUS_SHIFT != 0x2104))))) + ret = status; + + /* Note: we don't need to force_sig SIGTRAP on single-step + * since we have TIF_SINGLESTEP set, thus the kernel will do + * it upon return from the syscall anyawy + */ + if (unlikely(status & SPU_STATUS_SINGLE_STEP)) + ret = -ERESTARTSYS; + + else if (unlikely((status & SPU_STATUS_STOPPED_BY_STOP) + && (status >> SPU_STOP_STATUS_SHIFT) == 0x3fff)) { + force_sig(SIGTRAP, current); + ret = -ERESTARTSYS; + } + out: - up(&ctx->run_sema); + *event = ctx->event_return; +out_unlock: + mutex_unlock(&ctx->run_mutex); return ret; } - diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index 963182fbd1a..4a0a64fe25d 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -3,11 +3,7 @@ * Copyright (C) IBM 2005 * Author: Mark Nutter <mnutter@us.ibm.com> * - * SPU scheduler, based on Linux thread priority. For now use - * a simple "cooperative" yield model with no preemption. SPU - * scheduling will eventually be preemptive: When a thread with - * a higher static priority gets ready to run, then an active SPU - * context will be preempted and returned to the waitq. + * 2006-03-31 NUMA domains added. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -26,436 +22,1153 @@ #undef DEBUG -#include <linux/config.h> -#include <linux/module.h> #include <linux/errno.h> #include <linux/sched.h> +#include <linux/sched/rt.h> #include <linux/kernel.h> #include <linux/mm.h> +#include <linux/slab.h> #include <linux/completion.h> #include <linux/vmalloc.h> #include <linux/smp.h> -#include <linux/smp_lock.h> #include <linux/stddef.h> #include <linux/unistd.h> +#include <linux/numa.h> +#include <linux/mutex.h> +#include <linux/notifier.h> +#include <linux/kthread.h> +#include <linux/pid_namespace.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> #include <asm/io.h> #include <asm/mmu_context.h> #include <asm/spu.h> #include <asm/spu_csa.h> +#include <asm/spu_priv1.h> #include "spufs.h" +#define CREATE_TRACE_POINTS +#include "sputrace.h" -#define SPU_MIN_TIMESLICE (100 * HZ / 1000) - -#define SPU_BITMAP_SIZE (((MAX_PRIO+BITS_PER_LONG)/BITS_PER_LONG)+1) struct spu_prio_array { - atomic_t nr_blocked; - unsigned long bitmap[SPU_BITMAP_SIZE]; - wait_queue_head_t waitq[MAX_PRIO]; + DECLARE_BITMAP(bitmap, MAX_PRIO); + struct list_head runq[MAX_PRIO]; + spinlock_t runq_lock; + int nr_waiting; }; -/* spu_runqueue - This is the main runqueue data structure for SPUs. */ -struct spu_runqueue { - struct semaphore sem; - unsigned long nr_active; - unsigned long nr_idle; - unsigned long nr_switches; - struct list_head active_list; - struct list_head idle_list; - struct spu_prio_array prio; -}; +static unsigned long spu_avenrun[3]; +static struct spu_prio_array *spu_prio; +static struct task_struct *spusched_task; +static struct timer_list spusched_timer; +static struct timer_list spuloadavg_timer; -static struct spu_runqueue *spu_runqueues = NULL; +/* + * Priority of a normal, non-rt, non-niced'd process (aka nice level 0). + */ +#define NORMAL_PRIO 120 -static inline struct spu_runqueue *spu_rq(void) -{ - /* Future: make this a per-NODE array, - * and use cpu_to_node(smp_processor_id()) - */ - return spu_runqueues; -} +/* + * Frequency of the spu scheduler tick. By default we do one SPU scheduler + * tick for every 10 CPU scheduler ticks. + */ +#define SPUSCHED_TICK (10) -static inline struct spu *del_idle(struct spu_runqueue *rq) -{ - struct spu *spu; +/* + * These are the 'tuning knobs' of the scheduler: + * + * Minimum timeslice is 5 msecs (or 1 spu scheduler tick, whichever is + * larger), default timeslice is 100 msecs, maximum timeslice is 800 msecs. + */ +#define MIN_SPU_TIMESLICE max(5 * HZ / (1000 * SPUSCHED_TICK), 1) +#define DEF_SPU_TIMESLICE (100 * HZ / (1000 * SPUSCHED_TICK)) - BUG_ON(rq->nr_idle <= 0); - BUG_ON(list_empty(&rq->idle_list)); - /* Future: Move SPU out of low-power SRI state. */ - spu = list_entry(rq->idle_list.next, struct spu, sched_list); - list_del_init(&spu->sched_list); - rq->nr_idle--; - return spu; -} +#define SCALE_PRIO(x, prio) \ + max(x * (MAX_PRIO - prio) / (MAX_USER_PRIO / 2), MIN_SPU_TIMESLICE) -static inline void del_active(struct spu_runqueue *rq, struct spu *spu) +/* + * scale user-nice values [ -20 ... 0 ... 19 ] to time slice values: + * [800ms ... 100ms ... 5ms] + * + * The higher a thread's priority, the bigger timeslices + * it gets during one round of execution. But even the lowest + * priority thread gets MIN_TIMESLICE worth of execution time. 
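 * Worked example, assuming HZ = 1000 and the usual MAX_PRIO = 140,
 * MAX_USER_PRIO = 40: DEF_SPU_TIMESLICE is 10 scheduler ticks of
 * SPUSCHED_TICK = 10 jiffies each, i.e. 100ms.  Nice 0 (prio 120)
 * keeps that default, nice -20 (prio 100) gets 4 * 10 * (140 - 100) /
 * 20 = 80 ticks = 800ms, and nice +19 (prio 139) scales down to zero
 * and is clamped to MIN_SPU_TIMESLICE (a single tick here).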
+ */ +void spu_set_timeslice(struct spu_context *ctx) { - BUG_ON(rq->nr_active <= 0); - BUG_ON(list_empty(&rq->active_list)); - list_del_init(&spu->sched_list); - rq->nr_active--; + if (ctx->prio < NORMAL_PRIO) + ctx->time_slice = SCALE_PRIO(DEF_SPU_TIMESLICE * 4, ctx->prio); + else + ctx->time_slice = SCALE_PRIO(DEF_SPU_TIMESLICE, ctx->prio); } -static inline void add_idle(struct spu_runqueue *rq, struct spu *spu) +/* + * Update scheduling information from the owning thread. + */ +void __spu_update_sched_info(struct spu_context *ctx) { - /* Future: Put SPU into low-power SRI state. */ - list_add_tail(&spu->sched_list, &rq->idle_list); - rq->nr_idle++; -} + /* + * assert that the context is not on the runqueue, so it is safe + * to change its scheduling parameters. + */ + BUG_ON(!list_empty(&ctx->rq)); -static inline void add_active(struct spu_runqueue *rq, struct spu *spu) -{ - rq->nr_active++; - rq->nr_switches++; - list_add_tail(&spu->sched_list, &rq->active_list); + /* + * 32-Bit assignments are atomic on powerpc, and we don't care about + * memory ordering here because retrieving the controlling thread is + * per definition racy. + */ + ctx->tid = current->pid; + + /* + * We do our own priority calculations, so we normally want + * ->static_prio to start with. Unfortunately this field + * contains junk for threads with a realtime scheduling + * policy so we have to look at ->prio in this case. + */ + if (rt_prio(current->prio)) + ctx->prio = current->prio; + else + ctx->prio = current->static_prio; + ctx->policy = current->policy; + + /* + * TO DO: the context may be loaded, so we may need to activate + * it again on a different node. But it shouldn't hurt anything + * to update its parameters, because we know that the scheduler + * is not actively looking at this field, since it is not on the + * runqueue. The context will be rescheduled on the proper node + * if it is timesliced or preempted. + */ + cpumask_copy(&ctx->cpus_allowed, tsk_cpus_allowed(current)); + + /* Save the current cpu id for spu interrupt routing. */ + ctx->last_ran = raw_smp_processor_id(); } -static void prio_wakeup(struct spu_runqueue *rq) +void spu_update_sched_info(struct spu_context *ctx) { - if (atomic_read(&rq->prio.nr_blocked) && rq->nr_idle) { - int best = sched_find_first_bit(rq->prio.bitmap); - if (best < MAX_PRIO) { - wait_queue_head_t *wq = &rq->prio.waitq[best]; - wake_up_interruptible_nr(wq, 1); - } + int node; + + if (ctx->state == SPU_STATE_RUNNABLE) { + node = ctx->spu->node; + + /* + * Take list_mutex to sync with find_victim(). 
+ */ + mutex_lock(&cbe_spu_info[node].list_mutex); + __spu_update_sched_info(ctx); + mutex_unlock(&cbe_spu_info[node].list_mutex); + } else { + __spu_update_sched_info(ctx); } } -static void prio_wait(struct spu_runqueue *rq, struct spu_context *ctx, - u64 flags) +static int __node_allowed(struct spu_context *ctx, int node) { - int prio = current->prio; - wait_queue_head_t *wq = &rq->prio.waitq[prio]; - DEFINE_WAIT(wait); + if (nr_cpus_node(node)) { + const struct cpumask *mask = cpumask_of_node(node); - __set_bit(prio, rq->prio.bitmap); - atomic_inc(&rq->prio.nr_blocked); - prepare_to_wait_exclusive(wq, &wait, TASK_INTERRUPTIBLE); - if (!signal_pending(current)) { - up(&rq->sem); - up_write(&ctx->state_sema); - pr_debug("%s: pid=%d prio=%d\n", __FUNCTION__, - current->pid, current->prio); - schedule(); - down_write(&ctx->state_sema); - down(&rq->sem); + if (cpumask_intersects(mask, &ctx->cpus_allowed)) + return 1; } - finish_wait(wq, &wait); - atomic_dec(&rq->prio.nr_blocked); - if (!waitqueue_active(wq)) - __clear_bit(prio, rq->prio.bitmap); + + return 0; } -static inline int is_best_prio(struct spu_runqueue *rq) +static int node_allowed(struct spu_context *ctx, int node) { - int best_prio; + int rval; + + spin_lock(&spu_prio->runq_lock); + rval = __node_allowed(ctx, node); + spin_unlock(&spu_prio->runq_lock); - best_prio = sched_find_first_bit(rq->prio.bitmap); - return (current->prio < best_prio) ? 1 : 0; + return rval; } -static inline void mm_needs_global_tlbie(struct mm_struct *mm) +void do_notify_spus_active(void) { - /* Global TLBIE broadcast required with SPEs. */ -#if (NR_CPUS > 1) - __cpus_setall(&mm->cpu_vm_mask, NR_CPUS); -#else - __cpus_setall(&mm->cpu_vm_mask, NR_CPUS+1); /* is this ok? */ -#endif + int node; + + /* + * Wake up the active spu_contexts. + * + * When the awakened processes see their "notify_active" flag is set, + * they will call spu_switch_notify(). 
+ */ + for_each_online_node(node) { + struct spu *spu; + + mutex_lock(&cbe_spu_info[node].list_mutex); + list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { + if (spu->alloc_state != SPU_FREE) { + struct spu_context *ctx = spu->ctx; + set_bit(SPU_SCHED_NOTIFY_ACTIVE, + &ctx->sched_flags); + mb(); + wake_up_all(&ctx->stop_wq); + } + } + mutex_unlock(&cbe_spu_info[node].list_mutex); + } } -static inline void bind_context(struct spu *spu, struct spu_context *ctx) +/** + * spu_bind_context - bind spu context to physical spu + * @spu: physical spu to bind to + * @ctx: context to bind + */ +static void spu_bind_context(struct spu *spu, struct spu_context *ctx) { - pr_debug("%s: pid=%d SPU=%d\n", __FUNCTION__, current->pid, - spu->number); + spu_context_trace(spu_bind_context__enter, ctx, spu); + + spuctx_switch_state(ctx, SPU_UTIL_SYSTEM); + + if (ctx->flags & SPU_CREATE_NOSCHED) + atomic_inc(&cbe_spu_info[spu->node].reserved_spus); + + ctx->stats.slb_flt_base = spu->stats.slb_flt; + ctx->stats.class2_intr_base = spu->stats.class2_intr; + + spu_associate_mm(spu, ctx->owner); + + spin_lock_irq(&spu->register_lock); spu->ctx = ctx; spu->flags = 0; - ctx->flags = 0; ctx->spu = spu; ctx->ops = &spu_hw_ops; spu->pid = current->pid; - spu->prio = current->prio; - spu->mm = ctx->owner; - mm_needs_global_tlbie(spu->mm); + spu->tgid = current->tgid; spu->ibox_callback = spufs_ibox_callback; spu->wbox_callback = spufs_wbox_callback; spu->stop_callback = spufs_stop_callback; - mb(); + spu->mfc_callback = spufs_mfc_callback; + spin_unlock_irq(&spu->register_lock); + spu_unmap_mappings(ctx); + + spu_switch_log_notify(spu, ctx, SWITCH_LOG_START, 0); spu_restore(&ctx->csa, spu); spu->timestamp = jiffies; + spu_switch_notify(spu, ctx); + ctx->state = SPU_STATE_RUNNABLE; + + spuctx_switch_state(ctx, SPU_UTIL_USER); +} + +/* + * Must be used with the list_mutex held. + */ +static inline int sched_spu(struct spu *spu) +{ + BUG_ON(!mutex_is_locked(&cbe_spu_info[spu->node].list_mutex)); + + return (!spu->ctx || !(spu->ctx->flags & SPU_CREATE_NOSCHED)); +} + +static void aff_merge_remaining_ctxs(struct spu_gang *gang) +{ + struct spu_context *ctx; + + list_for_each_entry(ctx, &gang->aff_list_head, aff_list) { + if (list_empty(&ctx->aff_list)) + list_add(&ctx->aff_list, &gang->aff_list_head); + } + gang->aff_flags |= AFF_MERGED; +} + +static void aff_set_offsets(struct spu_gang *gang) +{ + struct spu_context *ctx; + int offset; + + offset = -1; + list_for_each_entry_reverse(ctx, &gang->aff_ref_ctx->aff_list, + aff_list) { + if (&ctx->aff_list == &gang->aff_list_head) + break; + ctx->aff_offset = offset--; + } + + offset = 0; + list_for_each_entry(ctx, gang->aff_ref_ctx->aff_list.prev, aff_list) { + if (&ctx->aff_list == &gang->aff_list_head) + break; + ctx->aff_offset = offset++; + } + + gang->aff_flags |= AFF_OFFSETS_SET; +} + +static struct spu *aff_ref_location(struct spu_context *ctx, int mem_aff, + int group_size, int lowest_offset) +{ + struct spu *spu; + int node, n; + + /* + * TODO: A better algorithm could be used to find a good spu to be + * used as reference location for the ctxs chain. + */ + node = cpu_to_node(raw_smp_processor_id()); + for (n = 0; n < MAX_NUMNODES; n++, node++) { + /* + * "available_spus" counts how many spus are not potentially + * going to be used by other affinity gangs whose reference + * context is already in place. 
Although this code seeks to + * avoid having affinity gangs with a summed amount of + * contexts bigger than the amount of spus in the node, + * this may happen sporadically. In this case, available_spus + * becomes negative, which is harmless. + */ + int available_spus; + + node = (node < MAX_NUMNODES) ? node : 0; + if (!node_allowed(ctx, node)) + continue; + + available_spus = 0; + mutex_lock(&cbe_spu_info[node].list_mutex); + list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { + if (spu->ctx && spu->ctx->gang && !spu->ctx->aff_offset + && spu->ctx->gang->aff_ref_spu) + available_spus -= spu->ctx->gang->contexts; + available_spus++; + } + if (available_spus < ctx->gang->contexts) { + mutex_unlock(&cbe_spu_info[node].list_mutex); + continue; + } + + list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { + if ((!mem_aff || spu->has_mem_affinity) && + sched_spu(spu)) { + mutex_unlock(&cbe_spu_info[node].list_mutex); + return spu; + } + } + mutex_unlock(&cbe_spu_info[node].list_mutex); + } + return NULL; +} + +static void aff_set_ref_point_location(struct spu_gang *gang) +{ + int mem_aff, gs, lowest_offset; + struct spu_context *ctx; + struct spu *tmp; + + mem_aff = gang->aff_ref_ctx->flags & SPU_CREATE_AFFINITY_MEM; + lowest_offset = 0; + gs = 0; + + list_for_each_entry(tmp, &gang->aff_list_head, aff_list) + gs++; + + list_for_each_entry_reverse(ctx, &gang->aff_ref_ctx->aff_list, + aff_list) { + if (&ctx->aff_list == &gang->aff_list_head) + break; + lowest_offset = ctx->aff_offset; + } + + gang->aff_ref_spu = aff_ref_location(gang->aff_ref_ctx, mem_aff, gs, + lowest_offset); +} + +static struct spu *ctx_location(struct spu *ref, int offset, int node) +{ + struct spu *spu; + + spu = NULL; + if (offset >= 0) { + list_for_each_entry(spu, ref->aff_list.prev, aff_list) { + BUG_ON(spu->node != node); + if (offset == 0) + break; + if (sched_spu(spu)) + offset--; + } + } else { + list_for_each_entry_reverse(spu, ref->aff_list.next, aff_list) { + BUG_ON(spu->node != node); + if (offset == 0) + break; + if (sched_spu(spu)) + offset++; + } + } + + return spu; } -static inline void unbind_context(struct spu *spu, struct spu_context *ctx) +/* + * affinity_check is called each time a context is going to be scheduled. + * It returns the spu ptr on which the context must run. + */ +static int has_affinity(struct spu_context *ctx) { - pr_debug("%s: unbind pid=%d SPU=%d\n", __FUNCTION__, - spu->pid, spu->number); + struct spu_gang *gang = ctx->gang; + + if (list_empty(&ctx->aff_list)) + return 0; + + if (atomic_read(&ctx->gang->aff_sched_count) == 0) + ctx->gang->aff_ref_spu = NULL; + + if (!gang->aff_ref_spu) { + if (!(gang->aff_flags & AFF_MERGED)) + aff_merge_remaining_ctxs(gang); + if (!(gang->aff_flags & AFF_OFFSETS_SET)) + aff_set_offsets(gang); + aff_set_ref_point_location(gang); + } + + return gang->aff_ref_spu != NULL; +} + +/** + * spu_unbind_context - unbind spu context from physical spu + * @spu: physical spu to unbind from + * @ctx: context to unbind + */ +static void spu_unbind_context(struct spu *spu, struct spu_context *ctx) +{ + u32 status; + + spu_context_trace(spu_unbind_context__enter, ctx, spu); + + spuctx_switch_state(ctx, SPU_UTIL_SYSTEM); + + if (spu->ctx->flags & SPU_CREATE_NOSCHED) + atomic_dec(&cbe_spu_info[spu->node].reserved_spus); + + if (ctx->gang) + /* + * If ctx->gang->aff_sched_count is positive, SPU affinity is + * being considered in this gang. 
Using atomic_dec_if_positive + * allow us to skip an explicit check for affinity in this gang + */ + atomic_dec_if_positive(&ctx->gang->aff_sched_count); + + spu_switch_notify(spu, NULL); spu_unmap_mappings(ctx); spu_save(&ctx->csa, spu); + spu_switch_log_notify(spu, ctx, SWITCH_LOG_STOP, 0); + + spin_lock_irq(&spu->register_lock); spu->timestamp = jiffies; ctx->state = SPU_STATE_SAVED; spu->ibox_callback = NULL; spu->wbox_callback = NULL; spu->stop_callback = NULL; - spu->mm = NULL; + spu->mfc_callback = NULL; spu->pid = 0; - spu->prio = MAX_PRIO; + spu->tgid = 0; ctx->ops = &spu_backing_ops; - ctx->spu = NULL; - ctx->flags = 0; spu->flags = 0; spu->ctx = NULL; + spin_unlock_irq(&spu->register_lock); + + spu_associate_mm(spu, NULL); + + ctx->stats.slb_flt += + (spu->stats.slb_flt - ctx->stats.slb_flt_base); + ctx->stats.class2_intr += + (spu->stats.class2_intr - ctx->stats.class2_intr_base); + + /* This maps the underlying spu state to idle */ + spuctx_switch_state(ctx, SPU_UTIL_IDLE_LOADED); + ctx->spu = NULL; + + if (spu_stopped(ctx, &status)) + wake_up_all(&ctx->stop_wq); } -static void spu_reaper(void *data) +/** + * spu_add_to_rq - add a context to the runqueue + * @ctx: context to add + */ +static void __spu_add_to_rq(struct spu_context *ctx) { - struct spu_context *ctx = data; - struct spu *spu; + /* + * Unfortunately this code path can be called from multiple threads + * on behalf of a single context due to the way the problem state + * mmap support works. + * + * Fortunately we need to wake up all these threads at the same time + * and can simply skip the runqueue addition for every but the first + * thread getting into this codepath. + * + * It's still quite hacky, and long-term we should proxy all other + * threads through the owner thread so that spu_run is in control + * of all the scheduling activity for a given context. 
+ */ + if (list_empty(&ctx->rq)) { + list_add_tail(&ctx->rq, &spu_prio->runq[ctx->prio]); + set_bit(ctx->prio, spu_prio->bitmap); + if (!spu_prio->nr_waiting++) + mod_timer(&spusched_timer, jiffies + SPUSCHED_TICK); + } +} - down_write(&ctx->state_sema); - spu = ctx->spu; - if (spu && test_bit(SPU_CONTEXT_PREEMPT, &ctx->flags)) { - if (atomic_read(&spu->rq->prio.nr_blocked)) { - pr_debug("%s: spu=%d\n", __func__, spu->number); - ctx->ops->runcntl_stop(ctx); - spu_deactivate(ctx); - wake_up_all(&ctx->stop_wq); - } else { - clear_bit(SPU_CONTEXT_PREEMPT, &ctx->flags); - } +static void spu_add_to_rq(struct spu_context *ctx) +{ + spin_lock(&spu_prio->runq_lock); + __spu_add_to_rq(ctx); + spin_unlock(&spu_prio->runq_lock); +} + +static void __spu_del_from_rq(struct spu_context *ctx) +{ + int prio = ctx->prio; + + if (!list_empty(&ctx->rq)) { + if (!--spu_prio->nr_waiting) + del_timer(&spusched_timer); + list_del_init(&ctx->rq); + + if (list_empty(&spu_prio->runq[prio])) + clear_bit(prio, spu_prio->bitmap); } - up_write(&ctx->state_sema); - put_spu_context(ctx); } -static void schedule_spu_reaper(struct spu_runqueue *rq, struct spu *spu) +void spu_del_from_rq(struct spu_context *ctx) { - struct spu_context *ctx = get_spu_context(spu->ctx); - unsigned long now = jiffies; - unsigned long expire = spu->timestamp + SPU_MIN_TIMESLICE; + spin_lock(&spu_prio->runq_lock); + __spu_del_from_rq(ctx); + spin_unlock(&spu_prio->runq_lock); +} - set_bit(SPU_CONTEXT_PREEMPT, &ctx->flags); - INIT_WORK(&ctx->reap_work, spu_reaper, ctx); - if (time_after(now, expire)) - schedule_work(&ctx->reap_work); - else - schedule_delayed_work(&ctx->reap_work, expire - now); +static void spu_prio_wait(struct spu_context *ctx) +{ + DEFINE_WAIT(wait); + + /* + * The caller must explicitly wait for a context to be loaded + * if the nosched flag is set. If NOSCHED is not set, the caller + * queues the context and waits for an spu event or error. 
+ */ + BUG_ON(!(ctx->flags & SPU_CREATE_NOSCHED)); + + spin_lock(&spu_prio->runq_lock); + prepare_to_wait_exclusive(&ctx->stop_wq, &wait, TASK_INTERRUPTIBLE); + if (!signal_pending(current)) { + __spu_add_to_rq(ctx); + spin_unlock(&spu_prio->runq_lock); + mutex_unlock(&ctx->state_mutex); + schedule(); + mutex_lock(&ctx->state_mutex); + spin_lock(&spu_prio->runq_lock); + __spu_del_from_rq(ctx); + } + spin_unlock(&spu_prio->runq_lock); + __set_current_state(TASK_RUNNING); + remove_wait_queue(&ctx->stop_wq, &wait); } -static void check_preempt_active(struct spu_runqueue *rq) +static struct spu *spu_get_idle(struct spu_context *ctx) { - struct list_head *p; - struct spu *worst = NULL; + struct spu *spu, *aff_ref_spu; + int node, n; - list_for_each(p, &rq->active_list) { - struct spu *spu = list_entry(p, struct spu, sched_list); - struct spu_context *ctx = spu->ctx; - if (!test_bit(SPU_CONTEXT_PREEMPT, &ctx->flags)) { - if (!worst || (spu->prio > worst->prio)) { - worst = spu; - } + spu_context_nospu_trace(spu_get_idle__enter, ctx); + + if (ctx->gang) { + mutex_lock(&ctx->gang->aff_mutex); + if (has_affinity(ctx)) { + aff_ref_spu = ctx->gang->aff_ref_spu; + atomic_inc(&ctx->gang->aff_sched_count); + mutex_unlock(&ctx->gang->aff_mutex); + node = aff_ref_spu->node; + + mutex_lock(&cbe_spu_info[node].list_mutex); + spu = ctx_location(aff_ref_spu, ctx->aff_offset, node); + if (spu && spu->alloc_state == SPU_FREE) + goto found; + mutex_unlock(&cbe_spu_info[node].list_mutex); + + atomic_dec(&ctx->gang->aff_sched_count); + goto not_found; + } + mutex_unlock(&ctx->gang->aff_mutex); + } + node = cpu_to_node(raw_smp_processor_id()); + for (n = 0; n < MAX_NUMNODES; n++, node++) { + node = (node < MAX_NUMNODES) ? node : 0; + if (!node_allowed(ctx, node)) + continue; + + mutex_lock(&cbe_spu_info[node].list_mutex); + list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { + if (spu->alloc_state == SPU_FREE) + goto found; } + mutex_unlock(&cbe_spu_info[node].list_mutex); } - if (worst && (current->prio < worst->prio)) - schedule_spu_reaper(rq, worst); + + not_found: + spu_context_nospu_trace(spu_get_idle__not_found, ctx); + return NULL; + + found: + spu->alloc_state = SPU_USED; + mutex_unlock(&cbe_spu_info[node].list_mutex); + spu_context_trace(spu_get_idle__found, ctx, spu); + spu_init_channels(spu); + return spu; } -static struct spu *get_idle_spu(struct spu_context *ctx, u64 flags) +/** + * find_victim - find a lower priority context to preempt + * @ctx: canidate context for running + * + * Returns the freed physical spu to run the new context on. + */ +static struct spu *find_victim(struct spu_context *ctx) { - struct spu_runqueue *rq; - struct spu *spu = NULL; + struct spu_context *victim = NULL; + struct spu *spu; + int node, n; - rq = spu_rq(); - down(&rq->sem); - for (;;) { - if (rq->nr_idle > 0) { - if (is_best_prio(rq)) { - /* Fall through. */ - spu = del_idle(rq); - break; - } else { - prio_wakeup(rq); - up(&rq->sem); - yield(); - if (signal_pending(current)) { - return NULL; - } - rq = spu_rq(); - down(&rq->sem); - continue; + spu_context_nospu_trace(spu_find_victim__enter, ctx); + + /* + * Look for a possible preemption candidate on the local node first. + * If there is no candidate look at the other nodes. This isn't + * exactly fair, but so far the whole spu scheduler tries to keep + * a strong node affinity. We might want to fine-tune this in + * the future. 
+ */ + restart: + node = cpu_to_node(raw_smp_processor_id()); + for (n = 0; n < MAX_NUMNODES; n++, node++) { + node = (node < MAX_NUMNODES) ? node : 0; + if (!node_allowed(ctx, node)) + continue; + + mutex_lock(&cbe_spu_info[node].list_mutex); + list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { + struct spu_context *tmp = spu->ctx; + + if (tmp && tmp->prio > ctx->prio && + !(tmp->flags & SPU_CREATE_NOSCHED) && + (!victim || tmp->prio > victim->prio)) { + victim = spu->ctx; } - } else { - check_preempt_active(rq); - prio_wait(rq, ctx, flags); - if (signal_pending(current)) { - prio_wakeup(rq); - spu = NULL; - break; + } + if (victim) + get_spu_context(victim); + mutex_unlock(&cbe_spu_info[node].list_mutex); + + if (victim) { + /* + * This nests ctx->state_mutex, but we always lock + * higher priority contexts before lower priority + * ones, so this is safe until we introduce + * priority inheritance schemes. + * + * XXX if the highest priority context is locked, + * this can loop a long time. Might be better to + * look at another context or give up after X retries. + */ + if (!mutex_trylock(&victim->state_mutex)) { + put_spu_context(victim); + victim = NULL; + goto restart; } - continue; + + spu = victim->spu; + if (!spu || victim->prio <= ctx->prio) { + /* + * This race can happen because we've dropped + * the active list mutex. Not a problem, just + * restart the search. + */ + mutex_unlock(&victim->state_mutex); + put_spu_context(victim); + victim = NULL; + goto restart; + } + + spu_context_trace(__spu_deactivate__unload, ctx, spu); + + mutex_lock(&cbe_spu_info[node].list_mutex); + cbe_spu_info[node].nr_active--; + spu_unbind_context(spu, victim); + mutex_unlock(&cbe_spu_info[node].list_mutex); + + victim->stats.invol_ctx_switch++; + spu->stats.invol_ctx_switch++; + if (test_bit(SPU_SCHED_SPU_RUN, &victim->sched_flags)) + spu_add_to_rq(victim); + + mutex_unlock(&victim->state_mutex); + put_spu_context(victim); + + return spu; } } - up(&rq->sem); - return spu; + + return NULL; } -static void put_idle_spu(struct spu *spu) +static void __spu_schedule(struct spu *spu, struct spu_context *ctx) { - struct spu_runqueue *rq = spu->rq; + int node = spu->node; + int success = 0; + + spu_set_timeslice(ctx); - down(&rq->sem); - add_idle(rq, spu); - prio_wakeup(rq); - up(&rq->sem); + mutex_lock(&cbe_spu_info[node].list_mutex); + if (spu->ctx == NULL) { + spu_bind_context(spu, ctx); + cbe_spu_info[node].nr_active++; + spu->alloc_state = SPU_USED; + success = 1; + } + mutex_unlock(&cbe_spu_info[node].list_mutex); + + if (success) + wake_up_all(&ctx->run_wq); + else + spu_add_to_rq(ctx); } -static int get_active_spu(struct spu *spu) +static void spu_schedule(struct spu *spu, struct spu_context *ctx) { - struct spu_runqueue *rq = spu->rq; - struct list_head *p; - struct spu *tmp; - int rc = 0; - - down(&rq->sem); - list_for_each(p, &rq->active_list) { - tmp = list_entry(p, struct spu, sched_list); - if (tmp == spu) { - del_active(rq, spu); - rc = 1; - break; - } - } - up(&rq->sem); - return rc; + /* not a candidate for interruptible because it's called either + from the scheduler thread or from spu_deactivate */ + mutex_lock(&ctx->state_mutex); + if (ctx->state == SPU_STATE_SAVED) + __spu_schedule(spu, ctx); + spu_release(ctx); } -static void put_active_spu(struct spu *spu) +/** + * spu_unschedule - remove a context from a spu, and possibly release it. 
+ * @spu: The SPU to unschedule from + * @ctx: The context currently scheduled on the SPU + * @free_spu Whether to free the SPU for other contexts + * + * Unbinds the context @ctx from the SPU @spu. If @free_spu is non-zero, the + * SPU is made available for other contexts (ie, may be returned by + * spu_get_idle). If this is zero, the caller is expected to schedule another + * context to this spu. + * + * Should be called with ctx->state_mutex held. + */ +static void spu_unschedule(struct spu *spu, struct spu_context *ctx, + int free_spu) { - struct spu_runqueue *rq = spu->rq; + int node = spu->node; - down(&rq->sem); - add_active(rq, spu); - up(&rq->sem); + mutex_lock(&cbe_spu_info[node].list_mutex); + cbe_spu_info[node].nr_active--; + if (free_spu) + spu->alloc_state = SPU_FREE; + spu_unbind_context(spu, ctx); + ctx->stats.invol_ctx_switch++; + spu->stats.invol_ctx_switch++; + mutex_unlock(&cbe_spu_info[node].list_mutex); } -/* Lock order: - * spu_activate() & spu_deactivate() require the - * caller to have down_write(&ctx->state_sema). +/** + * spu_activate - find a free spu for a context and execute it + * @ctx: spu context to schedule + * @flags: flags (currently ignored) * - * The rq->sem is breifly held (inside or outside a - * given ctx lock) for list management, but is never - * held during save/restore. + * Tries to find a free spu to run @ctx. If no free spu is available + * add the context to the runqueue so it gets woken up once an spu + * is available. */ - -int spu_activate(struct spu_context *ctx, u64 flags) +int spu_activate(struct spu_context *ctx, unsigned long flags) { struct spu *spu; + /* + * If there are multiple threads waiting for a single context + * only one actually binds the context while the others will + * only be able to acquire the state_mutex once the context + * already is in runnable state. + */ if (ctx->spu) return 0; - spu = get_idle_spu(ctx, flags); - if (!spu) - return (signal_pending(current)) ? -ERESTARTSYS : -EAGAIN; - bind_context(spu, ctx); + +spu_activate_top: + if (signal_pending(current)) + return -ERESTARTSYS; + + spu = spu_get_idle(ctx); /* - * We're likely to wait for interrupts on the same - * CPU that we are now on, so send them here. + * If this is a realtime thread we try to get it running by + * preempting a lower priority thread. */ - spu_irq_setaffinity(spu, raw_smp_processor_id()); - put_active_spu(spu); + if (!spu && rt_prio(ctx->prio)) + spu = find_victim(ctx); + if (spu) { + unsigned long runcntl; + + runcntl = ctx->ops->runcntl_read(ctx); + __spu_schedule(spu, ctx); + if (runcntl & SPU_RUNCNTL_RUNNABLE) + spuctx_switch_state(ctx, SPU_UTIL_USER); + + return 0; + } + + if (ctx->flags & SPU_CREATE_NOSCHED) { + spu_prio_wait(ctx); + goto spu_activate_top; + } + + spu_add_to_rq(ctx); + return 0; } +/** + * grab_runnable_context - try to find a runnable context + * + * Remove the highest priority context on the runqueue and return it + * to the caller. Returns %NULL if no runnable context was found. 
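For readers unfamiliar with O(1)-style runqueues, a rough user-space model of the lookup grab_runnable_context() performs follows: one list per priority plus a bitmap of non-empty priorities, scanned from the numerically lowest (highest-priority) entry, as the find_first_bit() call below does. Names and sizes in the sketch are illustrative assumptions, not the kernel's.

#include <stdio.h>

#define MAX_PRIO 140                          /* assumed scheduler priority range */

static unsigned char prio_bitmap[MAX_PRIO];   /* 1 = runq[prio] is non-empty */
static int runq_len[MAX_PRIO];                /* stand-in for the per-prio list heads */

/* Return the best (lowest-numbered) priority below 'prio' with a waiting
 * context, removing one entry from it; -1 plays the role of NULL. */
static int pick_runnable(int prio)
{
        int best;

        for (best = 0; best < prio; best++) {          /* find_first_bit() analogue */
                if (!prio_bitmap[best])
                        continue;
                if (--runq_len[best] == 0)             /* __spu_del_from_rq() analogue */
                        prio_bitmap[best] = 0;
                return best;
        }
        return -1;
}

int main(void)
{
        runq_len[120] = 1; prio_bitmap[120] = 1;
        runq_len[100] = 1; prio_bitmap[100] = 1;

        printf("%d\n", pick_runnable(MAX_PRIO));       /* 100: best waiter wins */
        printf("%d\n", pick_runnable(110));            /* -1: only prio 120 is left */
        return 0;
}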
+ */ +static struct spu_context *grab_runnable_context(int prio, int node) +{ + struct spu_context *ctx; + int best; + + spin_lock(&spu_prio->runq_lock); + best = find_first_bit(spu_prio->bitmap, prio); + while (best < prio) { + struct list_head *rq = &spu_prio->runq[best]; + + list_for_each_entry(ctx, rq, rq) { + /* XXX(hch): check for affinity here as well */ + if (__node_allowed(ctx, node)) { + __spu_del_from_rq(ctx); + goto found; + } + } + best++; + } + ctx = NULL; + found: + spin_unlock(&spu_prio->runq_lock); + return ctx; +} + +static int __spu_deactivate(struct spu_context *ctx, int force, int max_prio) +{ + struct spu *spu = ctx->spu; + struct spu_context *new = NULL; + + if (spu) { + new = grab_runnable_context(max_prio, spu->node); + if (new || force) { + spu_unschedule(spu, ctx, new == NULL); + if (new) { + if (new->flags & SPU_CREATE_NOSCHED) + wake_up(&new->stop_wq); + else { + spu_release(ctx); + spu_schedule(spu, new); + /* this one can't easily be made + interruptible */ + mutex_lock(&ctx->state_mutex); + } + } + } + } + + return new != NULL; +} + +/** + * spu_deactivate - unbind a context from it's physical spu + * @ctx: spu context to unbind + * + * Unbind @ctx from the physical spu it is running on and schedule + * the highest priority context to run on the freed physical spu. + */ void spu_deactivate(struct spu_context *ctx) { - struct spu *spu; - int needs_idle; + spu_context_nospu_trace(spu_deactivate__enter, ctx); + __spu_deactivate(ctx, 1, MAX_PRIO); +} + +/** + * spu_yield - yield a physical spu if others are waiting + * @ctx: spu context to yield + * + * Check if there is a higher priority context waiting and if yes + * unbind @ctx from the physical spu and schedule the highest + * priority context to run on the freed physical spu instead. + */ +void spu_yield(struct spu_context *ctx) +{ + spu_context_nospu_trace(spu_yield__enter, ctx); + if (!(ctx->flags & SPU_CREATE_NOSCHED)) { + mutex_lock(&ctx->state_mutex); + __spu_deactivate(ctx, 0, MAX_PRIO); + mutex_unlock(&ctx->state_mutex); + } +} + +static noinline void spusched_tick(struct spu_context *ctx) +{ + struct spu_context *new = NULL; + struct spu *spu = NULL; + + if (spu_acquire(ctx)) + BUG(); /* a kernel thread never has signals pending */ + + if (ctx->state != SPU_STATE_RUNNABLE) + goto out; + if (ctx->flags & SPU_CREATE_NOSCHED) + goto out; + if (ctx->policy == SCHED_FIFO) + goto out; + + if (--ctx->time_slice && test_bit(SPU_SCHED_SPU_RUN, &ctx->sched_flags)) + goto out; spu = ctx->spu; - if (!spu) - return; - needs_idle = get_active_spu(spu); - unbind_context(spu, ctx); - if (needs_idle) - put_idle_spu(spu); + + spu_context_trace(spusched_tick__preempt, ctx, spu); + + new = grab_runnable_context(ctx->prio + 1, spu->node); + if (new) { + spu_unschedule(spu, ctx, 0); + if (test_bit(SPU_SCHED_SPU_RUN, &ctx->sched_flags)) + spu_add_to_rq(ctx); + } else { + spu_context_nospu_trace(spusched_tick__newslice, ctx); + if (!ctx->time_slice) + ctx->time_slice++; + } +out: + spu_release(ctx); + + if (new) + spu_schedule(spu, new); } -void spu_yield(struct spu_context *ctx) +/** + * count_active_contexts - count nr of active tasks + * + * Return the number of tasks currently running or waiting to run. + * + * Note that we don't take runq_lock / list_mutex here. Reading + * a single 32bit value is atomic on powerpc, and we don't care + * about memory ordering issues here. 
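The count returned here is what spu_calc_load() below feeds into the same exponentially decayed fixed-point average the CPU loadavg uses. A stand-alone sketch of that arithmetic; FSHIFT and EXP_1 are assumed to be the generic kernel values (1 << 11 and 1884), and the FIXED_1/200 term mirrors the rounding applied when the value is printed for /proc:

#include <stdio.h>

#define FSHIFT  11                      /* assumed generic kernel value */
#define FIXED_1 (1 << FSHIFT)           /* 1.0 in fixed point */
#define EXP_1   1884                    /* ~1/exp(5s/1min) in fixed point (assumed) */
#define LOAD_INT(x)  ((x) >> FSHIFT)
#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1 - 1)) * 100)

int main(void)
{
        unsigned long avenrun = 0;              /* one-minute average */
        unsigned long active = 3 * FIXED_1;     /* pretend 3 contexts stay active */
        unsigned long a;
        int tick;

        for (tick = 1; tick <= 24; tick++) {    /* 24 five-second samples */
                /* Same recurrence as CALC_LOAD(avenrun, EXP_1, active) */
                avenrun = (avenrun * EXP_1 + active * (FIXED_1 - EXP_1)) >> FSHIFT;

                a = avenrun + FIXED_1 / 200;    /* rounding used at print time */
                printf("%3ds: %lu.%02lu\n", tick * 5, LOAD_INT(a), LOAD_FRAC(a));
        }
        return 0;                               /* output climbs towards 3.00 */
}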
+ */ +static unsigned long count_active_contexts(void) +{ + int nr_active = 0, node; + + for (node = 0; node < MAX_NUMNODES; node++) + nr_active += cbe_spu_info[node].nr_active; + nr_active += spu_prio->nr_waiting; + + return nr_active; +} + +/** + * spu_calc_load - update the avenrun load estimates. + * + * No locking against reading these values from userspace, as for + * the CPU loadavg code. + */ +static void spu_calc_load(void) +{ + unsigned long active_tasks; /* fixed-point */ + + active_tasks = count_active_contexts() * FIXED_1; + CALC_LOAD(spu_avenrun[0], EXP_1, active_tasks); + CALC_LOAD(spu_avenrun[1], EXP_5, active_tasks); + CALC_LOAD(spu_avenrun[2], EXP_15, active_tasks); +} + +static void spusched_wake(unsigned long data) +{ + mod_timer(&spusched_timer, jiffies + SPUSCHED_TICK); + wake_up_process(spusched_task); +} + +static void spuloadavg_wake(unsigned long data) +{ + mod_timer(&spuloadavg_timer, jiffies + LOAD_FREQ); + spu_calc_load(); +} + +static int spusched_thread(void *unused) { struct spu *spu; - int need_yield = 0; + int node; - down_write(&ctx->state_sema); - spu = ctx->spu; - if (spu && (sched_find_first_bit(spu->rq->prio.bitmap) < MAX_PRIO)) { - pr_debug("%s: yielding SPU %d\n", __FUNCTION__, spu->number); - spu_deactivate(ctx); - ctx->state = SPU_STATE_SAVED; - need_yield = 1; - } else if (spu) { - spu->prio = MAX_PRIO; + while (!kthread_should_stop()) { + set_current_state(TASK_INTERRUPTIBLE); + schedule(); + for (node = 0; node < MAX_NUMNODES; node++) { + struct mutex *mtx = &cbe_spu_info[node].list_mutex; + + mutex_lock(mtx); + list_for_each_entry(spu, &cbe_spu_info[node].spus, + cbe_list) { + struct spu_context *ctx = spu->ctx; + + if (ctx) { + get_spu_context(ctx); + mutex_unlock(mtx); + spusched_tick(ctx); + mutex_lock(mtx); + put_spu_context(ctx); + } + } + mutex_unlock(mtx); + } } - up_write(&ctx->state_sema); - if (unlikely(need_yield)) - yield(); + + return 0; } -int __init spu_sched_init(void) +void spuctx_switch_state(struct spu_context *ctx, + enum spu_utilization_state new_state) { - struct spu_runqueue *rq; + unsigned long long curtime; + signed long long delta; + struct timespec ts; struct spu *spu; - int i; + enum spu_utilization_state old_state; + int node; - rq = spu_runqueues = kmalloc(sizeof(struct spu_runqueue), GFP_KERNEL); - if (!rq) { - printk(KERN_WARNING "%s: Unable to allocate runqueues.\n", - __FUNCTION__); - return 1; + ktime_get_ts(&ts); + curtime = timespec_to_ns(&ts); + delta = curtime - ctx->stats.tstamp; + + WARN_ON(!mutex_is_locked(&ctx->state_mutex)); + WARN_ON(delta < 0); + + spu = ctx->spu; + old_state = ctx->stats.util_state; + ctx->stats.util_state = new_state; + ctx->stats.tstamp = curtime; + + /* + * Update the physical SPU utilization statistics. 
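A compact model of the bookkeeping performed here: each state transition charges the time elapsed since the previous transition to the state that is being left. The state names and the user-space clock below are stand-ins for illustration, not the kernel's types.

#include <stdio.h>
#include <time.h>

enum util_state { UTIL_USER, UTIL_SYSTEM, UTIL_IDLE, UTIL_MAX };

struct util_stats {
        enum util_state state;
        long long tstamp;               /* ns at the last transition */
        long long times[UTIL_MAX];      /* ns accumulated per state */
};

static long long now_ns(void)
{
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (long long)ts.tv_sec * 1000000000LL + ts.tv_nsec;
}

static void switch_state(struct util_stats *s, enum util_state new_state)
{
        long long curtime = now_ns();

        s->times[s->state] += curtime - s->tstamp;      /* charge the old state */
        s->state = new_state;
        s->tstamp = curtime;
}

int main(void)
{
        struct util_stats stats = { UTIL_IDLE, now_ns(), { 0 } };

        switch_state(&stats, UTIL_SYSTEM);      /* context-switch work begins */
        switch_state(&stats, UTIL_USER);        /* user code starts running */
        switch_state(&stats, UTIL_IDLE);        /* back to idle */

        printf("system: %lld ns, user: %lld ns\n",
               stats.times[UTIL_SYSTEM], stats.times[UTIL_USER]);
        return 0;
}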
+ */ + if (spu) { + ctx->stats.times[old_state] += delta; + spu->stats.times[old_state] += delta; + spu->stats.util_state = new_state; + spu->stats.tstamp = curtime; + node = spu->node; + if (old_state == SPU_UTIL_USER) + atomic_dec(&cbe_spu_info[node].busy_spus); + if (new_state == SPU_UTIL_USER) + atomic_inc(&cbe_spu_info[node].busy_spus); } - memset(rq, 0, sizeof(struct spu_runqueue)); - init_MUTEX(&rq->sem); - INIT_LIST_HEAD(&rq->active_list); - INIT_LIST_HEAD(&rq->idle_list); - rq->nr_active = 0; - rq->nr_idle = 0; - rq->nr_switches = 0; - atomic_set(&rq->prio.nr_blocked, 0); +} + +#define LOAD_INT(x) ((x) >> FSHIFT) +#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100) + +static int show_spu_loadavg(struct seq_file *s, void *private) +{ + int a, b, c; + + a = spu_avenrun[0] + (FIXED_1/200); + b = spu_avenrun[1] + (FIXED_1/200); + c = spu_avenrun[2] + (FIXED_1/200); + + /* + * Note that last_pid doesn't really make much sense for the + * SPU loadavg (it even seems very odd on the CPU side...), + * but we include it here to have a 100% compatible interface. + */ + seq_printf(s, "%d.%02d %d.%02d %d.%02d %ld/%d %d\n", + LOAD_INT(a), LOAD_FRAC(a), + LOAD_INT(b), LOAD_FRAC(b), + LOAD_INT(c), LOAD_FRAC(c), + count_active_contexts(), + atomic_read(&nr_spu_contexts), + task_active_pid_ns(current)->last_pid); + return 0; +} + +static int spu_loadavg_open(struct inode *inode, struct file *file) +{ + return single_open(file, show_spu_loadavg, NULL); +} + +static const struct file_operations spu_loadavg_fops = { + .open = spu_loadavg_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +int __init spu_sched_init(void) +{ + struct proc_dir_entry *entry; + int err = -ENOMEM, i; + + spu_prio = kzalloc(sizeof(struct spu_prio_array), GFP_KERNEL); + if (!spu_prio) + goto out; + for (i = 0; i < MAX_PRIO; i++) { - init_waitqueue_head(&rq->prio.waitq[i]); - __clear_bit(i, rq->prio.bitmap); - } - __set_bit(MAX_PRIO, rq->prio.bitmap); - for (;;) { - spu = spu_alloc(); - if (!spu) - break; - pr_debug("%s: adding SPU[%d]\n", __FUNCTION__, spu->number); - add_idle(rq, spu); - spu->rq = rq; - spu->timestamp = jiffies; + INIT_LIST_HEAD(&spu_prio->runq[i]); + __clear_bit(i, spu_prio->bitmap); } - if (!rq->nr_idle) { - printk(KERN_WARNING "%s: No available SPUs.\n", __FUNCTION__); - kfree(rq); - return 1; + spin_lock_init(&spu_prio->runq_lock); + + setup_timer(&spusched_timer, spusched_wake, 0); + setup_timer(&spuloadavg_timer, spuloadavg_wake, 0); + + spusched_task = kthread_run(spusched_thread, NULL, "spusched"); + if (IS_ERR(spusched_task)) { + err = PTR_ERR(spusched_task); + goto out_free_spu_prio; } + + mod_timer(&spuloadavg_timer, 0); + + entry = proc_create("spu_loadavg", 0, NULL, &spu_loadavg_fops); + if (!entry) + goto out_stop_kthread; + + pr_debug("spusched: tick: %d, min ticks: %d, default ticks: %d\n", + SPUSCHED_TICK, MIN_SPU_TIMESLICE, DEF_SPU_TIMESLICE); return 0; + + out_stop_kthread: + kthread_stop(spusched_task); + out_free_spu_prio: + kfree(spu_prio); + out: + return err; } -void __exit spu_sched_exit(void) +void spu_sched_exit(void) { - struct spu_runqueue *rq = spu_rq(); struct spu *spu; + int node; - if (!rq) { - printk(KERN_WARNING "%s: no runqueues!\n", __FUNCTION__); - return; - } - while (rq->nr_idle > 0) { - spu = del_idle(rq); - if (!spu) - break; - spu_free(spu); + remove_proc_entry("spu_loadavg", NULL); + + del_timer_sync(&spusched_timer); + del_timer_sync(&spuloadavg_timer); + kthread_stop(spusched_task); + + for (node = 0; node < MAX_NUMNODES; 
node++) { + mutex_lock(&cbe_spu_info[node].list_mutex); + list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) + if (spu->alloc_state != SPU_FREE) + spu->alloc_state = SPU_FREE; + mutex_unlock(&cbe_spu_info[node].list_mutex); } - kfree(rq); + kfree(spu_prio); } diff --git a/arch/powerpc/platforms/cell/spufs/spu_restore.c b/arch/powerpc/platforms/cell/spufs/spu_restore.c index 0bf723dcd67..72c905f1ee7 100644 --- a/arch/powerpc/platforms/cell/spufs/spu_restore.c +++ b/arch/powerpc/platforms/cell/spufs/spu_restore.c @@ -84,13 +84,13 @@ static inline void restore_decr(void) unsigned int decr_running; unsigned int decr; - /* Restore, Step 6: + /* Restore, Step 6(moved): * If the LSCSA "decrementer running" flag is set * then write the SPU_WrDec channel with the * decrementer value from LSCSA. */ offset = LSCSA_QW_OFFSET(decr_status); - decr_running = regs_spill[offset].slot[0]; + decr_running = regs_spill[offset].slot[0] & SPU_DECR_STATUS_RUNNING; if (decr_running) { offset = LSCSA_QW_OFFSET(decr); decr = regs_spill[offset].slot[0]; @@ -284,7 +284,7 @@ static inline void restore_complete(void) exit_instrs[3] = BR_INSTR; break; default: - /* SPU_Status[R]=1. No additonal instructions. */ + /* SPU_Status[R]=1. No additional instructions. */ break; } spu_sync(); @@ -296,7 +296,7 @@ static inline void restore_complete(void) * This code deviates from the documented sequence in the * following aspects: * - * 1. The EA for LSCSA is passed from PPE in the + * 1. The EA for LSCSA is passed from PPE in the * signal notification channels. * 2. The register spill area is pulled by SPU * into LS, rather than pushed by PPE. @@ -318,10 +318,10 @@ int main() build_dma_list(lscsa_ea); /* Step 3. */ restore_upper_240kb(lscsa_ea); /* Step 4. */ /* Step 5: done by 'exit'. */ - restore_decr(); /* Step 6. */ enqueue_putllc(lscsa_ea); /* Step 7. */ set_tag_update(); /* Step 8. */ read_tag_status(); /* Step 9. */ + restore_decr(); /* moved Step 6. */ read_llar_status(); /* Step 10. */ write_ppu_mb(); /* Step 11. */ write_ppuint_mb(); /* Step 12. */ diff --git a/arch/powerpc/platforms/cell/spufs/spu_restore_dump.h_shipped b/arch/powerpc/platforms/cell/spufs/spu_restore_dump.h_shipped index 1b2355ff703..f383b027e8b 100644 --- a/arch/powerpc/platforms/cell/spufs/spu_restore_dump.h_shipped +++ b/arch/powerpc/platforms/cell/spufs/spu_restore_dump.h_shipped @@ -3,229 +3,933 @@ * Hex-dump auto generated from spu_restore.c. * Do not edit! 
*/ -static unsigned int spu_restore_code[] __page_aligned = { -0x40800000, 0x409ff801, 0x24000080, 0x24fd8081, -0x1cd80081, 0x33001180, 0x42030003, 0x33800284, -0x1c010204, 0x40200000, 0x40200000, 0x40200000, -0x34000190, 0x34004191, 0x34008192, 0x3400c193, -0x141fc205, 0x23fffd84, 0x1c100183, 0x217ffa85, -0x3080a000, 0x3080a201, 0x3080a402, 0x3080a603, -0x3080a804, 0x3080aa05, 0x3080ac06, 0x3080ae07, -0x3080b008, 0x3080b209, 0x3080b40a, 0x3080b60b, -0x3080b80c, 0x3080ba0d, 0x3080bc0e, 0x3080be0f, -0x00003ffc, 0x00000000, 0x00000000, 0x00000000, -0x01a00182, 0x3ec00083, 0xb0a14103, 0x01a00204, -0x3ec10082, 0x4202800e, 0x04000703, 0xb0a14202, -0x21a00803, 0x3fbf028d, 0x3f20068d, 0x3fbe0682, -0x3fe30102, 0x21a00882, 0x3f82028f, 0x3fe3078f, -0x3fbf0784, 0x3f200204, 0x3fbe0204, 0x3fe30204, -0x04000203, 0x21a00903, 0x40848002, 0x21a00982, -0x40800003, 0x21a00a03, 0x40802002, 0x21a00a82, -0x21a00083, 0x40800082, 0x21a00b02, 0x10002818, -0x40a80002, 0x32800007, 0x4207000c, 0x18008208, -0x40a0000b, 0x4080020a, 0x40800709, 0x00200000, -0x42070002, 0x3ac30384, 0x1cffc489, 0x00200000, -0x18008383, 0x38830382, 0x4cffc486, 0x3ac28185, -0xb0408584, 0x28830382, 0x1c020387, 0x38828182, -0xb0408405, 0x1802c408, 0x28828182, 0x217ff886, -0x04000583, 0x21a00803, 0x3fbe0682, 0x3fe30102, -0x04000106, 0x21a00886, 0x04000603, 0x21a00903, -0x40803c02, 0x21a00982, 0x40800003, 0x04000184, -0x21a00a04, 0x40802202, 0x21a00a82, 0x42028005, -0x34208702, 0x21002282, 0x21a00804, 0x21a00886, -0x3fbf0782, 0x3f200102, 0x3fbe0102, 0x3fe30102, -0x21a00902, 0x40804003, 0x21a00983, 0x21a00a04, -0x40805a02, 0x21a00a82, 0x40800083, 0x21a00b83, -0x01a00c02, 0x01a00d83, 0x3420c282, 0x21a00e02, -0x34210283, 0x21a00f03, 0x34200284, 0x77400200, -0x3421c282, 0x21a00702, 0x34218283, 0x21a00083, -0x34214282, 0x21a00b02, 0x4200480c, 0x00200000, -0x1c010286, 0x34220284, 0x34220302, 0x0f608203, -0x5c024204, 0x3b81810b, 0x42013c02, 0x00200000, -0x18008185, 0x38808183, 0x3b814182, 0x21004e84, -0x4020007f, 0x35000100, 0x000004e0, 0x000002a0, -0x000002e8, 0x00000428, 0x00000360, 0x000002e8, -0x000004a0, 0x00000468, 0x000003c8, 0x00000360, -0x409ffe02, 0x30801203, 0x40800204, 0x3ec40085, -0x10009c09, 0x3ac10606, 0xb060c105, 0x4020007f, -0x4020007f, 0x20801203, 0x38810602, 0xb0408586, -0x28810602, 0x32004180, 0x34204702, 0x21a00382, -0x4020007f, 0x327fdc80, 0x409ffe02, 0x30801203, -0x40800204, 0x3ec40087, 0x40800405, 0x00200000, -0x40800606, 0x3ac10608, 0x3ac14609, 0x3ac1860a, -0xb060c107, 0x20801203, 0x41004003, 0x38810602, -0x4020007f, 0xb0408188, 0x4020007f, 0x28810602, -0x41201002, 0x38814603, 0x10009c09, 0xb060c109, -0x4020007f, 0x28814603, 0x41193f83, 0x38818602, -0x60ffc003, 0xb040818a, 0x28818602, 0x32003080, -0x409ffe02, 0x30801203, 0x40800204, 0x3ec40087, -0x41201008, 0x10009c14, 0x40800405, 0x3ac10609, -0x40800606, 0x3ac1460a, 0xb060c107, 0x3ac1860b, -0x20801203, 0x38810602, 0xb0408409, 0x28810602, -0x38814603, 0xb060c40a, 0x4020007f, 0x28814603, -0x41193f83, 0x38818602, 0x60ffc003, 0xb040818b, -0x28818602, 0x32002380, 0x409ffe02, 0x30801204, -0x40800205, 0x3ec40083, 0x40800406, 0x3ac14607, -0x3ac18608, 0xb0810103, 0x41004002, 0x20801204, -0x4020007f, 0x38814603, 0x10009c0b, 0xb060c107, -0x4020007f, 0x4020007f, 0x28814603, 0x38818602, -0x4020007f, 0x4020007f, 0xb0408588, 0x28818602, -0x4020007f, 0x32001780, 0x409ffe02, 0x1000640e, -0x40800204, 0x30801203, 0x40800405, 0x3ec40087, -0x40800606, 0x3ac10608, 0x3ac14609, 0x3ac1860a, -0xb060c107, 0x20801203, 0x413d8003, 0x38810602, -0x4020007f, 0x327fd780, 0x409ffe02, 0x10007f0c, -0x40800205, 
0x30801204, 0x40800406, 0x3ec40083, -0x3ac14607, 0x3ac18608, 0xb0810103, 0x413d8002, -0x20801204, 0x38814603, 0x4020007f, 0x327feb80, -0x409ffe02, 0x30801203, 0x40800204, 0x3ec40087, -0x40800405, 0x1000650a, 0x40800606, 0x3ac10608, -0x3ac14609, 0x3ac1860a, 0xb060c107, 0x20801203, -0x38810602, 0xb0408588, 0x4020007f, 0x327fc980, -0x00400000, 0x40800003, 0x4020007f, 0x35000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 
0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 
0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, +static unsigned int spu_restore_code[] __attribute__((__aligned__(128))) = { +0x40800000, +0x409ff801, +0x24000080, +0x24fd8081, +0x1cd80081, +0x33001180, +0x42034003, +0x33800284, +0x1c010204, +0x40200000, +0x40200000, +0x40200000, +0x34000190, +0x34004191, +0x34008192, +0x3400c193, +0x141fc205, +0x23fffd84, +0x1c100183, +0x217ffa85, +0x3080b000, +0x3080b201, +0x3080b402, +0x3080b603, +0x3080b804, +0x3080ba05, +0x3080bc06, +0x3080be07, +0x3080c008, +0x3080c209, +0x3080c40a, +0x3080c60b, +0x3080c80c, +0x3080ca0d, +0x3080cc0e, +0x3080ce0f, +0x00003ffc, +0x00000000, +0x00000000, +0x00000000, +0x01a00182, +0x3ec00083, +0xb0a14103, +0x01a00204, +0x3ec10083, +0x4202c002, +0xb0a14203, +0x21a00802, +0x3fbf028a, +0x3f20050a, +0x3fbe0502, +0x3fe30102, +0x21a00882, +0x3f82028b, +0x3fe3058b, +0x3fbf0584, +0x3f200204, +0x3fbe0204, +0x3fe30204, +0x04000203, +0x21a00903, +0x40848002, +0x21a00982, +0x40800003, +0x21a00a03, +0x40802002, +0x21a00a82, +0x21a00083, +0x40800082, +0x21a00b02, +0x10002612, +0x42a00003, +0x42074006, +0x1800c204, +0x40a00008, +0x40800789, +0x1c010305, +0x34000302, +0x1cffc489, +0x3ec00303, +0x3ec00287, +0xb0408403, +0x24000302, +0x34000282, +0x1c020306, +0xb0408207, +0x18020204, +0x24000282, +0x217ffa09, +0x04000402, +0x21a00802, +0x3fbe0504, +0x3fe30204, +0x21a00884, +0x42074002, +0x21a00902, +0x40803c03, +0x21a00983, +0x04000485, +0x21a00a05, +0x40802202, +0x21a00a82, +0x21a00805, +0x21a00884, +0x3fbf0582, +0x3f200102, +0x3fbe0102, +0x3fe30102, +0x21a00902, +0x40804003, +0x21a00983, +0x21a00a05, +0x40805a02, +0x21a00a82, +0x40800083, +0x21a00b83, +0x01a00c02, +0x30809c03, +0x34000182, +0x14004102, +0x21002082, +0x01a00d82, +0x3080a003, +0x34000182, +0x21a00e02, +0x3080a203, +0x34000182, +0x21a00f02, +0x3080a403, +0x34000182, +0x77400100, +0x3080a603, +0x34000182, +0x21a00702, +0x3080a803, +0x34000182, +0x21a00082, +0x3080aa03, +0x34000182, +0x21a00b02, +0x4020007f, +0x3080ae02, +0x42004805, +0x3080ac04, +0x34000103, +0x34000202, +0x1cffc183, +0x3b810106, +0x0f608184, +0x42013802, +0x5c020183, +0x38810102, +0x3b810102, +0x21000e83, +0x4020007f, +0x35000100, +0x00000470, +0x000002f8, +0x00000430, +0x00000360, +0x000002f8, +0x000003c8, +0x000004a8, +0x00000298, +0x00000360, +0x00200000, +0x409ffe02, +0x30801203, +0x40800208, +0x3ec40084, +0x40800407, +0x3ac20289, +0xb060c104, +0x3ac1c284, +0x20801203, +0x38820282, +0x41004003, +0xb0408189, +0x28820282, +0x3881c282, +0xb0408304, +0x2881c282, +0x00400000, +0x40800003, +0x35000000, +0x30809e03, +0x34000182, +0x21a00382, +0x4020007f, +0x327fde00, +0x409ffe02, +0x30801203, +0x40800206, +0x3ec40084, +0x40800407, +0x40800608, +0x3ac1828a, +0x3ac20289, +0xb060c104, +0x3ac1c284, +0x20801203, +0x38818282, +0x41004003, +0xb040818a, +0x10005b0b, +0x41201003, +0x28818282, +0x3881c282, +0xb0408184, +0x41193f83, +0x60ffc003, +0x2881c282, +0x38820282, +0xb0408189, +0x28820282, +0x327fef80, +0x409ffe02, +0x30801203, +0x40800207, +0x3ec40086, +0x4120100b, +0x10005b14, +0x40800404, +0x3ac1c289, +0x40800608, +0xb060c106, +0x3ac10286, +0x3ac2028a, +0x20801203, +0x3881c282, +0x41193f83, +0x60ffc003, +0xb0408589, +0x2881c282, +0x38810282, +0xb0408586, +0x28810282, +0x38820282, 
+0xb040818a, +0x28820282, +0x4020007f, +0x327fe280, +0x409ffe02, +0x30801203, +0x40800207, +0x3ec40084, +0x40800408, +0x10005b14, +0x40800609, +0x3ac1c28a, +0x3ac2028b, +0xb060c104, +0x3ac24284, +0x20801203, +0x41201003, +0x3881c282, +0xb040830a, +0x2881c282, +0x38820282, +0xb040818b, +0x41193f83, +0x60ffc003, +0x28820282, +0x38824282, +0xb0408184, +0x28824282, +0x4020007f, +0x327fd580, +0x409ffe02, +0x1000658e, +0x40800206, +0x30801203, +0x40800407, +0x3ec40084, +0x40800608, +0x3ac1828a, +0x3ac20289, +0xb060c104, +0x3ac1c284, +0x20801203, +0x413d8003, +0x38818282, +0x4020007f, +0x327fd800, +0x409ffe03, +0x30801202, +0x40800207, +0x3ec40084, +0x10005b09, +0x3ac1c288, +0xb0408184, +0x4020007f, +0x4020007f, +0x20801202, +0x3881c282, +0xb0408308, +0x2881c282, +0x327fc680, +0x409ffe02, +0x1000588b, +0x40800208, +0x30801203, +0x40800407, +0x3ec40084, +0x3ac20289, +0xb060c104, +0x3ac1c284, +0x20801203, +0x413d8003, +0x38820282, +0x327fbd80, +0x00200000, +0x00000da0, +0x00000000, +0x00000000, +0x00000000, +0x00000d90, +0x00000000, +0x00000000, +0x00000000, +0x00000db0, +0x00000000, +0x00000000, +0x00000000, +0x00000dc0, +0x00000000, +0x00000000, +0x00000000, +0x00000d80, +0x00000000, +0x00000000, +0x00000000, +0x00000df0, +0x00000000, +0x00000000, +0x00000000, +0x00000de0, +0x00000000, +0x00000000, +0x00000000, +0x00000dd0, +0x00000000, +0x00000000, +0x00000000, +0x00000e04, +0x00000000, +0x00000000, +0x00000000, +0x00000e00, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, 
+0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, 
+0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, }; diff --git a/arch/powerpc/platforms/cell/spufs/spu_save.c b/arch/powerpc/platforms/cell/spufs/spu_save.c index 196033b8a57..ae95cc1701e 100644 --- a/arch/powerpc/platforms/cell/spufs/spu_save.c +++ b/arch/powerpc/platforms/cell/spufs/spu_save.c @@ -44,7 +44,7 @@ static inline void save_event_mask(void) * Read the SPU_RdEventMsk channel and save to the LSCSA. */ offset = LSCSA_QW_OFFSET(event_mask); - regs_spill[offset].slot[0] = spu_readch(SPU_RdEventStatMask); + regs_spill[offset].slot[0] = spu_readch(SPU_RdEventMask); } static inline void save_tag_mask(void) diff --git a/arch/powerpc/platforms/cell/spufs/spu_save_dump.h_shipped b/arch/powerpc/platforms/cell/spufs/spu_save_dump.h_shipped index 39e54003f1d..b9f81ac8a63 100644 --- a/arch/powerpc/platforms/cell/spufs/spu_save_dump.h_shipped +++ b/arch/powerpc/platforms/cell/spufs/spu_save_dump.h_shipped @@ -3,189 +3,741 @@ * Hex-dump auto generated from spu_save.c. * Do not edit! 
*/ -static unsigned int spu_save_code[] __page_aligned = { -0x20805000, 0x20805201, 0x20805402, 0x20805603, -0x20805804, 0x20805a05, 0x20805c06, 0x20805e07, -0x20806008, 0x20806209, 0x2080640a, 0x2080660b, -0x2080680c, 0x20806a0d, 0x20806c0e, 0x20806e0f, -0x4201c003, 0x33800184, 0x1c010204, 0x40200000, -0x24000190, 0x24004191, 0x24008192, 0x2400c193, -0x141fc205, 0x23fffd84, 0x1c100183, 0x217ffb85, -0x40800000, 0x409ff801, 0x24000080, 0x24fd8081, -0x1cd80081, 0x33000180, 0x00000000, 0x00000000, -0x01a00182, 0x3ec00083, 0xb1c38103, 0x01a00204, -0x3ec10082, 0x4201400d, 0xb1c38202, 0x01a00583, -0x34218682, 0x3ed80684, 0xb0408184, 0x24218682, -0x01a00603, 0x00200000, 0x34214682, 0x3ed40684, -0xb0408184, 0x40800003, 0x24214682, 0x21a00083, -0x40800082, 0x21a00b02, 0x4020007f, 0x1000251e, -0x40a80002, 0x32800008, 0x4205c00c, 0x00200000, -0x40a0000b, 0x3f82070f, 0x4080020a, 0x40800709, -0x3fe3078f, 0x3fbf0783, 0x3f200183, 0x3fbe0183, -0x3fe30187, 0x18008387, 0x4205c002, 0x3ac30404, -0x1cffc489, 0x00200000, 0x18008403, 0x38830402, -0x4cffc486, 0x3ac28185, 0xb0408584, 0x28830402, -0x1c020408, 0x38828182, 0xb0408385, 0x1802c387, -0x28828182, 0x217ff886, 0x04000582, 0x32800007, -0x21a00802, 0x3fbf0705, 0x3f200285, 0x3fbe0285, -0x3fe30285, 0x21a00885, 0x04000603, 0x21a00903, -0x40803c02, 0x21a00982, 0x04000386, 0x21a00a06, -0x40801202, 0x21a00a82, 0x73000003, 0x24200683, -0x01a00404, 0x00200000, 0x34204682, 0x3ec40683, -0xb0408203, 0x24204682, 0x01a00783, 0x00200000, -0x3421c682, 0x3edc0684, 0xb0408184, 0x2421c682, -0x21a00806, 0x21a00885, 0x3fbf0784, 0x3f200204, -0x3fbe0204, 0x3fe30204, 0x21a00904, 0x40804002, -0x21a00982, 0x21a00a06, 0x40805a02, 0x21a00a82, -0x04000683, 0x21a00803, 0x21a00885, 0x21a00904, -0x40848002, 0x21a00982, 0x21a00a06, 0x40801002, -0x21a00a82, 0x21a00a06, 0x40806602, 0x00200000, -0x35800009, 0x21a00a82, 0x40800083, 0x21a00b83, -0x01a00c02, 0x01a00d83, 0x00003ffb, 0x40800003, -0x4020007f, 0x35000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 
0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 
0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, -0x00000000, 0x00000000, 0x00000000, 0x00000000, +static unsigned int spu_save_code[] __attribute__((__aligned__(128))) = { +0x20805000, +0x20805201, +0x20805402, +0x20805603, +0x20805804, +0x20805a05, +0x20805c06, +0x20805e07, +0x20806008, +0x20806209, +0x2080640a, +0x2080660b, +0x2080680c, +0x20806a0d, +0x20806c0e, +0x20806e0f, +0x4201c003, +0x33800184, +0x1c010204, +0x40200000, +0x24000190, +0x24004191, +0x24008192, +0x2400c193, +0x141fc205, +0x23fffd84, +0x1c100183, +0x217ffb85, +0x40800000, +0x409ff801, +0x24000080, +0x24fd8081, +0x1cd80081, +0x33000180, +0x00000000, +0x00000000, +0x01a00182, +0x3ec00083, +0xb1c38103, +0x01a00204, +0x3ec10082, +0x4201400d, +0xb1c38202, +0x01a00583, +0x34218682, +0x3ed80684, +0xb0408184, +0x24218682, +0x01a00603, +0x00200000, +0x34214682, +0x3ed40684, +0xb0408184, +0x40800003, +0x24214682, +0x21a00083, +0x40800082, +0x21a00b02, +0x4020007f, +0x1000251e, +0x42a00002, +0x32800008, +0x4205c00c, +0x00200000, +0x40a0000b, +0x3f82070f, +0x4080020a, +0x40800709, +0x3fe3078f, +0x3fbf0783, +0x3f200183, +0x3fbe0183, +0x3fe30187, +0x18008387, +0x4205c002, +0x3ac30404, +0x1cffc489, +0x00200000, +0x18008403, +0x38830402, +0x4cffc486, +0x3ac28185, +0xb0408584, +0x28830402, +0x1c020408, +0x38828182, +0xb0408385, +0x1802c387, +0x28828182, +0x217ff886, +0x04000582, +0x32800007, +0x21a00802, +0x3fbf0705, +0x3f200285, +0x3fbe0285, +0x3fe30285, +0x21a00885, +0x04000603, +0x21a00903, +0x40803c02, +0x21a00982, +0x04000386, +0x21a00a06, +0x40801202, +0x21a00a82, +0x73000003, +0x24200683, +0x01a00404, +0x00200000, +0x34204682, +0x3ec40683, +0xb0408203, +0x24204682, +0x01a00783, +0x00200000, 
+0x3421c682, +0x3edc0684, +0xb0408184, +0x2421c682, +0x21a00806, +0x21a00885, +0x3fbf0784, +0x3f200204, +0x3fbe0204, +0x3fe30204, +0x21a00904, +0x40804002, +0x21a00982, +0x21a00a06, +0x40805a02, +0x21a00a82, +0x04000683, +0x21a00803, +0x21a00885, +0x21a00904, +0x40848002, +0x21a00982, +0x21a00a06, +0x40801002, +0x21a00a82, +0x21a00a06, +0x40806602, +0x00200000, +0x35800009, +0x21a00a82, +0x40800083, +0x21a00b83, +0x01a00c02, +0x01a00d83, +0x00003ffb, +0x40800003, +0x4020007f, +0x35000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, 
+0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, 
+0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, }; diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h index db2601f0abd..bcfd6f063ef 100644 --- a/arch/powerpc/platforms/cell/spufs/spufs.h +++ b/arch/powerpc/platforms/cell/spufs/spufs.h @@ -23,12 +23,20 @@ #define SPUFS_H #include <linux/kref.h> -#include <linux/rwsem.h> +#include <linux/mutex.h> #include <linux/spinlock.h> #include <linux/fs.h> +#include <linux/cpumask.h> #include <asm/spu.h> #include <asm/spu_csa.h> +#include <asm/spu_info.h> + +#define SPUFS_PS_MAP_SIZE 0x20000 +#define SPUFS_MFC_MAP_SIZE 0x1000 +#define SPUFS_CNTL_MAP_SIZE 0x1000 +#define SPUFS_SIGNAL_MAP_SIZE PAGE_SIZE +#define SPUFS_MSS_MAP_SIZE 0x1000 /* The magic number for our file system */ enum { @@ -36,18 +44,55 @@ enum { }; struct spu_context_ops; +struct spu_gang; + +/* ctx->sched_flags */ +enum { + SPU_SCHED_NOTIFY_ACTIVE, + SPU_SCHED_WAS_ACTIVE, /* was active upon spu_acquire_saved() */ + SPU_SCHED_SPU_RUN, /* context is within spu_run */ +}; -#define SPU_CONTEXT_PREEMPT 0UL +enum { + SWITCH_LOG_BUFSIZE = 4096, +}; + +enum { + SWITCH_LOG_START, + SWITCH_LOG_STOP, + SWITCH_LOG_EXIT, +}; + +struct switch_log { + wait_queue_head_t wait; + unsigned long head; + unsigned long tail; + struct switch_log_entry { + struct timespec tstamp; + s32 spu_id; + u32 type; + u32 val; + u64 timebase; + } log[]; +}; struct spu_context { struct spu *spu; /* pointer to a physical SPU */ struct spu_state csa; /* SPU context save area. */ spinlock_t mmio_lock; /* protects mmio access */ - struct address_space *local_store;/* local store backing store */ + struct address_space *local_store; /* local store mapping. */ + struct address_space *mfc; /* 'mfc' area mappings. */ + struct address_space *cntl; /* 'control' area mappings. */ + struct address_space *signal1; /* 'signal1' area mappings. */ + struct address_space *signal2; /* 'signal2' area mappings. */ + struct address_space *mss; /* 'mss' area mappings. */ + struct address_space *psmap; /* 'psmap' area mappings. 
*/ + struct mutex mapping_lock; + u64 object_id; /* user space pointer for oprofile */ enum { SPU_STATE_RUNNABLE, SPU_STATE_SAVED } state; - struct rw_semaphore state_sema; - struct semaphore run_sema; + struct mutex state_mutex; + struct mutex run_mutex; struct mm_struct *owner; @@ -55,13 +100,89 @@ struct spu_context { wait_queue_head_t ibox_wq; wait_queue_head_t wbox_wq; wait_queue_head_t stop_wq; + wait_queue_head_t mfc_wq; + wait_queue_head_t run_wq; struct fasync_struct *ibox_fasync; struct fasync_struct *wbox_fasync; + struct fasync_struct *mfc_fasync; + u32 tagwait; struct spu_context_ops *ops; struct work_struct reap_work; - u64 flags; + unsigned long flags; + unsigned long event_return; + + struct list_head gang_list; + struct spu_gang *gang; + struct kref *prof_priv_kref; + void ( * prof_priv_release) (struct kref *kref); + + /* owner thread */ + pid_t tid; + + /* scheduler fields */ + struct list_head rq; + unsigned int time_slice; + unsigned long sched_flags; + cpumask_t cpus_allowed; + int policy; + int prio; + int last_ran; + + /* statistics */ + struct { + /* updates protected by ctx->state_mutex */ + enum spu_utilization_state util_state; + unsigned long long tstamp; /* time of last state switch */ + unsigned long long times[SPU_UTIL_MAX]; + unsigned long long vol_ctx_switch; + unsigned long long invol_ctx_switch; + unsigned long long min_flt; + unsigned long long maj_flt; + unsigned long long hash_flt; + unsigned long long slb_flt; + unsigned long long slb_flt_base; /* # at last ctx switch */ + unsigned long long class2_intr; + unsigned long long class2_intr_base; /* # at last ctx switch */ + unsigned long long libassist; + } stats; + + /* context switch log */ + struct switch_log *switch_log; + + struct list_head aff_list; + int aff_head; + int aff_offset; +}; + +struct spu_gang { + struct list_head list; + struct mutex mutex; + struct kref kref; + int contexts; + + struct spu_context *aff_ref_ctx; + struct list_head aff_list_head; + struct mutex aff_mutex; + int aff_flags; + struct spu *aff_ref_spu; + atomic_t aff_sched_count; }; +/* Flag bits for spu_gang aff_flags */ +#define AFF_OFFSETS_SET 1 +#define AFF_MERGED 2 + +struct mfc_dma_command { + int32_t pad; /* reserved */ + uint32_t lsa; /* local storage address */ + uint64_t ea; /* effective address */ + uint16_t size; /* transfer size */ + uint16_t tag; /* command tag */ + uint16_t class; /* class ID */ + uint16_t cmd; /* command opcode */ +}; + + /* SPU context query/set operations. 
*/ struct spu_context_ops { int (*mbox_read) (struct spu_context * ctx, u32 * data); @@ -82,8 +203,22 @@ struct spu_context_ops { void (*npc_write) (struct spu_context * ctx, u32 data); u32(*status_read) (struct spu_context * ctx); char*(*get_ls) (struct spu_context * ctx); + void (*privcntl_write) (struct spu_context *ctx, u64 data); + u32 (*runcntl_read) (struct spu_context * ctx); void (*runcntl_write) (struct spu_context * ctx, u32 data); void (*runcntl_stop) (struct spu_context * ctx); + void (*master_start) (struct spu_context * ctx); + void (*master_stop) (struct spu_context * ctx); + int (*set_mfc_query)(struct spu_context * ctx, u32 mask, u32 mode); + u32 (*read_mfc_tagstatus)(struct spu_context * ctx); + u32 (*get_mfc_free_elements)(struct spu_context *ctx); + int (*send_mfc_command)(struct spu_context * ctx, + struct mfc_dma_command * cmd); + void (*dma_info_read) (struct spu_context * ctx, + struct spu_dma_info * info); + void (*proxydma_info_read) (struct spu_context * ctx, + struct spu_proxydma_info * info); + void (*restart_dma)(struct spu_context *ctx); }; extern struct spu_context_ops spu_hw_ops; @@ -91,44 +226,96 @@ extern struct spu_context_ops spu_backing_ops; struct spufs_inode_info { struct spu_context *i_ctx; + struct spu_gang *i_gang; struct inode vfs_inode; + int i_openers; }; #define SPUFS_I(inode) \ container_of(inode, struct spufs_inode_info, vfs_inode) -extern struct tree_descr spufs_dir_contents[]; +struct spufs_tree_descr { + const char *name; + const struct file_operations *ops; + umode_t mode; + size_t size; +}; + +extern const struct spufs_tree_descr spufs_dir_contents[]; +extern const struct spufs_tree_descr spufs_dir_nosched_contents[]; +extern const struct spufs_tree_descr spufs_dir_debug_contents[]; /* system call implementation */ -long spufs_run_spu(struct file *file, - struct spu_context *ctx, u32 *npc, u32 *status); -long spufs_create_thread(struct nameidata *nd, - unsigned int flags, mode_t mode); -extern struct file_operations spufs_context_fops; +extern struct spufs_calls spufs_calls; +struct coredump_params; +long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *status); +long spufs_create(struct path *nd, struct dentry *dentry, unsigned int flags, + umode_t mode, struct file *filp); +/* ELF coredump callbacks for writing SPU ELF notes */ +extern int spufs_coredump_extra_notes_size(void); +extern int spufs_coredump_extra_notes_write(struct coredump_params *cprm); + +extern const struct file_operations spufs_context_fops; + +/* gang management */ +struct spu_gang *alloc_spu_gang(void); +struct spu_gang *get_spu_gang(struct spu_gang *gang); +int put_spu_gang(struct spu_gang *gang); +void spu_gang_remove_ctx(struct spu_gang *gang, struct spu_context *ctx); +void spu_gang_add_ctx(struct spu_gang *gang, struct spu_context *ctx); + +/* fault handling */ +int spufs_handle_class1(struct spu_context *ctx); +int spufs_handle_class0(struct spu_context *ctx); + +/* affinity */ +struct spu *affinity_check(struct spu_context *ctx); /* context management */ -struct spu_context * alloc_spu_context(struct address_space *local_store); +extern atomic_t nr_spu_contexts; +static inline int __must_check spu_acquire(struct spu_context *ctx) +{ + return mutex_lock_interruptible(&ctx->state_mutex); +} + +static inline void spu_release(struct spu_context *ctx) +{ + mutex_unlock(&ctx->state_mutex); +} + +struct spu_context * alloc_spu_context(struct spu_gang *gang); void destroy_spu_context(struct kref *kref); struct spu_context * get_spu_context(struct spu_context 
*ctx); int put_spu_context(struct spu_context *ctx); void spu_unmap_mappings(struct spu_context *ctx); void spu_forget(struct spu_context *ctx); -void spu_acquire(struct spu_context *ctx); -void spu_release(struct spu_context *ctx); -int spu_acquire_runnable(struct spu_context *ctx); -void spu_acquire_saved(struct spu_context *ctx); +int __must_check spu_acquire_saved(struct spu_context *ctx); +void spu_release_saved(struct spu_context *ctx); -int spu_activate(struct spu_context *ctx, u64 flags); +int spu_stopped(struct spu_context *ctx, u32 * stat); +void spu_del_from_rq(struct spu_context *ctx); +int spu_activate(struct spu_context *ctx, unsigned long flags); void spu_deactivate(struct spu_context *ctx); void spu_yield(struct spu_context *ctx); +void spu_switch_notify(struct spu *spu, struct spu_context *ctx); +void spu_switch_log_notify(struct spu *spu, struct spu_context *ctx, + u32 type, u32 val); +void spu_set_timeslice(struct spu_context *ctx); +void spu_update_sched_info(struct spu_context *ctx); +void __spu_update_sched_info(struct spu_context *ctx); int __init spu_sched_init(void); -void __exit spu_sched_exit(void); +void spu_sched_exit(void); + +extern char *isolated_loader; /* * spufs_wait - * Same as wait_event_interruptible(), except that here + * Same as wait_event_interruptible(), except that here * we need to call spu_release(ctx) before sleeping, and * then spu_acquire(ctx) when awoken. + * + * Returns with state_mutex re-acquired when successful or + * with -ERESTARTSYS and the state_mutex dropped when interrupted. */ #define spufs_wait(wq, condition) \ @@ -139,14 +326,15 @@ void __exit spu_sched_exit(void); prepare_to_wait(&(wq), &__wait, TASK_INTERRUPTIBLE); \ if (condition) \ break; \ - if (!signal_pending(current)) { \ - spu_release(ctx); \ - schedule(); \ - spu_acquire(ctx); \ - continue; \ + spu_release(ctx); \ + if (signal_pending(current)) { \ + __ret = -ERESTARTSYS; \ + break; \ } \ - __ret = -ERESTARTSYS; \ - break; \ + schedule(); \ + __ret = spu_acquire(ctx); \ + if (__ret) \ + break; \ } \ finish_wait(&(wq), &__wait); \ __ret; \ @@ -158,6 +346,31 @@ size_t spu_ibox_read(struct spu_context *ctx, u32 *data); /* irq callback funcs. 
*/ void spufs_ibox_callback(struct spu *spu); void spufs_wbox_callback(struct spu *spu); -void spufs_stop_callback(struct spu *spu); +void spufs_stop_callback(struct spu *spu, int irq); +void spufs_mfc_callback(struct spu *spu); +void spufs_dma_callback(struct spu *spu, int type); + +extern struct spu_coredump_calls spufs_coredump_calls; +struct spufs_coredump_reader { + char *name; + ssize_t (*read)(struct spu_context *ctx, + char __user *buffer, size_t size, loff_t *pos); + u64 (*get)(struct spu_context *ctx); + size_t size; +}; +extern const struct spufs_coredump_reader spufs_coredump_read[]; +extern int spufs_coredump_num_notes; + +extern int spu_init_csa(struct spu_state *csa); +extern void spu_fini_csa(struct spu_state *csa); +extern int spu_save(struct spu_state *prev, struct spu *spu); +extern int spu_restore(struct spu_state *new, struct spu *spu); +extern int spu_switch(struct spu_state *prev, struct spu_state *new, + struct spu *spu); +extern int spu_alloc_lscsa(struct spu_state *csa); +extern void spu_free_lscsa(struct spu_state *csa); + +extern void spuctx_switch_state(struct spu_context *ctx, + enum spu_utilization_state new_state); #endif diff --git a/arch/powerpc/platforms/cell/spufs/sputrace.h b/arch/powerpc/platforms/cell/spufs/sputrace.h new file mode 100644 index 00000000000..db2656aa410 --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/sputrace.h @@ -0,0 +1,39 @@ +#if !defined(_TRACE_SPUFS_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_SPUFS_H + +#include <linux/tracepoint.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM spufs + +TRACE_EVENT(spufs_context, + TP_PROTO(struct spu_context *ctx, struct spu *spu, const char *name), + TP_ARGS(ctx, spu, name), + + TP_STRUCT__entry( + __field(const char *, name) + __field(int, owner_tid) + __field(int, number) + ), + + TP_fast_assign( + __entry->name = name; + __entry->owner_tid = ctx->tid; + __entry->number = spu ? spu->number : -1; + ), + + TP_printk("%s (ctxthread = %d, spu = %d)", + __entry->name, __entry->owner_tid, __entry->number) +); + +#define spu_context_trace(name, ctx, spu) \ + trace_spufs_context(ctx, spu, __stringify(name)) +#define spu_context_nospu_trace(name, ctx) \ + trace_spufs_context(ctx, NULL, __stringify(name)) + +#endif /* _TRACE_SPUFS_H */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE sputrace +#include <trace/define_trace.h> diff --git a/arch/powerpc/platforms/cell/spufs/switch.c b/arch/powerpc/platforms/cell/spufs/switch.c index 212db28531f..dde35551e74 100644 --- a/arch/powerpc/platforms/cell/spufs/switch.c +++ b/arch/powerpc/platforms/cell/spufs/switch.c @@ -32,23 +32,25 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ -#include <linux/config.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/errno.h> +#include <linux/hardirq.h> #include <linux/sched.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/vmalloc.h> #include <linux/smp.h> -#include <linux/smp_lock.h> #include <linux/stddef.h> #include <linux/unistd.h> #include <asm/io.h> #include <asm/spu.h> +#include <asm/spu_priv1.h> #include <asm/spu_csa.h> #include <asm/mmu_context.h> +#include "spufs.h" + #include "spu_save_dump.h" #include "spu_restore_dump.h" @@ -71,7 +73,7 @@ } #endif /* debug */ -#define POLL_WHILE_FALSE(_c) POLL_WHILE_TRUE(!(_c)) +#define POLL_WHILE_FALSE(_c) POLL_WHILE_TRUE(!(_c)) static inline void acquire_spu_lock(struct spu *spu) { @@ -102,7 +104,7 @@ static inline int check_spu_isolate(struct spu_state *csa, struct spu *spu) * saved at this time. */ isolate_state = SPU_STATUS_ISOLATED_STATE | - SPU_STATUS_ISOLATED_LOAD_STAUTUS | SPU_STATUS_ISOLATED_EXIT_STAUTUS; + SPU_STATUS_ISOLATED_LOAD_STATUS | SPU_STATUS_ISOLATED_EXIT_STATUS; return (in_be32(&prob->spu_status_R) & isolate_state) ? 1 : 0; } @@ -116,6 +118,8 @@ static inline void disable_interrupts(struct spu_state *csa, struct spu *spu) * Write INT_MASK_class1 with value of 0. * Save INT_Mask_class2 in CSA. * Write INT_MASK_class2 with value of 0. + * Synchronize all three interrupts to be sure + * we no longer execute a handler on another CPU. */ spin_lock_irq(&spu->register_lock); if (csa) { @@ -128,6 +132,17 @@ static inline void disable_interrupts(struct spu_state *csa, struct spu *spu) spu_int_mask_set(spu, 2, 0ul); eieio(); spin_unlock_irq(&spu->register_lock); + + /* + * This flag needs to be set before calling synchronize_irq so + * that the update will be visible to the relevant handlers + * via a simple load. + */ + set_bit(SPU_CONTEXT_SWITCH_PENDING, &spu->flags); + clear_bit(SPU_CONTEXT_FAULT_PENDING, &spu->flags); + synchronize_irq(spu->irqs[0]); + synchronize_irq(spu->irqs[1]); + synchronize_irq(spu->irqs[2]); } static inline void set_watchdog_timer(struct spu_state *csa, struct spu *spu) @@ -159,9 +174,8 @@ static inline void set_switch_pending(struct spu_state *csa, struct spu *spu) /* Save, Step 7: * Restore, Step 5: * Set a software context switch pending flag. + * Done above in Step 3 - disable_interrupts(). 
*/ - set_bit(SPU_CONTEXT_SWITCH_PENDING, &spu->flags); - mb(); } static inline void save_mfc_cntl(struct spu_state *csa, struct spu *spu) @@ -179,22 +193,21 @@ static inline void save_mfc_cntl(struct spu_state *csa, struct spu *spu) MFC_CNTL_SUSPEND_COMPLETE); /* fall through */ case MFC_CNTL_SUSPEND_COMPLETE: - if (csa) { + if (csa) csa->priv2.mfc_control_RW = in_be64(&priv2->mfc_control_RW) | MFC_CNTL_SUSPEND_DMA_QUEUE; - } break; case MFC_CNTL_NORMAL_DMA_QUEUE_OPERATION: out_be64(&priv2->mfc_control_RW, MFC_CNTL_SUSPEND_DMA_QUEUE); POLL_WHILE_FALSE((in_be64(&priv2->mfc_control_RW) & MFC_CNTL_SUSPEND_DMA_STATUS_MASK) == MFC_CNTL_SUSPEND_COMPLETE); - if (csa) { + if (csa) csa->priv2.mfc_control_RW = in_be64(&priv2->mfc_control_RW) & - ~MFC_CNTL_SUSPEND_DMA_QUEUE; - } + ~MFC_CNTL_SUSPEND_DMA_QUEUE & + ~MFC_CNTL_SUSPEND_MASK; break; } } @@ -244,24 +257,21 @@ static inline void save_spu_status(struct spu_state *csa, struct spu *spu) } } -static inline void save_mfc_decr(struct spu_state *csa, struct spu *spu) +static inline void save_mfc_stopped_status(struct spu_state *csa, + struct spu *spu) { struct spu_priv2 __iomem *priv2 = spu->priv2; + const u64 mask = MFC_CNTL_DECREMENTER_RUNNING | + MFC_CNTL_DMA_QUEUES_EMPTY; /* Save, Step 12: * Read MFC_CNTL[Ds]. Update saved copy of * CSA.MFC_CNTL[Ds]. + * + * update: do the same with MFC_CNTL[Q]. */ - if (in_be64(&priv2->mfc_control_RW) & MFC_CNTL_DECREMENTER_RUNNING) { - csa->priv2.mfc_control_RW |= MFC_CNTL_DECREMENTER_RUNNING; - csa->suspend_time = get_cycles(); - out_be64(&priv2->spu_chnlcntptr_RW, 7ULL); - eieio(); - csa->spu_chnldata_RW[7] = in_be64(&priv2->spu_chnldata_RW); - eieio(); - } else { - csa->priv2.mfc_control_RW &= ~MFC_CNTL_DECREMENTER_RUNNING; - } + csa->priv2.mfc_control_RW &= ~mask; + csa->priv2.mfc_control_RW |= in_be64(&priv2->mfc_control_RW) & mask; } static inline void halt_mfc_decr(struct spu_state *csa, struct spu *spu) @@ -272,7 +282,8 @@ static inline void halt_mfc_decr(struct spu_state *csa, struct spu *spu) * Write MFC_CNTL[Dh] set to a '1' to halt * the decrementer. */ - out_be64(&priv2->mfc_control_RW, MFC_CNTL_DECREMENTER_HALTED); + out_be64(&priv2->mfc_control_RW, + MFC_CNTL_DECREMENTER_HALTED | MFC_CNTL_SUSPEND_MASK); eieio(); } @@ -388,6 +399,19 @@ static inline void save_ppu_querytype(struct spu_state *csa, struct spu *spu) csa->prob.dma_querytype_RW = in_be32(&prob->dma_querytype_RW); } +static inline void save_ppu_tagstatus(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Save the Prxy_TagStatus register in the CSA. + * + * It is unnecessary to restore dma_tagstatus_R, however, + * dma_tagstatus_R in the CSA is accessed via backing_ops, so + * we must save it. + */ + csa->prob.dma_tagstatus_R = in_be32(&prob->dma_tagstatus_R); +} + static inline void save_mfc_csr_tsq(struct spu_state *csa, struct spu *spu) { struct spu_priv2 __iomem *priv2 = spu->priv2; @@ -451,7 +475,9 @@ static inline void purge_mfc_queue(struct spu_state *csa, struct spu *spu) * Restore, Step 14. * Write MFC_CNTL[Pc]=1 (purge queue). */ - out_be64(&priv2->mfc_control_RW, MFC_CNTL_PURGE_DMA_REQUEST); + out_be64(&priv2->mfc_control_RW, + MFC_CNTL_PURGE_DMA_REQUEST | + MFC_CNTL_SUSPEND_MASK); eieio(); } @@ -463,30 +489,11 @@ static inline void wait_purge_complete(struct spu_state *csa, struct spu *spu) * Poll MFC_CNTL[Ps] until value '11' is read * (purge complete). 
*/ - POLL_WHILE_FALSE(in_be64(&priv2->mfc_control_RW) & + POLL_WHILE_FALSE((in_be64(&priv2->mfc_control_RW) & + MFC_CNTL_PURGE_DMA_STATUS_MASK) == MFC_CNTL_PURGE_DMA_COMPLETE); } -static inline void save_mfc_slbs(struct spu_state *csa, struct spu *spu) -{ - struct spu_priv2 __iomem *priv2 = spu->priv2; - int i; - - /* Save, Step 29: - * If MFC_SR1[R]='1', save SLBs in CSA. - */ - if (spu_mfc_sr1_get(spu) & MFC_STATE1_RELOCATE_MASK) { - csa->priv2.slb_index_W = in_be64(&priv2->slb_index_W); - for (i = 0; i < 8; i++) { - out_be64(&priv2->slb_index_W, i); - eieio(); - csa->slb_esid_RW[i] = in_be64(&priv2->slb_esid_RW); - csa->slb_vsid_RW[i] = in_be64(&priv2->slb_vsid_RW); - eieio(); - } - } -} - static inline void setup_mfc_sr1(struct spu_state *csa, struct spu *spu) { /* Save, Step 30: @@ -622,13 +629,18 @@ static inline void save_ppuint_mb(struct spu_state *csa, struct spu *spu) static inline void save_ch_part1(struct spu_state *csa, struct spu *spu) { struct spu_priv2 __iomem *priv2 = spu->priv2; - u64 idx, ch_indices[7] = { 0UL, 1UL, 3UL, 4UL, 24UL, 25UL, 27UL }; + u64 idx, ch_indices[] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL }; int i; /* Save, Step 42: - * Save the following CH: [0,1,3,4,24,25,27] */ - for (i = 0; i < 7; i++) { + + /* Save CH 1, without channel count */ + out_be64(&priv2->spu_chnlcntptr_RW, 1); + csa->spu_chnldata_RW[1] = in_be64(&priv2->spu_chnldata_RW); + + /* Save the following CH: [0,3,4,24,25,27] */ + for (i = 0; i < ARRAY_SIZE(ch_indices); i++) { idx = ch_indices[i]; out_be64(&priv2->spu_chnlcntptr_RW, idx); eieio(); @@ -702,47 +714,9 @@ static inline void resume_mfc_queue(struct spu_state *csa, struct spu *spu) out_be64(&priv2->mfc_control_RW, MFC_CNTL_RESUME_DMA_QUEUE); } -static inline void invalidate_slbs(struct spu_state *csa, struct spu *spu) +static inline void setup_mfc_slbs(struct spu_state *csa, struct spu *spu, + unsigned int *code, int code_size) { - struct spu_priv2 __iomem *priv2 = spu->priv2; - - /* Save, Step 45: - * Restore, Step 19: - * If MFC_SR1[R]=1, write 0 to SLB_Invalidate_All. - */ - if (spu_mfc_sr1_get(spu) & MFC_STATE1_RELOCATE_MASK) { - out_be64(&priv2->slb_invalidate_all_W, 0UL); - eieio(); - } -} - -static inline void get_kernel_slb(u64 ea, u64 slb[2]) -{ - slb[0] = (get_kernel_vsid(ea) << SLB_VSID_SHIFT) | SLB_VSID_KERNEL; - slb[1] = (ea & ESID_MASK) | SLB_ESID_V; - - /* Large pages are used for kernel text/data, but not vmalloc. */ - if (cpu_has_feature(CPU_FTR_16M_PAGE) - && REGION_ID(ea) == KERNEL_REGION_ID) - slb[0] |= SLB_VSID_L; -} - -static inline void load_mfc_slb(struct spu *spu, u64 slb[2], int slbe) -{ - struct spu_priv2 __iomem *priv2 = spu->priv2; - - out_be64(&priv2->slb_index_W, slbe); - eieio(); - out_be64(&priv2->slb_vsid_RW, slb[0]); - out_be64(&priv2->slb_esid_RW, slb[1]); - eieio(); -} - -static inline void setup_mfc_slbs(struct spu_state *csa, struct spu *spu) -{ - u64 code_slb[2]; - u64 lscsa_slb[2]; - /* Save, Step 47: * Restore, Step 30. * If MFC_SR1[R]=1, write 0 to SLB_Invalidate_All @@ -757,12 +731,8 @@ static inline void setup_mfc_slbs(struct spu_state *csa, struct spu *spu) * MFC_SR1[R]=1 (in other words, assume that * translation is desired by OS environment). 
*/ - invalidate_slbs(csa, spu); - get_kernel_slb((unsigned long)&spu_save_code[0], code_slb); - get_kernel_slb((unsigned long)csa->lscsa, lscsa_slb); - load_mfc_slb(spu, code_slb, 0); - if ((lscsa_slb[0] != code_slb[0]) || (lscsa_slb[1] != code_slb[1])) - load_mfc_slb(spu, lscsa_slb, 1); + spu_invalidate_slbs(spu); + spu_setup_kernel_slbs(spu, csa->lscsa, code, code_size); } static inline void set_switch_active(struct spu_state *csa, struct spu *spu) @@ -770,9 +740,14 @@ static inline void set_switch_active(struct spu_state *csa, struct spu *spu) /* Save, Step 48: * Restore, Step 23. * Change the software context switch pending flag - * to context switch active. + * to context switch active. This implementation does + * not uses a switch active flag. + * + * Now that we have saved the mfc in the csa, we can add in the + * restart command if an exception occurred. */ - set_bit(SPU_CONTEXT_SWITCH_ACTIVE, &spu->flags); + if (test_bit(SPU_CONTEXT_FAULT_PENDING, &spu->flags)) + csa->priv2.mfc_control_RW |= MFC_CNTL_RESTART_DMA_COMMAND; clear_bit(SPU_CONTEXT_SWITCH_PENDING, &spu->flags); mb(); } @@ -791,9 +766,9 @@ static inline void enable_interrupts(struct spu_state *csa, struct spu *spu) * (translation) interrupts. */ spin_lock_irq(&spu->register_lock); - spu_int_stat_clear(spu, 0, ~0ul); - spu_int_stat_clear(spu, 1, ~0ul); - spu_int_stat_clear(spu, 2, ~0ul); + spu_int_stat_clear(spu, 0, CLASS0_INTR_MASK); + spu_int_stat_clear(spu, 1, CLASS1_INTR_MASK); + spu_int_stat_clear(spu, 2, CLASS2_INTR_MASK); spu_int_mask_set(spu, 0, 0ul); spu_int_mask_set(spu, 1, class1_mask); spu_int_mask_set(spu, 2, 0ul); @@ -950,8 +925,8 @@ static inline void wait_tag_complete(struct spu_state *csa, struct spu *spu) POLL_WHILE_FALSE(in_be32(&prob->dma_tagstatus_R) & mask); local_irq_save(flags); - spu_int_stat_clear(spu, 0, ~(0ul)); - spu_int_stat_clear(spu, 2, ~(0ul)); + spu_int_stat_clear(spu, 0, CLASS0_INTR_MASK); + spu_int_stat_clear(spu, 2, CLASS2_INTR_MASK); local_irq_restore(flags); } @@ -969,8 +944,8 @@ static inline void wait_spu_stopped(struct spu_state *csa, struct spu *spu) POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) & SPU_STATUS_RUNNING); local_irq_save(flags); - spu_int_stat_clear(spu, 0, ~(0ul)); - spu_int_stat_clear(spu, 2, ~(0ul)); + spu_int_stat_clear(spu, 0, CLASS0_INTR_MASK); + spu_int_stat_clear(spu, 2, CLASS2_INTR_MASK); local_irq_restore(flags); } @@ -997,13 +972,13 @@ static inline void terminate_spu_app(struct spu_state *csa, struct spu *spu) */ } -static inline void suspend_mfc(struct spu_state *csa, struct spu *spu) +static inline void suspend_mfc_and_halt_decr(struct spu_state *csa, + struct spu *spu) { struct spu_priv2 __iomem *priv2 = spu->priv2; /* Restore, Step 7: - * Restore, Step 47. - * Write MFC_Cntl[Dh,Sc]='1','1' to suspend + * Write MFC_Cntl[Dh,Sc,Sm]='1','1','0' to suspend * the queue and halt the decrementer. */ out_be64(&priv2->mfc_control_RW, MFC_CNTL_SUSPEND_DMA_QUEUE | @@ -1020,7 +995,8 @@ static inline void wait_suspend_mfc_complete(struct spu_state *csa, * Restore, Step 47. * Poll MFC_CNTL[Ss] until 11 is returned. 
*/ - POLL_WHILE_FALSE(in_be64(&priv2->mfc_control_RW) & + POLL_WHILE_FALSE((in_be64(&priv2->mfc_control_RW) & + MFC_CNTL_SUSPEND_DMA_STATUS_MASK) == MFC_CNTL_SUSPEND_COMPLETE); } @@ -1037,12 +1013,12 @@ static inline int suspend_spe(struct spu_state *csa, struct spu *spu) */ if (in_be32(&prob->spu_status_R) & SPU_STATUS_RUNNING) { if (in_be32(&prob->spu_status_R) & - SPU_STATUS_ISOLATED_EXIT_STAUTUS) { + SPU_STATUS_ISOLATED_EXIT_STATUS) { POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) & SPU_STATUS_RUNNING); } if ((in_be32(&prob->spu_status_R) & - SPU_STATUS_ISOLATED_LOAD_STAUTUS) + SPU_STATUS_ISOLATED_LOAD_STATUS) || (in_be32(&prob->spu_status_R) & SPU_STATUS_ISOLATED_STATE)) { out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_STOP); @@ -1076,7 +1052,7 @@ static inline void clear_spu_status(struct spu_state *csa, struct spu *spu) */ if (!(in_be32(&prob->spu_status_R) & SPU_STATUS_RUNNING)) { if (in_be32(&prob->spu_status_R) & - SPU_STATUS_ISOLATED_EXIT_STAUTUS) { + SPU_STATUS_ISOLATED_EXIT_STATUS) { spu_mfc_sr1_set(spu, MFC_STATE1_MASTER_RUN_CONTROL_MASK); eieio(); @@ -1086,7 +1062,7 @@ static inline void clear_spu_status(struct spu_state *csa, struct spu *spu) SPU_STATUS_RUNNING); } if ((in_be32(&prob->spu_status_R) & - SPU_STATUS_ISOLATED_LOAD_STAUTUS) + SPU_STATUS_ISOLATED_LOAD_STATUS) || (in_be32(&prob->spu_status_R) & SPU_STATUS_ISOLATED_STATE)) { spu_mfc_sr1_set(spu, @@ -1103,14 +1079,19 @@ static inline void clear_spu_status(struct spu_state *csa, struct spu *spu) static inline void reset_ch_part1(struct spu_state *csa, struct spu *spu) { struct spu_priv2 __iomem *priv2 = spu->priv2; - u64 ch_indices[7] = { 0UL, 1UL, 3UL, 4UL, 24UL, 25UL, 27UL }; + u64 ch_indices[] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL }; u64 idx; int i; /* Restore, Step 20: - * Reset the following CH: [0,1,3,4,24,25,27] */ - for (i = 0; i < 7; i++) { + + /* Reset CH 1 */ + out_be64(&priv2->spu_chnlcntptr_RW, 1); + out_be64(&priv2->spu_chnldata_RW, 0UL); + + /* Reset the following CH: [0,3,4,24,25,27] */ + for (i = 0; i < ARRAY_SIZE(ch_indices); i++) { idx = ch_indices[i]; out_be64(&priv2->spu_chnlcntptr_RW, idx); eieio(); @@ -1297,7 +1278,15 @@ static inline void setup_decr(struct spu_state *csa, struct spu *spu) cycles_t resume_time = get_cycles(); cycles_t delta_time = resume_time - csa->suspend_time; - csa->lscsa->decr.slot[0] = delta_time; + csa->lscsa->decr_status.slot[0] = SPU_DECR_STATUS_RUNNING; + if (csa->lscsa->decr.slot[0] < delta_time) { + csa->lscsa->decr_status.slot[0] |= + SPU_DECR_STATUS_WRAPPED; + } + + csa->lscsa->decr.slot[0] -= delta_time; + } else { + csa->lscsa->decr_status.slot[0] = 0; } } @@ -1406,6 +1395,18 @@ static inline void restore_ls_16kb(struct spu_state *csa, struct spu *spu) send_mfc_dma(spu, addr, ls_offset, size, tag, rclass, cmd); } +static inline void suspend_mfc(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Restore, Step 47. + * Write MFC_Cntl[Sc,Sm]='1','0' to suspend + * the queue. 
+ */ + out_be64(&priv2->mfc_control_RW, MFC_CNTL_SUSPEND_DMA_QUEUE); + eieio(); +} + static inline void clear_interrupts(struct spu_state *csa, struct spu *spu) { /* Restore, Step 49: @@ -1420,9 +1421,9 @@ static inline void clear_interrupts(struct spu_state *csa, struct spu *spu) spu_int_mask_set(spu, 0, 0ul); spu_int_mask_set(spu, 1, 0ul); spu_int_mask_set(spu, 2, 0ul); - spu_int_stat_clear(spu, 0, ~0ul); - spu_int_stat_clear(spu, 1, ~0ul); - spu_int_stat_clear(spu, 2, ~0ul); + spu_int_stat_clear(spu, 0, CLASS0_INTR_MASK); + spu_int_stat_clear(spu, 1, CLASS1_INTR_MASK); + spu_int_stat_clear(spu, 2, CLASS2_INTR_MASK); spin_unlock_irq(&spu->register_lock); } @@ -1556,27 +1557,27 @@ static inline void restore_decr_wrapped(struct spu_state *csa, struct spu *spu) * "wrapped" flag is set, OR in a '1' to * CSA.SPU_Event_Status[Tm]. */ - if (csa->lscsa->decr_status.slot[0] == 1) { - csa->spu_chnldata_RW[0] |= 0x20; - } - if ((csa->lscsa->decr_status.slot[0] == 1) && - (csa->spu_chnlcnt_RW[0] == 0 && - ((csa->spu_chnldata_RW[2] & 0x20) == 0x0) && - ((csa->spu_chnldata_RW[0] & 0x20) != 0x1))) { + if (!(csa->lscsa->decr_status.slot[0] & SPU_DECR_STATUS_WRAPPED)) + return; + + if ((csa->spu_chnlcnt_RW[0] == 0) && + (csa->spu_chnldata_RW[1] & 0x20) && + !(csa->spu_chnldata_RW[0] & 0x20)) csa->spu_chnlcnt_RW[0] = 1; - } + + csa->spu_chnldata_RW[0] |= 0x20; } static inline void restore_ch_part1(struct spu_state *csa, struct spu *spu) { struct spu_priv2 __iomem *priv2 = spu->priv2; - u64 idx, ch_indices[7] = { 0UL, 1UL, 3UL, 4UL, 24UL, 25UL, 27UL }; + u64 idx, ch_indices[] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL }; int i; /* Restore, Step 59: - * Restore the following CH: [0,1,3,4,24,25,27] + * Restore the following CH: [0,3,4,24,25,27] */ - for (i = 0; i < 7; i++) { + for (i = 0; i < ARRAY_SIZE(ch_indices); i++) { idx = ch_indices[i]; out_be64(&priv2->spu_chnlcntptr_RW, idx); eieio(); @@ -1699,27 +1700,6 @@ static inline void check_ppuint_mb_stat(struct spu_state *csa, struct spu *spu) } } -static inline void restore_mfc_slbs(struct spu_state *csa, struct spu *spu) -{ - struct spu_priv2 __iomem *priv2 = spu->priv2; - int i; - - /* Restore, Step 68: - * If MFC_SR1[R]='1', restore SLBs from CSA. - */ - if (csa->priv1.mfc_sr1_RW & MFC_STATE1_RELOCATE_MASK) { - for (i = 0; i < 8; i++) { - out_be64(&priv2->slb_index_W, i); - eieio(); - out_be64(&priv2->slb_esid_RW, csa->slb_esid_RW[i]); - out_be64(&priv2->slb_vsid_RW, csa->slb_vsid_RW[i]); - eieio(); - } - out_be64(&priv2->slb_index_W, csa->priv2.slb_index_W); - eieio(); - } -} - static inline void restore_mfc_sr1(struct spu_state *csa, struct spu *spu) { /* Restore, Step 69: @@ -1729,6 +1709,13 @@ static inline void restore_mfc_sr1(struct spu_state *csa, struct spu *spu) eieio(); } +static inline void set_int_route(struct spu_state *csa, struct spu *spu) +{ + struct spu_context *ctx = spu->ctx; + + spu_cpu_affinity_set(spu, ctx->last_ran); +} + static inline void restore_other_spu_access(struct spu_state *csa, struct spu *spu) { @@ -1760,6 +1747,15 @@ static inline void restore_mfc_cntl(struct spu_state *csa, struct spu *spu) */ out_be64(&priv2->mfc_control_RW, csa->priv2.mfc_control_RW); eieio(); + + /* + * The queue is put back into the same state that was evident prior to + * the context switch. The suspend flag is added to the saved state in + * the csa, if the operational state was suspending or suspended. In + * this case, the code that suspended the mfc is responsible for + * continuing it. 
Note that SPE faults do not change the operational + * state of the spu. + */ } static inline void enable_user_access(struct spu_state *csa, struct spu *spu) @@ -1776,9 +1772,8 @@ static inline void reset_switch_active(struct spu_state *csa, struct spu *spu) { /* Restore, Step 74: * Reset the "context switch active" flag. + * Not performed by this implementation. */ - clear_bit(SPU_CONTEXT_SWITCH_ACTIVE, &spu->flags); - mb(); } static inline void reenable_interrupts(struct spu_state *csa, struct spu *spu) @@ -1819,7 +1814,7 @@ static int quiece_spu(struct spu_state *prev, struct spu *spu) save_spu_runcntl(prev, spu); /* Step 9. */ save_mfc_sr1(prev, spu); /* Step 10. */ save_spu_status(prev, spu); /* Step 11. */ - save_mfc_decr(prev, spu); /* Step 12. */ + save_mfc_stopped_status(prev, spu); /* Step 12. */ halt_mfc_decr(prev, spu); /* Step 13. */ save_timebase(prev, spu); /* Step 14. */ remove_other_spu_access(prev, spu); /* Step 15. */ @@ -1840,14 +1835,15 @@ static void save_csa(struct spu_state *prev, struct spu *spu) save_mfc_queues(prev, spu); /* Step 19. */ save_ppu_querymask(prev, spu); /* Step 20. */ save_ppu_querytype(prev, spu); /* Step 21. */ + save_ppu_tagstatus(prev, spu); /* NEW. */ save_mfc_csr_tsq(prev, spu); /* Step 22. */ save_mfc_csr_cmd(prev, spu); /* Step 23. */ save_mfc_csr_ato(prev, spu); /* Step 24. */ save_mfc_tclass_id(prev, spu); /* Step 25. */ set_mfc_tclass_id(prev, spu); /* Step 26. */ + save_mfc_cmd(prev, spu); /* Step 26a - moved from 44. */ purge_mfc_queue(prev, spu); /* Step 27. */ wait_purge_complete(prev, spu); /* Step 28. */ - save_mfc_slbs(prev, spu); /* Step 29. */ setup_mfc_sr1(prev, spu); /* Step 30. */ save_spu_npc(prev, spu); /* Step 31. */ save_spu_privcntl(prev, spu); /* Step 32. */ @@ -1862,7 +1858,6 @@ static void save_csa(struct spu_state *prev, struct spu *spu) save_ppuint_mb(prev, spu); /* Step 41. */ save_ch_part1(prev, spu); /* Step 42. */ save_spu_mb(prev, spu); /* Step 43. */ - save_mfc_cmd(prev, spu); /* Step 44. */ reset_ch(prev, spu); /* Step 45. */ } @@ -1875,7 +1870,8 @@ static void save_lscsa(struct spu_state *prev, struct spu *spu) */ resume_mfc_queue(prev, spu); /* Step 46. */ - setup_mfc_slbs(prev, spu); /* Step 47. */ + /* Step 47. */ + setup_mfc_slbs(prev, spu, spu_save_code, sizeof(spu_save_code)); set_switch_active(prev, spu); /* Step 48. */ enable_interrupts(prev, spu); /* Step 49. */ save_ls_16kb(prev, spu); /* Step 50. */ @@ -1888,6 +1884,51 @@ static void save_lscsa(struct spu_state *prev, struct spu *spu) wait_spu_stopped(prev, spu); /* Step 57. */ } +static void force_spu_isolate_exit(struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Stop SPE execution and wait for completion. */ + out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_STOP); + iobarrier_rw(); + POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) & SPU_STATUS_RUNNING); + + /* Restart SPE master runcntl. */ + spu_mfc_sr1_set(spu, MFC_STATE1_MASTER_RUN_CONTROL_MASK); + iobarrier_w(); + + /* Initiate isolate exit request and wait for completion. */ + out_be64(&priv2->spu_privcntl_RW, 4LL); + iobarrier_w(); + out_be32(&prob->spu_runcntl_RW, 2); + iobarrier_rw(); + POLL_WHILE_FALSE((in_be32(&prob->spu_status_R) + & SPU_STATUS_STOPPED_BY_STOP)); + + /* Reset load request to normal. */ + out_be64(&priv2->spu_privcntl_RW, SPU_PRIVCNT_LOAD_REQUEST_NORMAL); + iobarrier_w(); +} + +/** + * stop_spu_isolate + * Check SPU run-control state and force isolated + * exit function as necessary. 
+ */ +static void stop_spu_isolate(struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + if (in_be32(&prob->spu_status_R) & SPU_STATUS_ISOLATED_STATE) { + /* The SPU is in isolated state; the only way + * to get it out is to perform an isolated + * exit (clean) operation. + */ + force_spu_isolate_exit(spu); + } +} + static void harvest(struct spu_state *prev, struct spu *spu) { /* @@ -1900,8 +1941,9 @@ static void harvest(struct spu_state *prev, struct spu *spu) inhibit_user_access(prev, spu); /* Step 3. */ terminate_spu_app(prev, spu); /* Step 4. */ set_switch_pending(prev, spu); /* Step 5. */ + stop_spu_isolate(spu); /* NEW. */ remove_other_spu_access(prev, spu); /* Step 6. */ - suspend_mfc(prev, spu); /* Step 7. */ + suspend_mfc_and_halt_decr(prev, spu); /* Step 7. */ wait_suspend_mfc_complete(prev, spu); /* Step 8. */ if (!suspend_spe(prev, spu)) /* Step 9. */ clear_spu_status(prev, spu); /* Step 10. */ @@ -1913,7 +1955,7 @@ static void harvest(struct spu_state *prev, struct spu *spu) reset_spu_privcntl(prev, spu); /* Step 16. */ reset_spu_lslr(prev, spu); /* Step 17. */ setup_mfc_sr1(prev, spu); /* Step 18. */ - invalidate_slbs(prev, spu); /* Step 19. */ + spu_invalidate_slbs(spu); /* Step 19. */ reset_ch_part1(prev, spu); /* Step 20. */ reset_ch_part2(prev, spu); /* Step 21. */ enable_interrupts(prev, spu); /* Step 22. */ @@ -1934,7 +1976,8 @@ static void restore_lscsa(struct spu_state *next, struct spu *spu) setup_spu_status_part1(next, spu); /* Step 27. */ setup_spu_status_part2(next, spu); /* Step 28. */ restore_mfc_rag(next, spu); /* Step 29. */ - setup_mfc_slbs(next, spu); /* Step 30. */ + /* Step 30. */ + setup_mfc_slbs(next, spu, spu_restore_code, sizeof(spu_restore_code)); set_spu_npc(next, spu); /* Step 31. */ set_signot1(next, spu); /* Step 32. */ set_signot2(next, spu); /* Step 33. */ @@ -1981,8 +2024,9 @@ static void restore_csa(struct spu_state *next, struct spu *spu) restore_spu_mb(next, spu); /* Step 65. */ check_ppu_mb_stat(next, spu); /* Step 66. */ check_ppuint_mb_stat(next, spu); /* Step 67. */ - restore_mfc_slbs(next, spu); /* Step 68. */ + spu_invalidate_slbs(spu); /* Modified Step 68. */ restore_mfc_sr1(next, spu); /* Step 69. */ + set_int_route(next, spu); /* NEW */ restore_other_spu_access(next, spu); /* Step 70. */ restore_spu_runcntl(next, spu); /* Step 71. */ restore_mfc_cntl(next, spu); /* Step 72. */ @@ -2068,12 +2112,13 @@ int spu_save(struct spu_state *prev, struct spu *spu) acquire_spu_lock(spu); /* Step 1. */ rc = __do_spu_save(prev, spu); /* Steps 2-53. */ release_spu_lock(spu); - if (rc) { + if (rc != 0 && rc != 2 && rc != 6) { panic("%s failed on SPU[%d], rc=%d.\n", __func__, spu->number, rc); } - return rc; + return 0; } +EXPORT_SYMBOL_GPL(spu_save); /** * spu_restore - SPU context restore, with harvest and locking. @@ -2081,7 +2126,7 @@ int spu_save(struct spu_state *prev, struct spu *spu) * @spu: pointer to SPU iomem structure. * * Perform harvest + restore, as we may not be coming - * from a previous succesful save operation, and the + * from a previous successful save operation, and the * hardware state is unknown. 
*/ int spu_restore(struct spu_state *new, struct spu *spu) @@ -2090,11 +2135,7 @@ int spu_restore(struct spu_state *new, struct spu *spu) acquire_spu_lock(spu); harvest(NULL, spu); - spu->stop_code = 0; - spu->dar = 0; - spu->dsisr = 0; spu->slb_replace = 0; - spu->class_0_pending = 0; rc = __do_spu_restore(new, spu); release_spu_lock(spu); if (rc) { @@ -2103,19 +2144,7 @@ int spu_restore(struct spu_state *new, struct spu *spu) } return rc; } - -/** - * spu_harvest - SPU harvest (reset) operation - * @spu: pointer to SPU iomem structure. - * - * Perform SPU harvest (reset) operation. - */ -void spu_harvest(struct spu *spu) -{ - acquire_spu_lock(spu); - harvest(NULL, spu); - release_spu_lock(spu); -} +EXPORT_SYMBOL_GPL(spu_restore); static void init_prob(struct spu_state *csa) { @@ -2125,6 +2154,7 @@ static void init_prob(struct spu_state *csa) csa->spu_chnlcnt_RW[28] = 1; csa->spu_chnlcnt_RW[30] = 1; csa->prob.spu_runcntl_RW = SPU_RUNCNTL_STOP; + csa->prob.mb_stat_R = 0x000400; } static void init_priv1(struct spu_state *csa) @@ -2135,9 +2165,6 @@ static void init_priv1(struct spu_state *csa) MFC_STATE1_PROBLEM_STATE_MASK | MFC_STATE1_RELOCATE_MASK | MFC_STATE1_BUS_TLBIE_MASK; - /* Set storage description. */ - csa->priv1.mfc_sdr_RW = mfspr(SPRN_SDR1); - /* Enable OS-specific set of interrupts. */ csa->priv1.int_mask_class0_RW = CLASS0_ENABLE_DMA_ALIGNMENT_INTR | CLASS0_ENABLE_INVALID_DMA_COMMAND_INTR | @@ -2145,7 +2172,8 @@ static void init_priv1(struct spu_state *csa) csa->priv1.int_mask_class1_RW = CLASS1_ENABLE_SEGMENT_FAULT_INTR | CLASS1_ENABLE_STORAGE_FAULT_INTR; csa->priv1.int_mask_class2_RW = CLASS2_ENABLE_SPU_STOP_INTR | - CLASS2_ENABLE_SPU_HALT_INTR; + CLASS2_ENABLE_SPU_HALT_INTR | + CLASS2_ENABLE_SPU_DMA_TAG_GROUP_COMPLETE_INTR; } static void init_priv2(struct spu_state *csa) @@ -2167,38 +2195,28 @@ static void init_priv2(struct spu_state *csa) * as it is by far the largest of the context save regions, * and may need to be pinned or otherwise specially aligned. */ -void spu_init_csa(struct spu_state *csa) +int spu_init_csa(struct spu_state *csa) { - struct spu_lscsa *lscsa; - unsigned char *p; + int rc; if (!csa) - return; + return -EINVAL; memset(csa, 0, sizeof(struct spu_state)); - lscsa = vmalloc(sizeof(struct spu_lscsa)); - if (!lscsa) - return; - - memset(lscsa, 0, sizeof(struct spu_lscsa)); - csa->lscsa = lscsa; - csa->register_lock = SPIN_LOCK_UNLOCKED; + rc = spu_alloc_lscsa(csa); + if (rc) + return rc; - /* Set LS pages reserved to allow for user-space mapping. */ - for (p = lscsa->ls; p < lscsa->ls + LS_SIZE; p += PAGE_SIZE) - SetPageReserved(vmalloc_to_page(p)); + spin_lock_init(&csa->register_lock); init_prob(csa); init_priv1(csa); init_priv2(csa); + + return 0; } void spu_fini_csa(struct spu_state *csa) { - /* Clear reserved bit before vfree. 
*/ - unsigned char *p; - for (p = csa->lscsa->ls; p < csa->lscsa->ls + LS_SIZE; p += PAGE_SIZE) - ClearPageReserved(vmalloc_to_page(p)); - - vfree(csa->lscsa); + spu_free_lscsa(csa); } diff --git a/arch/powerpc/platforms/cell/spufs/syscalls.c b/arch/powerpc/platforms/cell/spufs/syscalls.c index e6565a949dd..a87200a535f 100644 --- a/arch/powerpc/platforms/cell/spufs/syscalls.c +++ b/arch/powerpc/platforms/cell/spufs/syscalls.c @@ -1,8 +1,9 @@ #include <linux/file.h> #include <linux/fs.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/mount.h> #include <linux/namei.h> +#include <linux/slab.h> #include <asm/uaccess.h> @@ -38,7 +39,7 @@ static long do_spu_run(struct file *filp, u32 npc, status; ret = -EFAULT; - if (get_user(npc, unpc) || get_user(status, ustatus)) + if (get_user(npc, unpc)) goto out; /* check if this file was created by spu_create */ @@ -46,58 +47,42 @@ static long do_spu_run(struct file *filp, if (filp->f_op != &spufs_context_fops) goto out; - i = SPUFS_I(filp->f_dentry->d_inode); - ret = spufs_run_spu(filp, i->i_ctx, &npc, &status); + i = SPUFS_I(file_inode(filp)); + ret = spufs_run_spu(i->i_ctx, &npc, &status); - if (put_user(npc, unpc) || put_user(status, ustatus)) + if (put_user(npc, unpc)) ret = -EFAULT; -out: - return ret; -} - -#ifndef MODULE -asmlinkage long sys_spu_run(int fd, __u32 __user *unpc, __u32 __user *ustatus) -{ - int fput_needed; - struct file *filp; - long ret; - - ret = -EBADF; - filp = fget_light(fd, &fput_needed); - if (filp) { - ret = do_spu_run(filp, unpc, ustatus); - fput_light(filp, fput_needed); - } + if (ustatus && put_user(status, ustatus)) + ret = -EFAULT; +out: return ret; } -#endif -asmlinkage long sys_spu_create(const char __user *pathname, - unsigned int flags, mode_t mode) +static long do_spu_create(const char __user *pathname, unsigned int flags, + umode_t mode, struct file *neighbor) { - char *tmp; + struct path path; + struct dentry *dentry; int ret; - tmp = getname(pathname); - ret = PTR_ERR(tmp); - if (!IS_ERR(tmp)) { - struct nameidata nd; - - ret = path_lookup(tmp, LOOKUP_PARENT| - LOOKUP_OPEN|LOOKUP_CREATE, &nd); - if (!ret) { - ret = spufs_create_thread(&nd, flags, mode); - path_release(&nd); - } - putname(tmp); + dentry = user_path_create(AT_FDCWD, pathname, &path, LOOKUP_DIRECTORY); + ret = PTR_ERR(dentry); + if (!IS_ERR(dentry)) { + ret = spufs_create(&path, dentry, flags, mode, neighbor); + done_path_create(&path, dentry); } return ret; } struct spufs_calls spufs_calls = { - .create_thread = sys_spu_create, + .create_thread = do_spu_create, .spu_run = do_spu_run, + .notify_spus_active = do_notify_spus_active, .owner = THIS_MODULE, +#ifdef CONFIG_COREDUMP + .coredump_extra_notes_size = spufs_coredump_extra_notes_size, + .coredump_extra_notes_write = spufs_coredump_extra_notes_write, +#endif }; |
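A note on the reworked locking visible in the spufs.h hunk above: spu_acquire() is now a __must_check wrapper around mutex_lock_interruptible(&ctx->state_mutex), and the rewritten spufs_wait() macro drops state_mutex before sleeping, re-acquires it on wake-up, and bails out with -ERESTARTSYS (mutex dropped) when a signal is pending or the re-acquire is itself interrupted. A minimal caller sketch as it might look inside spufs (i.e. with "spufs.h" in scope); the function name and the my_cond() helper are hypothetical and not part of the patch:

	static long my_wait_example(struct spu_context *ctx)
	{
		long ret;

		ret = spu_acquire(ctx);	/* interruptible, may return -EINTR */
		if (ret)
			return ret;

		/* Sleeps with state_mutex dropped; returns 0 with the mutex
		 * held again, or -ERESTARTSYS with the mutex still dropped. */
		ret = spufs_wait(ctx->ibox_wq, my_cond(ctx));
		if (ret)
			return ret;	/* no spu_release() on this path */

		/* ... touch ctx under state_mutex ... */
		spu_release(ctx);
		return 0;
	}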

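Likewise, spu_init_csa() in the switch.c hunk now returns an int (-EINVAL for a NULL csa, or the error from spu_alloc_lscsa()) instead of silently returning on failure, so callers have to check it. A hypothetical allocation sketch, not taken from the patch (the function name and error handling are illustrative only, assuming <linux/slab.h> and "spufs.h" are included):

	static struct spu_context *my_alloc_ctx_example(void)
	{
		struct spu_context *ctx;

		ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
		if (!ctx)
			return NULL;

		/* May fail now that the LSCSA is allocated separately. */
		if (spu_init_csa(&ctx->csa)) {
			kfree(ctx);
			return NULL;
		}

		/* ... remaining init; balanced by spu_fini_csa(&ctx->csa)
		 * and kfree(ctx) on teardown ... */
		return ctx;
	}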