201 files changed, 10713 insertions, 12441 deletions
diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig
index d6c7506ec7d..4d88f6a1905 100644
--- a/arch/powerpc/platforms/44x/Kconfig
+++ b/arch/powerpc/platforms/44x/Kconfig
@@ -199,6 +199,34 @@ config CURRITUCK
 	help
 	  This option enables support for the IBM Currituck (476fpe) evaluation board
 
+config AKEBONO
+	bool "IBM Akebono (476gtr) Support"
+	depends on PPC_47x
+	select SWIOTLB
+	select 476FPE
+	select PPC4xx_PCI_EXPRESS
+	select PCI_MSI
+	select PPC4xx_HSTA_MSI
+	select I2C
+	select I2C_IBM_IIC
+	select NETDEVICES
+	select ETHERNET
+	select NET_VENDOR_IBM
+	select IBM_EMAC_EMAC4
+	select IBM_EMAC_RGMII_WOL
+	select USB
+	select USB_OHCI_HCD_PLATFORM
+	select USB_EHCI_HCD_PLATFORM
+	select MMC_SDHCI
+	select MMC_SDHCI_PLTFM
+	select MMC_SDHCI_OF_476GTR
+	select ATA
+	select SATA_AHCI_PLATFORM
+	help
+	  This option enables support for the IBM Akebono (476gtr)
+	  evaluation board
+
+
 config ICON
 	bool "Icon"
 	depends on 44x
@@ -265,7 +293,6 @@ config 440EP
 	select PPC_FPU
 	select IBM440EP_ERR42
 	select IBM_EMAC_ZMII
-	select USB_ARCH_HAS_OHCI
 
 config 440EPX
 	bool
@@ -324,6 +351,20 @@ config APM821xx
 	select IBM_EMAC_EMAC4
 	select IBM_EMAC_TAH
 
+config 476FPE_ERR46
+	bool "Enable linker work around for PPC476FPE errata #46"
+	depends on 476FPE
+	help
+	  This option enables a work around for an icache bug on 476
+	  that can cause execution of stale instructions when falling
+	  through pages (IBM errata #46). It requires a recent version
+	  of binutils which supports the --ppc476-workaround option.
+
+	  The work around enables the appropriate linker options and
+	  ensures that all module output sections are aligned to 4K
+	  page boundaries. The work around is only required when
+	  building modules.
+
 # 44x errata/workaround config symbols, selected by the CPU models above
 config IBM440EP_ERR42
 	bool
diff --git a/arch/powerpc/platforms/44x/Makefile b/arch/powerpc/platforms/44x/Makefile
index d03833abec0..26d35b5941f 100644
--- a/arch/powerpc/platforms/44x/Makefile
+++ b/arch/powerpc/platforms/44x/Makefile
@@ -10,4 +10,5 @@ obj-$(CONFIG_XILINX_VIRTEX_5_FXT) += virtex.o
 obj-$(CONFIG_XILINX_ML510) += virtex_ml510.o
 obj-$(CONFIG_ISS4xx)	+= iss4xx.o
 obj-$(CONFIG_CANYONLANDS)+= canyonlands.o
-obj-$(CONFIG_CURRITUCK)	+= currituck.o
+obj-$(CONFIG_CURRITUCK)	+= ppc476.o
+obj-$(CONFIG_AKEBONO)	+= ppc476.o
diff --git a/arch/powerpc/platforms/44x/iss4xx.c b/arch/powerpc/platforms/44x/iss4xx.c
index a28a8629727..4241bc82580 100644
--- a/arch/powerpc/platforms/44x/iss4xx.c
+++ b/arch/powerpc/platforms/44x/iss4xx.c
@@ -81,12 +81,12 @@ static void __init iss4xx_init_irq(void)
 }
 
 #ifdef CONFIG_SMP
-static void __cpuinit smp_iss4xx_setup_cpu(int cpu)
+static void smp_iss4xx_setup_cpu(int cpu)
 {
 	mpic_setup_this_cpu();
 }
 
-static int __cpuinit smp_iss4xx_kick_cpu(int cpu)
+static int smp_iss4xx_kick_cpu(int cpu)
 {
 	struct device_node *cpunode = of_get_cpu_node(cpu, NULL);
 	const u64 *spin_table_addr_prop;
diff --git a/arch/powerpc/platforms/44x/currituck.c b/arch/powerpc/platforms/44x/ppc476.c
index ecd3890c40d..33986c1a05d 100644
--- a/arch/powerpc/platforms/44x/currituck.c
+++ b/arch/powerpc/platforms/44x/ppc476.c
@@ -1,7 +1,8 @@
 /*
- * Currituck board specific routines
+ * PowerPC 476FPE board specific routines
  *
- * Copyright © 2011 Tony Breeds IBM Corporation
+ * Copyright © 2013 Tony Breeds IBM Corporation
+ * Copyright © 2013 Alistair Popple IBM Corporation
  *
  * Based on earlier code:
  *    Matt Porter <mporter@kernel.crashing.org>
@@ -35,8 +36,9 @@
 #include <asm/mmu.h>
 
 #include <linux/pci.h>
+#include <linux/i2c.h>
 
-static __initdata struct of_device_id ppc47x_of_bus[] = {
+static struct of_device_id ppc47x_of_bus[] __initdata = {
 	{ .compatible = "ibm,plb4", },
 	{ .compatible = "ibm,plb6", },
 	{ .compatible = "ibm,opb", },
@@ -55,15 +57,69 @@ static void quirk_ppc_currituck_usb_fixup(struct pci_dev *dev)
 }
 DECLARE_PCI_FIXUP_HEADER(0x1033, 0x0035, quirk_ppc_currituck_usb_fixup);
 
+/* Akebono has an AVR microcontroller attached to the I2C bus
+ * which is used to power off/reset the system. */
+
+/* AVR I2C Commands */
+#define AVR_PWRCTL_CMD (0x26)
+
+/* Flags for the power control I2C commands */
+#define AVR_PWRCTL_PWROFF (0x01)
+#define AVR_PWRCTL_RESET (0x02)
+
+static struct i2c_client *avr_i2c_client;
+static void avr_halt_system(int pwrctl_flags)
+{
+	/* Ask the AVR to power off or reset, as selected by pwrctl_flags */
+	if (i2c_smbus_write_byte_data(avr_i2c_client,
+				      AVR_PWRCTL_CMD, pwrctl_flags) < 0)
+		pr_err("%s: AVR power control request failed\n", __func__);
+	/* Nothing else to do: spin until the AVR acts on the request */
+	while (1)
+		;
+}
+
+static void avr_power_off_system(void)
+{
+	avr_halt_system(AVR_PWRCTL_PWROFF);	/* spins, never returns */
+}
+
+static void avr_reset_system(char *cmd)
+{
+	avr_halt_system(AVR_PWRCTL_RESET);	/* cmd is not looked at */
+}
+
+static int avr_probe(struct i2c_client *client,
+			    const struct i2c_device_id *id)
+{
+	avr_i2c_client = client;	/* kept for avr_halt_system() */
+	ppc_md.restart = avr_reset_system;	/* route restart via AVR */
+	ppc_md.power_off = avr_power_off_system;	/* and power off */
+	return 0;
+}
+
+static const struct i2c_device_id avr_id[] = {
+	{ "akebono-avr", 0 },
+	{ }
+};
+
+static struct i2c_driver avr_driver = {
+	.driver = {
+		.name = "akebono-avr",
+	},
+	.probe = avr_probe,
+	.id_table = avr_id,
+};
+
 static int __init ppc47x_device_probe(void)
 {
+	i2c_add_driver(&avr_driver);
 	of_platform_bus_probe(NULL, ppc47x_of_bus, NULL);
 
 	return 0;
 }
 machine_device_initcall(ppc47x, ppc47x_device_probe);
 
-/* We can have either UICs or MPICs */
 static void __init ppc47x_init_irq(void)
 {
 	struct device_node *np;
@@ -91,12 +147,12 @@ static void __init ppc47x_init_irq(void)
 }
 
 #ifdef CONFIG_SMP
-static void __cpuinit smp_ppc47x_setup_cpu(int cpu)
+static void smp_ppc47x_setup_cpu(int cpu)
 {
 	mpic_setup_this_cpu();
 }
 
-static int __cpuinit smp_ppc47x_kick_cpu(int cpu)
+static int smp_ppc47x_kick_cpu(int cpu)
 {
 	struct device_node *cpunode = of_get_cpu_node(cpu, NULL);
 	const u64 *spin_table_addr_prop;
@@ -157,33 +213,79 @@ static void __init ppc47x_setup_arch(void)
 {
 
 	/* No need to check the DMA config as we /know/ our windows are all of
- 	 * RAM.  Lets hope that doesn't change */
+	 * RAM.  Lets hope that doesn't change */
 	swiotlb_detect_4g();
 
 	ppc47x_smp_init();
 }
 
-/*
- * Called very early, MMU is off, device-tree isn't unflattened
- */
-static int __init ppc47x_probe(void)
+static int board_rev = -1;	/* stays -1 until the FPGA has been read */
+static int __init ppc47x_get_board_rev(void)
 {
-	unsigned long root = of_get_flat_dt_root();
+	int reg = 0;	/* byte offset of the revision register in the FPGA */
+	u8 __iomem *fpga;	/* MMIO cookie, only used via ioread8() */
+	struct device_node *np = NULL;
+
+	if (of_machine_is_compatible("ibm,currituck")) {
+		np = of_find_compatible_node(NULL, NULL, "ibm,currituck-fpga");
+		reg = 0;
+	} else if (of_machine_is_compatible("ibm,akebono")) {
+		np = of_find_compatible_node(NULL, NULL, "ibm,akebono-fpga");
+		reg = 2;
+	}
+
+	if (!np)
+		goto fail;
 
-	if (!of_flat_dt_is_compatible(root, "ibm,currituck"))
-		return 0;
+	fpga = of_iomap(np, 0);
+	of_node_put(np);
+	if (!fpga)
+		goto fail;
 
-	return 1;
+	board_rev = ioread8(fpga + reg) & 0x03;
+	pr_info("%s: Found board revision %d\n", __func__, board_rev);
+	iounmap(fpga);
+	return 0;
+
+fail:
+	pr_info("%s: Unable to find board revision\n", __func__);
+	return 0;
 }
+machine_arch_initcall(ppc47x, ppc47x_get_board_rev);
 
 /* Use USB controller should have been hardware swizzled but it wasn't :( */
 static void ppc47x_pci_irq_fixup(struct pci_dev *dev)
 {
 	if (dev->vendor == 0x1033 && (dev->device == 0x0035 ||
-	                              dev->device == 0x00e0)) {
-		dev->irq = irq_create_mapping(NULL, 47);
-		pr_info("%s: Mapping irq 47 %d\n", __func__, dev->irq);
+				      dev->device == 0x00e0)) {
+		if (board_rev == 0) {
+			dev->irq = irq_create_mapping(NULL, 47);
+			pr_info("%s: Mapping irq %d\n", __func__, dev->irq);
+		} else if (board_rev == 2) {
+			dev->irq = irq_create_mapping(NULL, 49);
+			pr_info("%s: Mapping irq %d\n", __func__, dev->irq);
+		} else {
+			pr_alert("%s: Unknown board revision\n", __func__);
+		}
+	}
+}
+
+/*
+ * Called very early, MMU is off, device-tree isn't unflattened
+ */
+static int __init ppc47x_probe(void)
+{
+	unsigned long root = of_get_flat_dt_root();
+
+	if (of_flat_dt_is_compatible(root, "ibm,akebono"))
+		return 1;	/* Akebono: no pci_irq_fixup gets installed */
+
+	if (of_flat_dt_is_compatible(root, "ibm,currituck")) {
+		ppc_md.pci_irq_fixup = ppc47x_pci_irq_fixup;
+		return 1;
 	}
+
+	return 0;	/* neither board: not our machine */
 }
 
 define_machine(ppc47x) {
@@ -192,7 +294,6 @@ define_machine(ppc47x) {
 	.progress		= udbg_progress,
 	.init_IRQ		= ppc47x_init_irq,
 	.setup_arch		= ppc47x_setup_arch,
-	.pci_irq_fixup		= ppc47x_pci_irq_fixup,
 	.restart		= ppc4xx_reset_system,
 	.calibrate_decr		= generic_calibrate_decr,
 };
diff --git a/arch/powerpc/platforms/44x/ppc476_modules.lds b/arch/powerpc/platforms/44x/ppc476_modules.lds
new file mode 100644
index 00000000000..9fec5d34ba8
--- /dev/null
+++ b/arch/powerpc/platforms/44x/ppc476_modules.lds
@@ -0,0 +1,15 @@
+SECTIONS
+{
+	.text : ALIGN(4096)	/* code sections start on 4K boundaries */
+	{
+		*(.text .text.* .fixup)
+	}
+	.init.text : ALIGN(4096)
+	{
+		*(.init.text .init.text.*)
+	}
+	.exit.text : ALIGN(4096)
+	{
+		*(.exit.text .exit.text.*)
+	}
+}
diff --git a/arch/powerpc/platforms/44x/warp.c b/arch/powerpc/platforms/44x/warp.c
index 4cfa49901c0..534574a97ec 100644
--- a/arch/powerpc/platforms/44x/warp.c
+++ b/arch/powerpc/platforms/44x/warp.c
@@ -16,7 +16,6 @@
 #include <linux/interrupt.h>
 #include <linux/delay.h>
 #include <linux/of_gpio.h>
-#include <linux/of_i2c.h>
 #include <linux/slab.h>
 #include <linux/export.h>
 
diff --git a/arch/powerpc/platforms/512x/Kconfig b/arch/powerpc/platforms/512x/Kconfig
index fc9c1cbfcb1..5aa3f4b5332 100644
--- a/arch/powerpc/platforms/512x/Kconfig
+++ b/arch/powerpc/platforms/512x/Kconfig
@@ -1,9 +1,9 @@
 config PPC_MPC512x
 	bool "512x-based boards"
 	depends on 6xx
+	select COMMON_CLK
 	select FSL_SOC
 	select IPIC
-	select PPC_CLOCK
 	select PPC_PCI_CHOICE
 	select FSL_PCI if PCI
 	select ARCH_WANT_OPTIONAL_GPIOLIB
diff --git a/arch/powerpc/platforms/512x/Makefile b/arch/powerpc/platforms/512x/Makefile
index 72fb9340e09..01693121a2b 100644
--- a/arch/powerpc/platforms/512x/Makefile
+++ b/arch/powerpc/platforms/512x/Makefile
@@ -1,7 +1,8 @@
 #
 # Makefile for the Freescale PowerPC 512x linux kernel.
 #
-obj-y				+= clock.o mpc512x_shared.o
+obj-$(CONFIG_COMMON_CLK)	+= clock-commonclk.o
+obj-y				+= mpc512x_shared.o
 obj-$(CONFIG_MPC5121_ADS)	+= mpc5121_ads.o mpc5121_ads_cpld.o
 obj-$(CONFIG_MPC512x_GENERIC)	+= mpc512x_generic.o
 obj-$(CONFIG_PDM360NG)		+= pdm360ng.o
diff --git a/arch/powerpc/platforms/512x/clock-commonclk.c b/arch/powerpc/platforms/512x/clock-commonclk.c
new file mode 100644
index 00000000000..6eb614a271f
--- /dev/null
+++ b/arch/powerpc/platforms/512x/clock-commonclk.c
@@ -0,0 +1,1221 @@
+/*
+ * Copyright (C) 2013 DENX Software Engineering
+ *
+ * Gerhard Sittig, <gsi@denx.de>
+ *
+ * common clock driver support for the MPC512x platform
+ *
+ * This is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/bitops.h>
+#include <linux/clk-provider.h>
+#include <linux/clkdev.h>
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+
+#include <asm/mpc5121.h>
+#include <dt-bindings/clock/mpc512x-clock.h>
+
+#include "mpc512x.h"		/* our public mpc5121_clk_init() API */
+
+/* helpers to keep the MCLK intermediates "somewhere" in our table */
+enum {
+	MCLK_IDX_MUX0,
+	MCLK_IDX_EN0,
+	MCLK_IDX_DIV0,
+	MCLK_MAX_IDX,
+};
+
+#define NR_PSCS			12
+#define NR_MSCANS		4
+#define NR_SPDIFS		1
+#define NR_OUTCLK		4
+#define NR_MCLKS		(NR_PSCS + NR_MSCANS + NR_SPDIFS + NR_OUTCLK)
+
+/* extend the public set of clocks by adding internal slots for management */
+enum {
+	/* arrange for adjacent numbers after the public set */
+	MPC512x_CLK_START_PRIVATE = MPC512x_CLK_LAST_PUBLIC,
+	/* clocks which aren't announced to the public */
+	MPC512x_CLK_DDR,
+	MPC512x_CLK_MEM,
+	MPC512x_CLK_IIM,
+	/* intermediates in div+gate combos or fractional dividers */
+	MPC512x_CLK_DDR_UG,
+	MPC512x_CLK_SDHC_x4,
+	MPC512x_CLK_SDHC_UG,
+	MPC512x_CLK_SDHC2_UG,
+	MPC512x_CLK_DIU_x4,
+	MPC512x_CLK_DIU_UG,
+	MPC512x_CLK_MBX_BUS_UG,
+	MPC512x_CLK_MBX_UG,
+	MPC512x_CLK_MBX_3D_UG,
+	MPC512x_CLK_PCI_UG,
+	MPC512x_CLK_NFC_UG,
+	MPC512x_CLK_LPC_UG,
+	MPC512x_CLK_SPDIF_TX_IN,
+	/* intermediates for the mux+gate+div+mux MCLK generation */
+	MPC512x_CLK_MCLKS_FIRST,
+	MPC512x_CLK_MCLKS_LAST = MPC512x_CLK_MCLKS_FIRST
+				+ NR_MCLKS * MCLK_MAX_IDX,
+	/* internal, symbolic spec for the number of slots */
+	MPC512x_CLK_LAST_PRIVATE,
+};
+
+/* data required for the OF clock provider registration */
+static struct clk *clks[MPC512x_CLK_LAST_PRIVATE];
+static struct clk_onecell_data clk_data;
+
+/* CCM register access */
+static struct mpc512x_ccm __iomem *clkregs;
+static DEFINE_SPINLOCK(clklock);
+
+/* SoC variants {{{ */
+
+/*
+ * tell SoC variants apart as they are rather similar yet not identical,
+ * cache the result in an enum to not repeatedly run the expensive OF test
+ *
+ * MPC5123 is an MPC5121 without the MBX graphics accelerator
+ *
+ * MPC5125 has many more differences: no MBX, no AXE, no VIU, no SPDIF,
+ * no PATA, no SATA, no PCI, two FECs (of different compatibility name),
+ * only 10 PSCs (of different compatibility name), two SDHCs, different
+ * NFC IP block, output clocks, system PLL status query, different CPMF
+ * interpretation, no CFM, different fourth PSC/CAN mux0 input -- yet
+ * those differences can get folded into this clock provider support
+ * code and don't warrant a separate highly redundant implementation
+ */
+
+static enum soc_type {
+	MPC512x_SOC_MPC5121,
+	MPC512x_SOC_MPC5123,
+	MPC512x_SOC_MPC5125,
+} soc;
+
+/*
+ * cache the SoC variant in 'soc' so that the OF test runs only once;
+ * the machine compatible strings are tested in the order MPC5121,
+ * MPC5123, MPC5125 and the first match wins, without any match 'soc'
+ * keeps its zero initializer which is MPC512x_SOC_MPC5121
+ */
+static void mpc512x_clk_determine_soc(void)
+{
+	if (of_machine_is_compatible("fsl,mpc5121"))
+		soc = MPC512x_SOC_MPC5121;
+	else if (of_machine_is_compatible("fsl,mpc5123"))
+		soc = MPC512x_SOC_MPC5123;
+	else if (of_machine_is_compatible("fsl,mpc5125"))
+		soc = MPC512x_SOC_MPC5125;
+}
+
+static bool soc_has_mbx(void)
+{
+	if (soc == MPC512x_SOC_MPC5121)	/* MPC5123 and MPC5125 lack the MBX */
+		return true;
+	return false;
+}
+
+static bool soc_has_axe(void)
+{
+	if (soc == MPC512x_SOC_MPC5125)	/* the only variant without AXE */
+		return false;
+	return true;
+}
+
+static bool soc_has_viu(void)
+{
+	if (soc == MPC512x_SOC_MPC5125)	/* the only variant without VIU */
+		return false;
+	return true;
+}
+
+static bool soc_has_spdif(void)
+{
+	if (soc == MPC512x_SOC_MPC5125)	/* the only variant without SPDIF */
+		return false;
+	return true;
+}
+
+static bool soc_has_pata(void)
+{
+	if (soc == MPC512x_SOC_MPC5125)	/* the only variant without PATA */
+		return false;
+	return true;
+}
+
+static bool soc_has_sata(void)
+{
+	if (soc == MPC512x_SOC_MPC5125)	/* the only variant without SATA */
+		return false;
+	return true;
+}
+
+static bool soc_has_pci(void)
+{
+	if (soc == MPC512x_SOC_MPC5125)	/* the only variant without PCI */
+		return false;
+	return true;
+}
+
+static bool soc_has_fec2(void)
+{
+	if (soc == MPC512x_SOC_MPC5125)	/* the only variant with two FECs */
+		return true;
+	return false;
+}
+
+static int soc_max_pscnum(void)
+{
+	if (soc == MPC512x_SOC_MPC5125)	/* MPC5125 has only 10 PSCs */
+		return 10;
+	return 12;
+}
+
+static bool soc_has_sdhc2(void)
+{
+	if (soc == MPC512x_SOC_MPC5125)	/* the only variant with two SDHCs */
+		return true;
+	return false;
+}
+
+static bool soc_has_nfc_5125(void)
+{
+	if (soc == MPC512x_SOC_MPC5125)	/* MPC5125 has a different NFC block */
+		return true;
+	return false;
+}
+
+static bool soc_has_outclk(void)
+{
+	if (soc == MPC512x_SOC_MPC5125)	/* only MPC5125 has output clocks */
+		return true;
+	return false;
+}
+
+static bool soc_has_cpmf_0_bypass(void)
+{
+	if (soc == MPC512x_SOC_MPC5125)	/* MPC5125 reads CPMF differently */
+		return true;
+	return false;
+}
+
+static bool soc_has_mclk_mux0_canin(void)
+{
+	if (soc == MPC512x_SOC_MPC5125)	/* MPC5125: other 4th mux0 input */
+		return true;
+	return false;
+}
+
+/* }}} SoC variants */
+/* common clk API wrappers {{{ */
+
+/* convenience wrappers around the common clk API */
+static inline struct clk *mpc512x_clk_fixed(const char *name, int rate)
+{
+	return clk_register_fixed_rate(NULL, name, NULL, CLK_IS_ROOT, rate);
+}
+
+/* fixed mul/div factor clock, registered with CLK_SET_RATE_PARENT */
+static inline struct clk *mpc512x_clk_factor(
+	const char *name, const char *parent_name,
+	int mul, int div)
+{
+	const int flags = CLK_SET_RATE_PARENT;
+
+	return clk_register_fixed_factor(NULL, name, parent_name, flags,
+					 mul, div);
+}
+
+static inline struct clk *mpc512x_clk_divider(
+	const char *name, const char *parent_name, u8 clkflags,
+	u32 __iomem *reg, u8 pos, u8 len, int divflags)
+{
+	return clk_register_divider(NULL, name, parent_name, clkflags,
+				    reg, pos, len, divflags, &clklock);
+}
+
+/* table driven divider, neither clock flags nor divider flags set */
+static inline struct clk *mpc512x_clk_divtable(
+	const char *name, const char *parent_name,
+	u32 __iomem *reg, u8 pos, u8 len,
+	const struct clk_div_table *divtab)
+{
+	const u8 no_divflags = 0;
+
+	return clk_register_divider_table(NULL, name, parent_name, 0,
+					  reg, pos, len, no_divflags,
+					  divtab, &clklock);
+}
+
+/* gate at bit 'pos' of 'reg', registered with CLK_SET_RATE_PARENT */
+static inline struct clk *mpc512x_clk_gated(
+	const char *name, const char *parent_name,
+	u32 __iomem *reg, u8 pos)
+{
+	const int flags = CLK_SET_RATE_PARENT;
+
+	return clk_register_gate(NULL, name, parent_name, flags,
+				 reg, pos, 0, &clklock);
+}
+
+/* mux in bits 'pos'/'len' of 'reg', no mux flags, CLK_SET_RATE_PARENT */
+static inline struct clk *mpc512x_clk_muxed(const char *name,
+	const char **parent_names, int parent_count,
+	u32 __iomem *reg, u8 pos, u8 len)
+{
+	const int flags = CLK_SET_RATE_PARENT;
+	const u8 no_muxflags = 0;
+
+	return clk_register_mux(NULL, name,
+				parent_names, parent_count,
+				flags, reg, pos, len,
+				no_muxflags, &clklock);
+}
+
+/* }}} common clk API wrappers */
+
+/* read 'reg' and return the 'len' bits found 'pos' bits above the LSB */
+static inline int get_bit_field(uint32_t __iomem *reg, uint8_t pos, uint8_t len)
+{
+	uint32_t val = in_be32(reg);
+
+	/* unsigned mask: shifting a signed 1 into bit 31 is undefined */
+	val >>= pos;
+	val &= (1U << len) - 1;
+	return val;
+}
+
+/* get the SPMF and translate it into the "sys pll" multiplier */
+static int get_spmf_mult(void)
+{
+	/* one entry for each value of the four bit wide SPMF field */
+	static const int spmf_to_mult[] = {
+		68, 1, 12, 16, 20, 24, 28, 32,
+		36, 40, 44, 48, 52, 56, 60, 64,
+	};
+	int spmf = get_bit_field(&clkregs->spmr, 24, 4);
+
+	return spmf_to_mult[spmf];
+}
+
+/*
+ * get the SYS_DIV value and translate it into a divide factor, scaled by
+ * two since the divide ratio is fractional; codes beyond the table are
+ * reported and clamped to its last entry instead of read out of bounds
+ */
+static int get_sys_div_x2(void)
+{
+	static const int sysdiv_code_to_x2[] = {
+		4, 5, 6, 7, 8, 9, 10, 14,
+		12, 16, 18, 22, 20, 24, 26, 30,
+		28, 32, 34, 38, 36, 40, 42, 46,
+		44, 48, 50, 54, 52, 56, 58, 62,
+		60, 64, 66,
+	};
+	int divcode = get_bit_field(&clkregs->scfr2, 26, 6);
+
+	if (WARN_ON(divcode >= ARRAY_SIZE(sysdiv_code_to_x2)))
+		divcode = ARRAY_SIZE(sysdiv_code_to_x2) - 1;
+	return sysdiv_code_to_x2[divcode];
+}
+
+/*
+ * get the CPMF value and translate it into a multiplier factor
+ *
+ * values returned from here are a multiple of the real factor since the
+ * multiplier ratio is fractional; the field is four bits wide while the
+ * tables hold eight entries, larger codes are reported and clamped
+ */
+static int get_cpmf_mult_x2(void)
+{
+	static const int cpmf_to_mult_x36[] = {
+		/* 0b000 is "times 36" */
+		72, 2, 2, 3, 4, 5, 6, 7,
+	};
+	static const int cpmf_to_mult_0by[] = {
+		/* 0b000 is "bypass" */
+		2, 2, 2, 3, 4, 5, 6, 7,
+	};
+	const int *cpmf_to_mult;
+	int cpmf;
+
+	cpmf = get_bit_field(&clkregs->spmr, 16, 4);
+	if (WARN_ON(cpmf >= ARRAY_SIZE(cpmf_to_mult_x36)))
+		cpmf = ARRAY_SIZE(cpmf_to_mult_x36) - 1;
+	cpmf_to_mult = soc_has_cpmf_0_bypass()
+		     ? cpmf_to_mult_0by : cpmf_to_mult_x36;
+	return cpmf_to_mult[cpmf];
+}
+
+/*
+ * some of the clock dividers do scale in a linear way, yet not all of
+ * their bit combinations are legal; use a divider table to get a
+ * resulting set of applicable divider values
+ */
+
+/* applies to the IPS_DIV, and PCI_DIV values */
+static struct clk_div_table divtab_2346[] = {
+	{ .val = 2, .div = 2, },
+	{ .val = 3, .div = 3, },
+	{ .val = 4, .div = 4, },
+	{ .val = 6, .div = 6, },
+	{ .div = 0, },
+};
+
+/* applies to the MBX_DIV, LPC_DIV, and NFC_DIV values */
+static struct clk_div_table divtab_1234[] = {
+	{ .val = 1, .div = 1, },
+	{ .val = 2, .div = 2, },
+	{ .val = 3, .div = 3, },
+	{ .val = 4, .div = 4, },
+	{ .div = 0, },
+};
+
+/*
+ * fetch the value of property 'propname' from the "fsl,mpc5121-immr"
+ * node, returns 0 when the node or a valid u32 property is missing
+ */
+static int get_freq_from_dt(char *propname)
+{
+	struct device_node *np;
+	u32 val = 0;
+
+	np = of_find_compatible_node(NULL, NULL, "fsl,mpc5121-immr");
+	if (!np)
+		return 0;
+	of_property_read_u32(np, propname, &val); /* val kept on failure */
+	of_node_put(np);
+	return val;
+}
+
+static void mpc512x_clk_preset_data(void)
+{
+	struct clk **slot;	/* visits every entry of clks[] */
+
+	for (slot = clks; slot < clks + ARRAY_SIZE(clks); slot++)
+		*slot = ERR_PTR(-ENODEV);
+}
+
+/*
+ * - receives the "bus frequency" from the caller (that's the IPS clock
+ *   rate, the historical source of clock information)
+ * - fetches the system PLL multiplier and divider values as well as the
+ *   IPS divider value from hardware
+ * - determines the REF clock rate either from the XTAL/OSC spec (if
+ *   there is a device tree node describing the oscillator) or from the
+ *   IPS bus clock (supported for backwards compatibility, such that
+ *   setups without XTAL/OSC specs keep working)
+ * - creates the "ref" clock item in the clock tree, such that
+ *   subsequent code can create the remainder of the hierarchy (REF ->
+ *   SYS -> CSB -> IPS) from the REF clock rate and the returned mul/div
+ *   values
+ */
+static void mpc512x_clk_setup_ref_clock(struct device_node *np, int bus_freq,
+					int *sys_mul, int *sys_div,
+					int *ips_div)
+{
+	struct clk *osc_clk;
+	int calc_freq;	/* NOTE(review): int, products below may overflow */
+
+	/* fetch mul/div factors from the hardware */
+	*sys_mul = get_spmf_mult();
+	*sys_mul *= 2;		/* compensate for the fractional divider */
+	*sys_div = get_sys_div_x2();	/* twice the real divider */
+	*ips_div = get_bit_field(&clkregs->scfr1, 23, 3);	/* raw field */
+
+	/* lookup the oscillator clock for its rate */
+	osc_clk = of_clk_get_by_name(np, "osc");
+
+	/*
+	 * either descend from OSC to REF (and in bypassing verify the
+	 * IPS rate), or backtrack from IPS and multiplier values that
+	 * were fetched from hardware to REF and thus to the OSC value
+	 *
+	 * in either case the REF clock gets created here and the
+	 * remainder of the clock tree can get spanned from there
+	 */
+	if (!IS_ERR(osc_clk)) {	/* NOTE(review): osc_clk is not clk_put() here */
+		clks[MPC512x_CLK_REF] = mpc512x_clk_factor("ref", "osc", 1, 1);
+		calc_freq = clk_get_rate(clks[MPC512x_CLK_REF]);
+		calc_freq *= *sys_mul;
+		calc_freq /= *sys_div;	/* REF -> SYS */
+		calc_freq /= 2;		/* SYS -> CSB */
+		calc_freq /= *ips_div;	/* CSB -> IPS; NOTE(review): 0 field? */
+		if (bus_freq && calc_freq != bus_freq)
+			pr_warn("calc rate %d != OF spec %d\n",
+				calc_freq, bus_freq);
+	} else {
+		calc_freq = bus_freq;	/* start with IPS */
+		calc_freq *= *ips_div;	/* IPS -> CSB */
+		calc_freq *= 2;		/* CSB -> SYS */
+		calc_freq *= *sys_div;	/* SYS -> PLL out */
+		calc_freq /= *sys_mul;	/* PLL out -> REF == OSC */
+		clks[MPC512x_CLK_REF] = mpc512x_clk_fixed("ref", calc_freq);
+	}
+}
+
+/* MCLK helpers {{{ */
+
+/*
+ * helper code for the MCLK subtree setup
+ *
+ * the overview in section 5.2.4 of the MPC5121e Reference Manual rev4
+ * suggests that all instances of the "PSC clock generation" are equal,
+ * and that one might re-use the PSC setup for MSCAN clock generation
+ * (section 5.2.5) as well, at least the logic if not the data for
+ * description
+ *
+ * the details (starting at page 5-20) show differences in the specific
+ * inputs of the first mux stage ("can clk in", "spdif tx"), and the
+ * factual non-availability of the second mux stage (it's present yet
+ * only one input is valid)
+ *
+ * the MSCAN clock related registers (starting at page 5-35) all
+ * reference "spdif clk" at the first mux stage and don't mention any
+ * "can clk" at all, which somehow is unexpected
+ *
+ * TODO re-check the document, and clarify whether the RM is correct in
+ * the overview or in the details, and whether the difference is a
+ * clipboard induced error or results from chip revisions
+ *
+ * it turns out that the RM rev4 as of 2012-06 talks about "can" for the
+ * PSCs while RM rev3 as of 2008-10 talks about "spdif", so I guess that
+ * first a doc update is required which better reflects reality in the
+ * SoC before the implementation should follow while no questions remain
+ */
+
+/*
+ * note that this declaration raises a checkpatch warning, but
+ * it's the very data type dictated by <linux/clk-provider.h>,
+ * "fixing" this warning will break compilation
+ */
+static const char *parent_names_mux0_spdif[] = {
+	"sys", "ref", "psc-mclk-in", "spdif-tx",
+};
+
+static const char *parent_names_mux0_canin[] = {
+	"sys", "ref", "psc-mclk-in", "can-clk-in",
+};
+
+enum mclk_type {
+	MCLK_TYPE_PSC,
+	MCLK_TYPE_MSCAN,
+	MCLK_TYPE_SPDIF,
+	MCLK_TYPE_OUTCLK,
+};
+
+struct mclk_setup_data {
+	enum mclk_type type;
+	bool has_mclk1;
+	const char *name_mux0;
+	const char *name_en0;
+	const char *name_div0;
+	const char *parent_names_mux1[2];
+	const char *name_mclk;
+};
+
+#define MCLK_SETUP_DATA_PSC(id) { \
+	MCLK_TYPE_PSC, 0, \
+	"psc" #id "-mux0", \
+	"psc" #id "-en0", \
+	"psc" #id "_mclk_div", \
+	{ "psc" #id "_mclk_div", "dummy", }, \
+	"psc" #id "_mclk", \
+}
+
+#define MCLK_SETUP_DATA_MSCAN(id) { \
+	MCLK_TYPE_MSCAN, 0, \
+	"mscan" #id "-mux0", \
+	"mscan" #id "-en0", \
+	"mscan" #id "_mclk_div", \
+	{ "mscan" #id "_mclk_div", "dummy", }, \
+	"mscan" #id "_mclk", \
+}
+
+#define MCLK_SETUP_DATA_SPDIF { \
+	MCLK_TYPE_SPDIF, 1, \
+	"spdif-mux0", \
+	"spdif-en0", \
+	"spdif_mclk_div", \
+	{ "spdif_mclk_div", "spdif-rx", }, \
+	"spdif_mclk", \
+}
+
+#define MCLK_SETUP_DATA_OUTCLK(id) { \
+	MCLK_TYPE_OUTCLK, 0, \
+	"out" #id "-mux0", \
+	"out" #id "-en0", \
+	"out" #id "_mclk_div", \
+	{ "out" #id "_mclk_div", "dummy", }, \
+	"out" #id "_clk", \
+}
+
+static struct mclk_setup_data mclk_psc_data[] = {
+	MCLK_SETUP_DATA_PSC(0),
+	MCLK_SETUP_DATA_PSC(1),
+	MCLK_SETUP_DATA_PSC(2),
+	MCLK_SETUP_DATA_PSC(3),
+	MCLK_SETUP_DATA_PSC(4),
+	MCLK_SETUP_DATA_PSC(5),
+	MCLK_SETUP_DATA_PSC(6),
+	MCLK_SETUP_DATA_PSC(7),
+	MCLK_SETUP_DATA_PSC(8),
+	MCLK_SETUP_DATA_PSC(9),
+	MCLK_SETUP_DATA_PSC(10),
+	MCLK_SETUP_DATA_PSC(11),
+};
+
+static struct mclk_setup_data mclk_mscan_data[] = {
+	MCLK_SETUP_DATA_MSCAN(0),
+	MCLK_SETUP_DATA_MSCAN(1),
+	MCLK_SETUP_DATA_MSCAN(2),
+	MCLK_SETUP_DATA_MSCAN(3),
+};
+
+static struct mclk_setup_data mclk_spdif_data[] = {
+	MCLK_SETUP_DATA_SPDIF,
+};
+
+static struct mclk_setup_data mclk_outclk_data[] = {
+	MCLK_SETUP_DATA_OUTCLK(0),
+	MCLK_SETUP_DATA_OUTCLK(1),
+	MCLK_SETUP_DATA_OUTCLK(2),
+	MCLK_SETUP_DATA_OUTCLK(3),
+};
+
+/* setup the MCLK clock subtree of an individual PSC/MSCAN/SPDIF */
+static void mpc512x_clk_setup_mclk(struct mclk_setup_data *entry, size_t idx)
+{
+	size_t clks_idx_pub, clks_idx_int;
+	u32 __iomem *mccr_reg;	/* MCLK control register (mux, en, div) */
+	int div;
+
+	/* derive a few parameters from the component type and index */
+	switch (entry->type) {
+	case MCLK_TYPE_PSC:
+		clks_idx_pub = MPC512x_CLK_PSC0_MCLK + idx;
+		clks_idx_int = MPC512x_CLK_MCLKS_FIRST
+			     + (idx) * MCLK_MAX_IDX;
+		mccr_reg = &clkregs->psc_ccr[idx];
+		break;
+	case MCLK_TYPE_MSCAN:
+		clks_idx_pub = MPC512x_CLK_MSCAN0_MCLK + idx;
+		clks_idx_int = MPC512x_CLK_MCLKS_FIRST
+			     + (NR_PSCS + idx) * MCLK_MAX_IDX;
+		mccr_reg = &clkregs->mscan_ccr[idx];
+		break;
+	case MCLK_TYPE_SPDIF:
+		clks_idx_pub = MPC512x_CLK_SPDIF_MCLK;
+		clks_idx_int = MPC512x_CLK_MCLKS_FIRST
+			     + (NR_PSCS + NR_MSCANS) * MCLK_MAX_IDX;
+		mccr_reg = &clkregs->spccr;
+		break;
+	case MCLK_TYPE_OUTCLK:
+		clks_idx_pub = MPC512x_CLK_OUT0_CLK + idx;
+		clks_idx_int = MPC512x_CLK_MCLKS_FIRST
+			     + (NR_PSCS + NR_MSCANS + NR_SPDIFS + idx)
+			     * MCLK_MAX_IDX;
+		mccr_reg = &clkregs->out_ccr[idx];
+		break;
+	default:
+		return;	/* unknown type: nothing gets set up */
+	}
+
+	/*
+	 * this was grabbed from the PPC_CLOCK implementation, which
+	 * enforced a specific MCLK divider while the clock was gated
+	 * during setup (that's a documented hardware requirement)
+	 *
+	 * the PPC_CLOCK implementation might even have violated the
+	 * "MCLK <= IPS" constraint, the fixed divider value of 1
+	 * results in a divider of 2 and thus MCLK = SYS/2 which equals
+	 * CSB which is greater than IPS; the serial port setup may have
+	 * adjusted the divider which the clock setup might have left in
+	 * an undesirable state
+	 *
+	 * initial setup is:
+	 * - MCLK 0 from SYS
+	 * - MCLK DIV such to not exceed the IPS clock
+	 * - MCLK 0 enabled
+	 * - MCLK 1 from MCLK DIV
+	 */
+	div = clk_get_rate(clks[MPC512x_CLK_SYS]);
+	div /= clk_get_rate(clks[MPC512x_CLK_IPS]);
+	out_be32(mccr_reg, (0 << 16));	/* clear register, en0 bit off */
+	out_be32(mccr_reg, (0 << 16) | ((div - 1) << 17)); /* set div field */
+	out_be32(mccr_reg, (1 << 16) | ((div - 1) << 17)); /* then en0 on */
+
+	/*
+	 * create the 'struct clk' items of the MCLK's clock subtree
+	 *
+	 * note that by design we always create all nodes and won't take
+	 * shortcuts here, because
+	 * - the "internal" MCLK_DIV and MCLK_OUT signal in turn are
+	 *   selectable inputs to the CFM while those who "actually use"
+	 *   the PSC/MSCAN/SPDIF (serial drivers et al) need the MCLK
+	 *   for their bitrate
+	 * - in the absence of "aliases" for clocks we need to create
+	 *   individual 'struct clk' items for whatever might get
+	 *   referenced or looked up, even if several of those items are
+	 *   identical from the logical POV (their rate value)
+	 * - for easier future maintenance and for better reflection of
+	 *   the SoC's documentation, it appears appropriate to generate
+	 *   clock items even for those muxers which actually are NOPs
+	 *   (those with two inputs of which one is reserved)
+	 */
+	clks[clks_idx_int + MCLK_IDX_MUX0] = mpc512x_clk_muxed(
+			entry->name_mux0,
+			soc_has_mclk_mux0_canin()
+				? &parent_names_mux0_canin[0]
+				: &parent_names_mux0_spdif[0],
+			ARRAY_SIZE(parent_names_mux0_spdif), /* both hold 4 */
+			mccr_reg, 14, 2);
+	clks[clks_idx_int + MCLK_IDX_EN0] = mpc512x_clk_gated(
+			entry->name_en0, entry->name_mux0,
+			mccr_reg, 16);
+	clks[clks_idx_int + MCLK_IDX_DIV0] = mpc512x_clk_divider(
+			entry->name_div0,
+			entry->name_en0, CLK_SET_RATE_GATE,
+			mccr_reg, 17, 15, 0);
+	if (entry->has_mclk1) {
+		clks[clks_idx_pub] = mpc512x_clk_muxed(
+				entry->name_mclk,
+				&entry->parent_names_mux1[0],
+				ARRAY_SIZE(entry->parent_names_mux1),
+				mccr_reg, 7, 1);
+	} else {
+		/* no second mux stage: MCLK is the divider output, 1:1 */
+		clks[clks_idx_pub] = mpc512x_clk_factor(
+				entry->name_mclk,
+				entry->parent_names_mux1[0],
+				1, 1);
+	}
+}
+
+/* }}} MCLK helpers */
+
+static void mpc512x_clk_setup_clock_tree(struct device_node *np, int busfreq)
+{
+	int sys_mul, sys_div, ips_div;
+	int mul, div;
+	size_t mclk_idx;
+	int freq;
+
+	/*
+	 * developer's notes:
+	 * - consider whether to handle clocks which have both gates and
+	 *   dividers via intermediates or by means of composites
+	 * - fractional dividers appear to not map well to composites
+	 *   since they can be seen as a fixed multiplier and an
+	 *   adjustable divider, while composites can only combine at
+	 *   most one of a mux, div, and gate each into one 'struct clk'
+	 *   item
+	 * - PSC/MSCAN/SPDIF clock generation OTOH already is very
+	 *   specific and cannot get mapped to composites (at least not
+	 *   a single one, maybe two of them, but then some of these
+	 *   intermediate clock signals get referenced elsewhere (e.g.
+	 *   in the clock frequency measurement, CFM) and thus need
+	 *   publicly available names)
+	 * - the current source layout appropriately reflects the
+	 *   hardware setup, and it works, so it's questionable whether
+	 *   further changes will result in big enough a benefit
+	 */
+
+	/* regardless of whether XTAL/OSC exists, have REF created */
+	mpc512x_clk_setup_ref_clock(np, busfreq, &sys_mul, &sys_div, &ips_div);
+
+	/* now setup the REF -> SYS -> CSB -> IPS hierarchy */
+	clks[MPC512x_CLK_SYS] = mpc512x_clk_factor("sys", "ref",
+						   sys_mul, sys_div);
+	clks[MPC512x_CLK_CSB] = mpc512x_clk_factor("csb", "sys", 1, 2);
+	clks[MPC512x_CLK_IPS] = mpc512x_clk_divtable("ips", "csb",
+						     &clkregs->scfr1, 23, 3,
+						     divtab_2346);
+	/* now setup anything below SYS and CSB and IPS */
+
+	clks[MPC512x_CLK_DDR_UG] = mpc512x_clk_factor("ddr-ug", "sys", 1, 2);
+
+	/*
+	 * the Reference Manual discusses that for SDHC only even divide
+	 * ratios are supported because clock domain synchronization
+	 * between 'per' and 'ipg' is broken;
+	 * keep the divider's bit 0 cleared (per reset value), and only
+	 * allow to setup the divider's bits 7:1, which results in that
+	 * only even divide ratios can get configured upon rate changes;
+	 * keep the "x4" name because this bit shift hack is an internal
+	 * implementation detail, the "fractional divider with quarters"
+	 * semantics remains
+	 */
+	clks[MPC512x_CLK_SDHC_x4] = mpc512x_clk_factor("sdhc-x4", "csb", 2, 1);
+	clks[MPC512x_CLK_SDHC_UG] = mpc512x_clk_divider("sdhc-ug", "sdhc-x4", 0,
+							&clkregs->scfr2, 1, 7,
+							CLK_DIVIDER_ONE_BASED);
+	if (soc_has_sdhc2()) {
+		clks[MPC512x_CLK_SDHC2_UG] = mpc512x_clk_divider(
+				"sdhc2-ug", "sdhc-x4", 0, &clkregs->scfr2,
+				9, 7, CLK_DIVIDER_ONE_BASED);
+	}
+
+	clks[MPC512x_CLK_DIU_x4] = mpc512x_clk_factor("diu-x4", "csb", 4, 1);
+	clks[MPC512x_CLK_DIU_UG] = mpc512x_clk_divider("diu-ug", "diu-x4", 0,
+						       &clkregs->scfr1, 0, 8,
+						       CLK_DIVIDER_ONE_BASED);
+
+	/*
+	 * the "power architecture PLL" was setup from data which was
+	 * sampled from the reset config word, at this point in time the
+	 * configuration can be considered fixed and read only (i.e. no
+	 * longer adjustable, or no longer in need of adjustment), which
+	 * is why we don't register a PLL here but assume fixed factors
+	 */
+	mul = get_cpmf_mult_x2();
+	div = 2;	/* compensate for the fractional factor */
+	clks[MPC512x_CLK_E300] = mpc512x_clk_factor("e300", "csb", mul, div);
+
+	if (soc_has_mbx()) {
+		clks[MPC512x_CLK_MBX_BUS_UG] = mpc512x_clk_factor(
+				"mbx-bus-ug", "csb", 1, 2);
+		clks[MPC512x_CLK_MBX_UG] = mpc512x_clk_divtable(
+				"mbx-ug", "mbx-bus-ug", &clkregs->scfr1,
+				14, 3, divtab_1234);
+		clks[MPC512x_CLK_MBX_3D_UG] = mpc512x_clk_factor(
+				"mbx-3d-ug", "mbx-ug", 1, 1);
+	}
+	if (soc_has_pci()) {
+		clks[MPC512x_CLK_PCI_UG] = mpc512x_clk_divtable(
+				"pci-ug", "csb", &clkregs->scfr1,
+				20, 3, divtab_2346);
+	}
+	if (soc_has_nfc_5125()) {
+		/*
+		 * XXX TODO implement 5125 NFC clock setup logic,
+		 * with high/low period counters in clkregs->scfr3,
+		 * currently there are no users so it's ENOIMPL
+		 */
+		clks[MPC512x_CLK_NFC_UG] = ERR_PTR(-ENOTSUPP);
+	} else {
+		clks[MPC512x_CLK_NFC_UG] = mpc512x_clk_divtable(
+				"nfc-ug", "ips", &clkregs->scfr1,
+				8, 3, divtab_1234);
+	}
+	clks[MPC512x_CLK_LPC_UG] = mpc512x_clk_divtable("lpc-ug", "ips",
+							&clkregs->scfr1, 11, 3,
+							divtab_1234);
+
+	clks[MPC512x_CLK_LPC] = mpc512x_clk_gated("lpc", "lpc-ug",
+						  &clkregs->sccr1, 30);
+	clks[MPC512x_CLK_NFC] = mpc512x_clk_gated("nfc", "nfc-ug",
+						  &clkregs->sccr1, 29);
+	if (soc_has_pata()) {
+		clks[MPC512x_CLK_PATA] = mpc512x_clk_gated(
+				"pata", "ips", &clkregs->sccr1, 28);
+	}
+	/* for PSCs there is a "registers" gate and a bitrate MCLK subtree */
+	for (mclk_idx = 0; mclk_idx < soc_max_pscnum(); mclk_idx++) {
+		char name[12];
+		snprintf(name, sizeof(name), "psc%d", mclk_idx);
+		clks[MPC512x_CLK_PSC0 + mclk_idx] = mpc512x_clk_gated(
+				name, "ips", &clkregs->sccr1, 27 - mclk_idx);
+		mpc512x_clk_setup_mclk(&mclk_psc_data[mclk_idx], mclk_idx);
+	}
+	clks[MPC512x_CLK_PSC_FIFO] = mpc512x_clk_gated("psc-fifo", "ips",
+						       &clkregs->sccr1, 15);
+	if (soc_has_sata()) {
+		clks[MPC512x_CLK_SATA] = mpc512x_clk_gated(
+				"sata", "ips", &clkregs->sccr1, 14);
+	}
+	clks[MPC512x_CLK_FEC] = mpc512x_clk_gated("fec", "ips",
+						  &clkregs->sccr1, 13);
+	if (soc_has_pci()) {
+		clks[MPC512x_CLK_PCI] = mpc512x_clk_gated(
+				"pci", "pci-ug", &clkregs->sccr1, 11);
+	}
+	clks[MPC512x_CLK_DDR] = mpc512x_clk_gated("ddr", "ddr-ug",
+						  &clkregs->sccr1, 10);
+	if (soc_has_fec2()) {
+		clks[MPC512x_CLK_FEC2] = mpc512x_clk_gated(
+				"fec2", "ips", &clkregs->sccr1, 9);
+	}
+
+	clks[MPC512x_CLK_DIU] = mpc512x_clk_gated("diu", "diu-ug",
+						  &clkregs->sccr2, 31);
+	if (soc_has_axe()) {
+		clks[MPC512x_CLK_AXE] = mpc512x_clk_gated(
+				"axe", "csb", &clkregs->sccr2, 30);
+	}
+	clks[MPC512x_CLK_MEM] = mpc512x_clk_gated("mem", "ips",
+						  &clkregs->sccr2, 29);
+	clks[MPC512x_CLK_USB1] = mpc512x_clk_gated("usb1", "csb",
+						   &clkregs->sccr2, 28);
+	clks[MPC512x_CLK_USB2] = mpc512x_clk_gated("usb2", "csb",
+						   &clkregs->sccr2, 27);
+	clks[MPC512x_CLK_I2C] = mpc512x_clk_gated("i2c", "ips",
+						  &clkregs->sccr2, 26);
+	/* MSCAN differs from PSC with just one gate for multiple components */
+	clks[MPC512x_CLK_BDLC] = mpc512x_clk_gated("bdlc", "ips",
+						   &clkregs->sccr2, 25);
+	for (mclk_idx = 0; mclk_idx < ARRAY_SIZE(mclk_mscan_data); mclk_idx++)
+		mpc512x_clk_setup_mclk(&mclk_mscan_data[mclk_idx], mclk_idx);
+	clks[MPC512x_CLK_SDHC] = mpc512x_clk_gated("sdhc", "sdhc-ug",
+						   &clkregs->sccr2, 24);
+	/* there is only one SPDIF component, which shares MCLK support code */
+	if (soc_has_spdif()) {
+		clks[MPC512x_CLK_SPDIF] = mpc512x_clk_gated(
+				"spdif", "ips", &clkregs->sccr2, 23);
+		mpc512x_clk_setup_mclk(&mclk_spdif_data[0], 0);
+	}
+	if (soc_has_mbx()) {
+		clks[MPC512x_CLK_MBX_BUS] = mpc512x_clk_gated(
+				"mbx-bus", "mbx-bus-ug", &clkregs->sccr2, 22);
+		clks[MPC512x_CLK_MBX] = mpc512x_clk_gated(
+				"mbx", "mbx-ug", &clkregs->sccr2, 21);
+		clks[MPC512x_CLK_MBX_3D] = mpc512x_clk_gated(
+				"mbx-3d", "mbx-3d-ug", &clkregs->sccr2, 20);
+	}
+	clks[MPC512x_CLK_IIM] = mpc512x_clk_gated("iim", "csb",
+						  &clkregs->sccr2, 19);
+	if (soc_has_viu()) {
+		clks[MPC512x_CLK_VIU] = mpc512x_clk_gated(
+				"viu", "csb", &clkregs->sccr2, 18);
+	}
+	if (soc_has_sdhc2()) {
+		clks[MPC512x_CLK_SDHC2] = mpc512x_clk_gated(
+				"sdhc-2", "sdhc2-ug", &clkregs->sccr2, 17);
+	}
+
+	if (soc_has_outclk()) {
+		size_t idx;	/* used as mclk_idx, just to trim line length */
+		for (idx = 0; idx < ARRAY_SIZE(mclk_outclk_data); idx++)
+			mpc512x_clk_setup_mclk(&mclk_outclk_data[idx], idx);
+	}
+
+	/*
+	 * externally provided clocks (when implemented in hardware,
+	 * device tree may specify values which otherwise were unknown)
+	 */
+	freq = get_freq_from_dt("psc_mclk_in");
+	if (!freq)
+		freq = 25000000;
+	clks[MPC512x_CLK_PSC_MCLK_IN] = mpc512x_clk_fixed("psc_mclk_in", freq);
+	if (soc_has_mclk_mux0_canin()) {
+		freq = get_freq_from_dt("can_clk_in");
+		clks[MPC512x_CLK_CAN_CLK_IN] = mpc512x_clk_fixed(
+				"can_clk_in", freq);
+	} else {
+		freq = get_freq_from_dt("spdif_tx_in");
+		clks[MPC512x_CLK_SPDIF_TX_IN] = mpc512x_clk_fixed(
+				"spdif_tx_in", freq);
+		freq = get_freq_from_dt("spdif_rx_in");
+		clks[MPC512x_CLK_SPDIF_TX_IN] = mpc512x_clk_fixed(
+				"spdif_rx_in", freq);
+	}
+
+	/* fixed frequency for AC97, always 24.567MHz */
+	clks[MPC512x_CLK_AC97] = mpc512x_clk_fixed("ac97", 24567000);
+
+	/*
+	 * pre-enable those "internal" clock items which never get
+	 * claimed by any peripheral driver, to not have the clock
+	 * subsystem disable them late at startup
+	 */
+	clk_prepare_enable(clks[MPC512x_CLK_DUMMY]);
+	clk_prepare_enable(clks[MPC512x_CLK_E300]);	/* PowerPC CPU */
+	clk_prepare_enable(clks[MPC512x_CLK_DDR]);	/* DRAM */
+	clk_prepare_enable(clks[MPC512x_CLK_MEM]);	/* SRAM */
+	clk_prepare_enable(clks[MPC512x_CLK_IPS]);	/* SoC periph */
+	clk_prepare_enable(clks[MPC512x_CLK_LPC]);	/* boot media */
+}
+
+/*
+ * registers the set of public clocks (those listed in the dt-bindings/
+ * header file) for OF lookups, keeps the intermediates private to us
+ */
+static void mpc5121_clk_register_of_provider(struct device_node *np)
+{
+	clk_data.clks = clks;	/* table indexed by the MPC512x_CLK_* ids */
+	clk_data.clk_num = MPC512x_CLK_LAST_PUBLIC + 1;	/* _not_ ARRAY_SIZE() */
+	of_clk_add_provider(np, of_clk_src_onecell_get, &clk_data);
+}
+
+/*
+ * temporary support for the period of time between introduction of CCF
+ * support and the adjustment of peripheral drivers to OF based lookups
+ */
+static void mpc5121_clk_provide_migration_support(void)
+{
+	struct device_node *np;
+
+	/*
+	 * pre-enable those clock items which are not yet appropriately
+	 * acquired by their peripheral driver
+	 *
+	 * the PCI clock cannot get acquired by its peripheral driver,
+	 * because for this platform the driver won't probe(), instead
+	 * initialization is done from within the .setup_arch() routine
+	 * at a point in time where the clock provider has not been
+	 * setup yet and thus isn't available yet; so we "pre-enable"
+	 * the clock here, to not have the clock subsystem automatically
+	 * disable this item in a late init call; this workaround only
+	 * applies when there are device tree nodes for PCI and thus the
+	 * peripheral driver has attached to bridges, otherwise the PCI
+	 * clock remains unused and so it gets disabled
+	 */
+	clk_prepare_enable(clks[MPC512x_CLK_PSC3_MCLK]);/* serial console */
+	np = of_find_compatible_node(NULL, "pci", "fsl,mpc5121-pci");
+	of_node_put(np);	/* only the node's presence matters */
+	if (np)
+		clk_prepare_enable(clks[MPC512x_CLK_PCI]);
+}
+
+/*
+ * those macros are not exactly pretty, but they encapsulate a lot
+ * of copy'n'paste heavy code which is even more ugly, and reduce
+ * the potential for inconsistencies in those many code copies
+ */
+#define FOR_NODES(compatname) /* iterates the caller's 'np' variable */ \
+	for_each_compatible_node(np, NULL, compatname)
+
+#define NODE_PREP do { /* fills the caller's 'res' and 'devname' */ \
+	of_address_to_resource(np, 0, &res); /* NOTE(review): unchecked */ \
+	snprintf(devname, sizeof(devname), "%08x.%s", res.start, np->name); \
+} while (0)
+
+#define NODE_CHK(clkname, clkitem, regnode, regflag) do { \
+	struct clk *clk; \
+	clk = of_clk_get_by_name(np, clkname); \
+	if (IS_ERR(clk)) { /* OF lookup failed, register fallback aliases */ \
+		clk = clkitem; \
+		clk_register_clkdev(clk, clkname, devname); \
+		if (regnode) /* optionally alias by plain node name, too */ \
+			clk_register_clkdev(clk, clkname, np->name); \
+		did_register |= DID_REG_ ## regflag; \
+		pr_debug("clock alias name '%s' for dev '%s' pointer %p\n", \
+			 clkname, devname, clk); \
+	} else { /* OF lookup worked, just release the probe reference */ \
+		clk_put(clk); \
+	} \
+} while (0)
+
+/*
+ * register source code provided fallback results for clock lookups,
+ * these get consulted when OF based clock lookup fails (that is in the
+ * case of not yet adjusted device tree data, where clock related specs
+ * are missing)
+ */
+static void mpc5121_clk_provide_backwards_compat(void)
+{
+	enum did_reg_flags {
+		DID_REG_PSC	= BIT(0),
+		DID_REG_PSCFIFO	= BIT(1),
+		DID_REG_NFC	= BIT(2),
+		DID_REG_CAN	= BIT(3),
+		DID_REG_I2C	= BIT(4),
+		DID_REG_DIU	= BIT(5),
+		DID_REG_VIU	= BIT(6),
+		DID_REG_FEC	= BIT(7),
+		DID_REG_USB	= BIT(8),
+		DID_REG_PATA	= BIT(9),
+	};
+
+	int did_register;	/* DID_REG_* bits, set by NODE_CHK() */
+	struct device_node *np;
+	struct resource res;
+	int idx;
+	char devname[32];
+
+	did_register = 0;
+
+	FOR_NODES(mpc512x_select_psc_compat()) {
+		NODE_PREP;
+		idx = (res.start >> 8) & 0xf;	/* index: address bits 11:8 */
+		NODE_CHK("ipg", clks[MPC512x_CLK_PSC0 + idx], 0, PSC);
+		NODE_CHK("mclk", clks[MPC512x_CLK_PSC0_MCLK + idx], 0, PSC);
+	}
+
+	FOR_NODES("fsl,mpc5121-psc-fifo") {
+		NODE_PREP;
+		NODE_CHK("ipg", clks[MPC512x_CLK_PSC_FIFO], 1, PSCFIFO);
+	}
+
+	FOR_NODES("fsl,mpc5121-nfc") {
+		NODE_PREP;
+		NODE_CHK("ipg", clks[MPC512x_CLK_NFC], 0, NFC);
+	}
+
+	FOR_NODES("fsl,mpc5121-mscan") {
+		NODE_PREP;
+		idx = 0;	/* index 0..3 from address bits 13 and 7 */
+		idx += (res.start & 0x2000) ? 2 : 0;
+		idx += (res.start & 0x0080) ? 1 : 0;
+		NODE_CHK("ipg", clks[MPC512x_CLK_BDLC], 0, CAN);
+		NODE_CHK("mclk", clks[MPC512x_CLK_MSCAN0_MCLK + idx], 0, CAN);
+	}
+
+	/*
+	 * do register the 'ips', 'sys', and 'ref' names globally
+	 * instead of inside each individual CAN node, as there is no
+	 * potential for a name conflict (in contrast to 'ipg' and 'mclk')
+	 */
+	if (did_register & DID_REG_CAN) {
+		clk_register_clkdev(clks[MPC512x_CLK_IPS], "ips", NULL);
+		clk_register_clkdev(clks[MPC512x_CLK_SYS], "sys", NULL);
+		clk_register_clkdev(clks[MPC512x_CLK_REF], "ref", NULL);
+	}
+
+	FOR_NODES("fsl,mpc5121-i2c") {
+		NODE_PREP;
+		NODE_CHK("ipg", clks[MPC512x_CLK_I2C], 0, I2C);
+	}
+
+	/*
+	 * workaround for the fact that the I2C driver does an "anonymous"
+	 * lookup (NULL name spec, which yields the first clock spec) for
+	 * which we cannot register an alias -- a _global_ 'ipg' alias that
+	 * is not bound to any device name and returns the I2C clock item
+	 * is not a good idea
+	 *
+	 * so we have the lookup in the peripheral driver fail, which is
+	 * silent and non-fatal, and pre-enable the clock item here such
+	 * that register access is possible
+	 *
+	 * see commit b3bfce2b "i2c: mpc: cleanup clock API use" for
+	 * details, adjusting s/NULL/"ipg"/ in i2c-mpc.c would make this
+	 * workaround obsolete
+	 */
+	if (did_register & DID_REG_I2C)
+		clk_prepare_enable(clks[MPC512x_CLK_I2C]);
+
+	FOR_NODES("fsl,mpc5121-diu") {
+		NODE_PREP;
+		NODE_CHK("ipg", clks[MPC512x_CLK_DIU], 1, DIU);
+	}
+
+	FOR_NODES("fsl,mpc5121-viu") {
+		NODE_PREP;
+		NODE_CHK("ipg", clks[MPC512x_CLK_VIU], 0, VIU);
+	}
+
+	/*
+	 * note that 2771399a "fs_enet: cleanup clock API use" did use the
+	 * "per" string for the clock lookup in contrast to the "ipg" name
+	 * which most other nodes are using -- this is not a fatal thing
+	 * but just something to keep in mind when doing compatibility
+	 * registration, it's a non-issue with up-to-date device tree data
+	 */
+	FOR_NODES("fsl,mpc5121-fec") {
+		NODE_PREP;
+		NODE_CHK("per", clks[MPC512x_CLK_FEC], 0, FEC);
+	}
+	FOR_NODES("fsl,mpc5121-fec-mdio") {
+		NODE_PREP;
+		NODE_CHK("per", clks[MPC512x_CLK_FEC], 0, FEC);
+	}
+	/*
+	 * MPC5125 has two FECs: FEC1 at 0x2800, FEC2 at 0x4800;
+	 * the clock items don't "form an array" since FEC2 was
+	 * added only later and was not allowed to shift all other
+	 * clock item indices, so the numbers aren't adjacent
+	 */
+	FOR_NODES("fsl,mpc5125-fec") {
+		NODE_PREP;
+		if (res.start & 0x4000)
+			idx = MPC512x_CLK_FEC2;
+		else
+			idx = MPC512x_CLK_FEC;
+		NODE_CHK("per", clks[idx], 0, FEC);
+	}
+
+	FOR_NODES("fsl,mpc5121-usb2-dr") {
+		NODE_PREP;
+		idx = (res.start & 0x4000) ? 1 : 0;	/* USB1 or USB2 item */
+		NODE_CHK("ipg", clks[MPC512x_CLK_USB1 + idx], 0, USB);
+	}
+
+	FOR_NODES("fsl,mpc5121-pata") {
+		NODE_PREP;
+		NODE_CHK("ipg", clks[MPC512x_CLK_PATA], 0, PATA);
+	}
+
+	/*
+	 * try to collapse diagnostics into a single line of output yet
+	 * provide a full list of what is missing, to avoid noise in the
+	 * absence of up-to-date device tree data -- backwards
+	 * compatibility to old DTBs is a requirement, updates may be
+	 * desirable or preferable but are not at all mandatory
+	 */
+	if (did_register) {
+		pr_notice("device tree lacks clock specs, adding fallbacks (0x%x,%s%s%s%s%s%s%s%s%s%s)\n",
+			  did_register,
+			  (did_register & DID_REG_PSC) ? " PSC" : "",
+			  (did_register & DID_REG_PSCFIFO) ? " PSCFIFO" : "",
+			  (did_register & DID_REG_NFC) ? " NFC" : "",
+			  (did_register & DID_REG_CAN) ? " CAN" : "",
+			  (did_register & DID_REG_I2C) ? " I2C" : "",
+			  (did_register & DID_REG_DIU) ? " DIU" : "",
+			  (did_register & DID_REG_VIU) ? " VIU" : "",
+			  (did_register & DID_REG_FEC) ? " FEC" : "",
+			  (did_register & DID_REG_USB) ? " USB" : "",
+			  (did_register & DID_REG_PATA) ? " PATA" : "");
+	} else {
+		pr_debug("device tree has clock specs, no fallbacks added\n");
+	}
+}
+
+int __init mpc5121_clk_init(void)
+{
+	struct device_node *clk_np;
+	int busfreq;
+
+	/* map the clock control registers */
+	clk_np = of_find_compatible_node(NULL, NULL, "fsl,mpc5121-clock");
+	if (!clk_np)
+		return -ENODEV;
+	clkregs = of_iomap(clk_np, 0);
+	WARN_ON(!clkregs);	/* NOTE(review): setup continues w/o mapping */
+
+	/* determine the SoC variant we run on */
+	mpc512x_clk_determine_soc();
+	/* invalidate all not yet registered clock slots */
+	mpc512x_clk_preset_data();
+
+	/*
+	 * have the device tree scanned for "fixed-clock" nodes (which
+	 * includes the oscillator node if the board's DT provides one)
+	 */
+	of_clk_init(NULL);
+
+	/*
+	 * add a dummy clock for those situations where a clock spec is
+	 * required yet no real clock is involved
+	 */
+	clks[MPC512x_CLK_DUMMY] = mpc512x_clk_fixed("dummy", 0);
+
+	/*
+	 * have all the real nodes in the clock tree populated from REF
+	 * down to all leaves, either starting from the OSC node or from
+	 * a REF root that was created from the IPS bus clock input
+	 */
+	busfreq = get_freq_from_dt("bus-frequency");
+	mpc512x_clk_setup_clock_tree(clk_np, busfreq);
+
+	/* register as an OF clock provider, which keeps its own node ref */
+	mpc5121_clk_register_of_provider(clk_np);
+	of_node_put(clk_np);
+
+	/*
+	 * unbreak not yet adjusted peripheral drivers during migration
+	 * towards fully operational common clock support, and allow
+	 * operation in the absence of clock related device tree specs
+	 */
+	mpc5121_clk_provide_migration_support();
+	mpc5121_clk_provide_backwards_compat();
+
+	return 0;
+}
diff --git a/arch/powerpc/platforms/512x/clock.c b/arch/powerpc/platforms/512x/clock.c
deleted file mode 100644
index e504166e089..00000000000
--- a/arch/powerpc/platforms/512x/clock.c
+++ /dev/null
@@ -1,753 +0,0 @@
-/*
- * Copyright (C) 2007,2008 Freescale Semiconductor, Inc. All rights reserved.
- *
- * Author: John Rigby <jrigby@freescale.com>
- *
- * Implements the clk api defined in include/linux/clk.h
- *
- *    Original based on linux/arch/arm/mach-integrator/clock.c
- *
- *    Copyright (C) 2004 ARM Limited.
- *    Written by Deep Blue Solutions Limited.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#include <linux/kernel.h>
-#include <linux/list.h>
-#include <linux/errno.h>
-#include <linux/err.h>
-#include <linux/module.h>
-#include <linux/string.h>
-#include <linux/clk.h>
-#include <linux/mutex.h>
-#include <linux/io.h>
-
-#include <linux/of_platform.h>
-#include <asm/mpc5xxx.h>
-#include <asm/mpc5121.h>
-#include <asm/clk_interface.h>
-
-#include "mpc512x.h"
-
-#undef CLK_DEBUG
-
-static int clocks_initialized;
-
-#define CLK_HAS_RATE	0x1	/* has rate in MHz */
-#define CLK_HAS_CTRL	0x2	/* has control reg and bit */
-
-struct clk {
-	struct list_head node;
-	char name[32];
-	int flags;
-	struct device *dev;
-	unsigned long rate;
-	struct module *owner;
-	void (*calc) (struct clk *);
-	struct clk *parent;
-	int reg, bit;		/* CLK_HAS_CTRL */
-	int div_shift;		/* only used by generic_div_clk_calc */
-};
-
-static LIST_HEAD(clocks);
-static DEFINE_MUTEX(clocks_mutex);
-
-static struct clk *mpc5121_clk_get(struct device *dev, const char *id)
-{
-	struct clk *p, *clk = ERR_PTR(-ENOENT);
-	int dev_match;
-	int id_match;
-
-	if (dev == NULL || id == NULL)
-		return clk;
-
-	mutex_lock(&clocks_mutex);
-	list_for_each_entry(p, &clocks, node) {
-		dev_match = id_match = 0;
-
-		if (dev == p->dev)
-			dev_match++;
-		if (strcmp(id, p->name) == 0)
-			id_match++;
-		if ((dev_match || id_match) && try_module_get(p->owner)) {
-			clk = p;
-			break;
-		}
-	}
-	mutex_unlock(&clocks_mutex);
-
-	return clk;
-}
-
-#ifdef CLK_DEBUG
-static void dump_clocks(void)
-{
-	struct clk *p;
-
-	mutex_lock(&clocks_mutex);
-	printk(KERN_INFO "CLOCKS:\n");
-	list_for_each_entry(p, &clocks, node) {
-		pr_info("  %s=%ld", p->name, p->rate);
-		if (p->parent)
-			pr_cont(" %s=%ld", p->parent->name,
-			       p->parent->rate);
-		if (p->flags & CLK_HAS_CTRL)
-			pr_cont(" reg/bit=%d/%d", p->reg, p->bit);
-		pr_cont("\n");
-	}
-	mutex_unlock(&clocks_mutex);
-}
-#define	DEBUG_CLK_DUMP() dump_clocks()
-#else
-#define	DEBUG_CLK_DUMP()
-#endif
-
-
-static void mpc5121_clk_put(struct clk *clk)
-{
-	module_put(clk->owner);
-}
-
-#define NRPSC 12
-
-struct mpc512x_clockctl {
-	u32 spmr;		/* System PLL Mode Reg */
-	u32 sccr[2];		/* System Clk Ctrl Reg 1 & 2 */
-	u32 scfr1;		/* System Clk Freq Reg 1 */
-	u32 scfr2;		/* System Clk Freq Reg 2 */
-	u32 reserved;
-	u32 bcr;		/* Bread Crumb Reg */
-	u32 pccr[NRPSC];	/* PSC Clk Ctrl Reg 0-11 */
-	u32 spccr;		/* SPDIF Clk Ctrl Reg */
-	u32 cccr;		/* CFM Clk Ctrl Reg */
-	u32 dccr;		/* DIU Clk Cnfg Reg */
-};
-
-static struct mpc512x_clockctl __iomem *clockctl;
-
-static int mpc5121_clk_enable(struct clk *clk)
-{
-	unsigned int mask;
-
-	if (clk->flags & CLK_HAS_CTRL) {
-		mask = in_be32(&clockctl->sccr[clk->reg]);
-		mask |= 1 << clk->bit;
-		out_be32(&clockctl->sccr[clk->reg], mask);
-	}
-	return 0;
-}
-
-static void mpc5121_clk_disable(struct clk *clk)
-{
-	unsigned int mask;
-
-	if (clk->flags & CLK_HAS_CTRL) {
-		mask = in_be32(&clockctl->sccr[clk->reg]);
-		mask &= ~(1 << clk->bit);
-		out_be32(&clockctl->sccr[clk->reg], mask);
-	}
-}
-
-static unsigned long mpc5121_clk_get_rate(struct clk *clk)
-{
-	if (clk->flags & CLK_HAS_RATE)
-		return clk->rate;
-	else
-		return 0;
-}
-
-static long mpc5121_clk_round_rate(struct clk *clk, unsigned long rate)
-{
-	return rate;
-}
-
-static int mpc5121_clk_set_rate(struct clk *clk, unsigned long rate)
-{
-	return 0;
-}
-
-static int clk_register(struct clk *clk)
-{
-	mutex_lock(&clocks_mutex);
-	list_add(&clk->node, &clocks);
-	mutex_unlock(&clocks_mutex);
-	return 0;
-}
-
-static unsigned long spmf_mult(void)
-{
-	/*
-	 * Convert spmf to multiplier
-	 */
-	static int spmf_to_mult[] = {
-		68, 1, 12, 16,
-		20, 24, 28, 32,
-		36, 40, 44, 48,
-		52, 56, 60, 64
-	};
-	int spmf = (in_be32(&clockctl->spmr) >> 24) & 0xf;
-	return spmf_to_mult[spmf];
-}
-
-static unsigned long sysdiv_div_x_2(void)
-{
-	/*
-	 * Convert sysdiv to divisor x 2
-	 * Some divisors have fractional parts so
-	 * multiply by 2 then divide by this value
-	 */
-	static int sysdiv_to_div_x_2[] = {
-		4, 5, 6, 7,
-		8, 9, 10, 14,
-		12, 16, 18, 22,
-		20, 24, 26, 30,
-		28, 32, 34, 38,
-		36, 40, 42, 46,
-		44, 48, 50, 54,
-		52, 56, 58, 62,
-		60, 64, 66,
-	};
-	int sysdiv = (in_be32(&clockctl->scfr2) >> 26) & 0x3f;
-	return sysdiv_to_div_x_2[sysdiv];
-}
-
-static unsigned long ref_to_sys(unsigned long rate)
-{
-	rate *= spmf_mult();
-	rate *= 2;
-	rate /= sysdiv_div_x_2();
-
-	return rate;
-}
-
-static unsigned long sys_to_ref(unsigned long rate)
-{
-	rate *= sysdiv_div_x_2();
-	rate /= 2;
-	rate /= spmf_mult();
-
-	return rate;
-}
-
-static long ips_to_ref(unsigned long rate)
-{
-	int ips_div = (in_be32(&clockctl->scfr1) >> 23) & 0x7;
-
-	rate *= ips_div;	/* csb_clk = ips_clk * ips_div */
-	rate *= 2;		/* sys_clk = csb_clk * 2 */
-	return sys_to_ref(rate);
-}
-
-static unsigned long devtree_getfreq(char *clockname)
-{
-	struct device_node *np;
-	const unsigned int *prop;
-	unsigned int val = 0;
-
-	np = of_find_compatible_node(NULL, NULL, "fsl,mpc5121-immr");
-	if (np) {
-		prop = of_get_property(np, clockname, NULL);
-		if (prop)
-			val = *prop;
-	    of_node_put(np);
-	}
-	return val;
-}
-
-static void ref_clk_calc(struct clk *clk)
-{
-	unsigned long rate;
-
-	rate = devtree_getfreq("bus-frequency");
-	if (rate == 0) {
-		printk(KERN_ERR "No bus-frequency in dev tree\n");
-		clk->rate = 0;
-		return;
-	}
-	clk->rate = ips_to_ref(rate);
-}
-
-static struct clk ref_clk = {
-	.name = "ref_clk",
-	.calc = ref_clk_calc,
-};
-
-
-static void sys_clk_calc(struct clk *clk)
-{
-	clk->rate = ref_to_sys(ref_clk.rate);
-}
-
-static struct clk sys_clk = {
-	.name = "sys_clk",
-	.calc = sys_clk_calc,
-};
-
-static void diu_clk_calc(struct clk *clk)
-{
-	int diudiv_x_2 = in_be32(&clockctl->scfr1) & 0xff;
-	unsigned long rate;
-
-	rate = sys_clk.rate;
-
-	rate *= 2;
-	rate /= diudiv_x_2;
-
-	clk->rate = rate;
-}
-
-static void viu_clk_calc(struct clk *clk)
-{
-	unsigned long rate;
-
-	rate = sys_clk.rate;
-	rate /= 2;
-	clk->rate = rate;
-}
-
-static void half_clk_calc(struct clk *clk)
-{
-	clk->rate = clk->parent->rate / 2;
-}
-
-static void generic_div_clk_calc(struct clk *clk)
-{
-	int div = (in_be32(&clockctl->scfr1) >> clk->div_shift) & 0x7;
-
-	clk->rate = clk->parent->rate / div;
-}
-
-static void unity_clk_calc(struct clk *clk)
-{
-	clk->rate = clk->parent->rate;
-}
-
-static struct clk csb_clk = {
-	.name = "csb_clk",
-	.calc = half_clk_calc,
-	.parent = &sys_clk,
-};
-
-static void e300_clk_calc(struct clk *clk)
-{
-	int spmf = (in_be32(&clockctl->spmr) >> 16) & 0xf;
-	int ratex2 = clk->parent->rate * spmf;
-
-	clk->rate = ratex2 / 2;
-}
-
-static struct clk e300_clk = {
-	.name = "e300_clk",
-	.calc = e300_clk_calc,
-	.parent = &csb_clk,
-};
-
-static struct clk ips_clk = {
-	.name = "ips_clk",
-	.calc = generic_div_clk_calc,
-	.parent = &csb_clk,
-	.div_shift = 23,
-};
-
-/*
- * Clocks controlled by SCCR1 (.reg = 0)
- */
-static struct clk lpc_clk = {
-	.name = "lpc_clk",
-	.flags = CLK_HAS_CTRL,
-	.reg = 0,
-	.bit = 30,
-	.calc = generic_div_clk_calc,
-	.parent = &ips_clk,
-	.div_shift = 11,
-};
-
-static struct clk nfc_clk = {
-	.name = "nfc_clk",
-	.flags = CLK_HAS_CTRL,
-	.reg = 0,
-	.bit = 29,
-	.calc = generic_div_clk_calc,
-	.parent = &ips_clk,
-	.div_shift = 8,
-};
-
-static struct clk pata_clk = {
-	.name = "pata_clk",
-	.flags = CLK_HAS_CTRL,
-	.reg = 0,
-	.bit = 28,
-	.calc = unity_clk_calc,
-	.parent = &ips_clk,
-};
-
-/*
- * PSC clocks (bits 27 - 16)
- * are setup elsewhere
- */
-
-static struct clk sata_clk = {
-	.name = "sata_clk",
-	.flags = CLK_HAS_CTRL,
-	.reg = 0,
-	.bit = 14,
-	.calc = unity_clk_calc,
-	.parent = &ips_clk,
-};
-
-static struct clk fec_clk = {
-	.name = "fec_clk",
-	.flags = CLK_HAS_CTRL,
-	.reg = 0,
-	.bit = 13,
-	.calc = unity_clk_calc,
-	.parent = &ips_clk,
-};
-
-static struct clk pci_clk = {
-	.name = "pci_clk",
-	.flags = CLK_HAS_CTRL,
-	.reg = 0,
-	.bit = 11,
-	.calc = generic_div_clk_calc,
-	.parent = &csb_clk,
-	.div_shift = 20,
-};
-
-/*
- * Clocks controlled by SCCR2 (.reg = 1)
- */
-static struct clk diu_clk = {
-	.name = "diu_clk",
-	.flags = CLK_HAS_CTRL,
-	.reg = 1,
-	.bit = 31,
-	.calc = diu_clk_calc,
-};
-
-static struct clk viu_clk = {
-	.name = "viu_clk",
-	.flags = CLK_HAS_CTRL,
-	.reg = 1,
-	.bit = 18,
-	.calc = viu_clk_calc,
-};
-
-static struct clk axe_clk = {
-	.name = "axe_clk",
-	.flags = CLK_HAS_CTRL,
-	.reg = 1,
-	.bit = 30,
-	.calc = unity_clk_calc,
-	.parent = &csb_clk,
-};
-
-static struct clk usb1_clk = {
-	.name = "usb1_clk",
-	.flags = CLK_HAS_CTRL,
-	.reg = 1,
-	.bit = 28,
-	.calc = unity_clk_calc,
-	.parent = &csb_clk,
-};
-
-static struct clk usb2_clk = {
-	.name = "usb2_clk",
-	.flags = CLK_HAS_CTRL,
-	.reg = 1,
-	.bit = 27,
-	.calc = unity_clk_calc,
-	.parent = &csb_clk,
-};
-
-static struct clk i2c_clk = {
-	.name = "i2c_clk",
-	.flags = CLK_HAS_CTRL,
-	.reg = 1,
-	.bit = 26,
-	.calc = unity_clk_calc,
-	.parent = &ips_clk,
-};
-
-static struct clk mscan_clk = {
-	.name = "mscan_clk",
-	.flags = CLK_HAS_CTRL,
-	.reg = 1,
-	.bit = 25,
-	.calc = unity_clk_calc,
-	.parent = &ips_clk,
-};
-
-static struct clk sdhc_clk = {
-	.name = "sdhc_clk",
-	.flags = CLK_HAS_CTRL,
-	.reg = 1,
-	.bit = 24,
-	.calc = unity_clk_calc,
-	.parent = &ips_clk,
-};
-
-static struct clk mbx_bus_clk = {
-	.name = "mbx_bus_clk",
-	.flags = CLK_HAS_CTRL,
-	.reg = 1,
-	.bit = 22,
-	.calc = half_clk_calc,
-	.parent = &csb_clk,
-};
-
-static struct clk mbx_clk = {
-	.name = "mbx_clk",
-	.flags = CLK_HAS_CTRL,
-	.reg = 1,
-	.bit = 21,
-	.calc = unity_clk_calc,
-	.parent = &csb_clk,
-};
-
-static struct clk mbx_3d_clk = {
-	.name = "mbx_3d_clk",
-	.flags = CLK_HAS_CTRL,
-	.reg = 1,
-	.bit = 20,
-	.calc = generic_div_clk_calc,
-	.parent = &mbx_bus_clk,
-	.div_shift = 14,
-};
-
-static void psc_mclk_in_calc(struct clk *clk)
-{
-	clk->rate = devtree_getfreq("psc_mclk_in");
-	if (!clk->rate)
-		clk->rate = 25000000;
-}
-
-static struct clk psc_mclk_in = {
-	.name = "psc_mclk_in",
-	.calc = psc_mclk_in_calc,
-};
-
-static struct clk spdif_txclk = {
-	.name = "spdif_txclk",
-	.flags = CLK_HAS_CTRL,
-	.reg = 1,
-	.bit = 23,
-};
-
-static struct clk spdif_rxclk = {
-	.name = "spdif_rxclk",
-	.flags = CLK_HAS_CTRL,
-	.reg = 1,
-	.bit = 23,
-};
-
-static void ac97_clk_calc(struct clk *clk)
-{
-	/* ac97 bit clock is always 24.567 MHz */
-	clk->rate = 24567000;
-}
-
-static struct clk ac97_clk = {
-	.name = "ac97_clk_in",
-	.calc = ac97_clk_calc,
-};
-
-static struct clk *rate_clks[] = {
-	&ref_clk,
-	&sys_clk,
-	&diu_clk,
-	&viu_clk,
-	&csb_clk,
-	&e300_clk,
-	&ips_clk,
-	&fec_clk,
-	&sata_clk,
-	&pata_clk,
-	&nfc_clk,
-	&lpc_clk,
-	&mbx_bus_clk,
-	&mbx_clk,
-	&mbx_3d_clk,
-	&axe_clk,
-	&usb1_clk,
-	&usb2_clk,
-	&i2c_clk,
-	&mscan_clk,
-	&sdhc_clk,
-	&pci_clk,
-	&psc_mclk_in,
-	&spdif_txclk,
-	&spdif_rxclk,
-	&ac97_clk,
-	NULL
-};
-
-static void rate_clk_init(struct clk *clk)
-{
-	if (clk->calc) {
-		clk->calc(clk);
-		clk->flags |= CLK_HAS_RATE;
-		clk_register(clk);
-	} else {
-		printk(KERN_WARNING
-		       "Could not initialize clk %s without a calc routine\n",
-		       clk->name);
-	}
-}
-
-static void rate_clks_init(void)
-{
-	struct clk **cpp, *clk;
-
-	cpp = rate_clks;
-	while ((clk = *cpp++))
-		rate_clk_init(clk);
-}
-
-/*
- * There are two clk enable registers with 32 enable bits each
- * psc clocks and device clocks are all stored in dev_clks
- */
-static struct clk dev_clks[2][32];
-
-/*
- * Given a psc number return the dev_clk
- * associated with it
- */
-static struct clk *psc_dev_clk(int pscnum)
-{
-	int reg, bit;
-	struct clk *clk;
-
-	reg = 0;
-	bit = 27 - pscnum;
-
-	clk = &dev_clks[reg][bit];
-	clk->reg = 0;
-	clk->bit = bit;
-	return clk;
-}
-
-/*
- * PSC clock rate calculation
- */
-static void psc_calc_rate(struct clk *clk, int pscnum, struct device_node *np)
-{
-	unsigned long mclk_src = sys_clk.rate;
-	unsigned long mclk_div;
-
-	/*
-	 * Can only change value of mclk divider
-	 * when the divider is disabled.
-	 *
-	 * Zero is not a valid divider so minimum
-	 * divider is 1
-	 *
-	 * disable/set divider/enable
-	 */
-	out_be32(&clockctl->pccr[pscnum], 0);
-	out_be32(&clockctl->pccr[pscnum], 0x00020000);
-	out_be32(&clockctl->pccr[pscnum], 0x00030000);
-
-	if (in_be32(&clockctl->pccr[pscnum]) & 0x80) {
-		clk->rate = spdif_rxclk.rate;
-		return;
-	}
-
-	switch ((in_be32(&clockctl->pccr[pscnum]) >> 14) & 0x3) {
-	case 0:
-		mclk_src = sys_clk.rate;
-		break;
-	case 1:
-		mclk_src = ref_clk.rate;
-		break;
-	case 2:
-		mclk_src = psc_mclk_in.rate;
-		break;
-	case 3:
-		mclk_src = spdif_txclk.rate;
-		break;
-	}
-
-	mclk_div = ((in_be32(&clockctl->pccr[pscnum]) >> 17) & 0x7fff) + 1;
-	clk->rate = mclk_src / mclk_div;
-}
-
-/*
- * Find all psc nodes in device tree and assign a clock
- * with name "psc%d_mclk" and dev pointing at the device
- * returned from of_find_device_by_node
- */
-static void psc_clks_init(void)
-{
-	struct device_node *np;
-	struct platform_device *ofdev;
-	u32 reg;
-	const char *psc_compat;
-
-	psc_compat = mpc512x_select_psc_compat();
-	if (!psc_compat)
-		return;
-
-	for_each_compatible_node(np, NULL, psc_compat) {
-		if (!of_property_read_u32(np, "reg", &reg)) {
-			int pscnum = (reg & 0xf00) >> 8;
-			struct clk *clk = psc_dev_clk(pscnum);
-
-			clk->flags = CLK_HAS_RATE | CLK_HAS_CTRL;
-			ofdev = of_find_device_by_node(np);
-			clk->dev = &ofdev->dev;
-			/*
-			 * AC97 is special rate clock does
-			 * not go through normal path
-			 */
-			if (of_device_is_compatible(np, "fsl,mpc5121-psc-ac97"))
-				clk->rate = ac97_clk.rate;
-			else
-				psc_calc_rate(clk, pscnum, np);
-			sprintf(clk->name, "psc%d_mclk", pscnum);
-			clk_register(clk);
-			clk_enable(clk);
-		}
-	}
-}
-
-static struct clk_interface mpc5121_clk_functions = {
-	.clk_get		= mpc5121_clk_get,
-	.clk_enable		= mpc5121_clk_enable,
-	.clk_disable		= mpc5121_clk_disable,
-	.clk_get_rate		= mpc5121_clk_get_rate,
-	.clk_put		= mpc5121_clk_put,
-	.clk_round_rate		= mpc5121_clk_round_rate,
-	.clk_set_rate		= mpc5121_clk_set_rate,
-	.clk_set_parent		= NULL,
-	.clk_get_parent		= NULL,
-};
-
-int __init mpc5121_clk_init(void)
-{
-	struct device_node *np;
-
-	np = of_find_compatible_node(NULL, NULL, "fsl,mpc5121-clock");
-	if (np) {
-		clockctl = of_iomap(np, 0);
-		of_node_put(np);
-	}
-
-	if (!clockctl) {
-		printk(KERN_ERR "Could not map clock control registers\n");
-		return 0;
-	}
-
-	rate_clks_init();
-	psc_clks_init();
-
-	/* leave clockctl mapped forever */
-	/*iounmap(clockctl); */
-	DEBUG_CLK_DUMP();
-	clocks_initialized++;
-	clk_functions = mpc5121_clk_functions;
-	return 0;
-}
diff --git a/arch/powerpc/platforms/512x/mpc5121_ads.c b/arch/powerpc/platforms/512x/mpc5121_ads.c
index 0a134e0469e..3e90ece10ae 100644
--- a/arch/powerpc/platforms/512x/mpc5121_ads.c
+++ b/arch/powerpc/platforms/512x/mpc5121_ads.c
@@ -43,9 +43,7 @@ static void __init mpc5121_ads_setup_arch(void)
 		mpc83xx_add_bridge(np);
 #endif
 
-#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE)
-	mpc512x_setup_diu();
-#endif
+	mpc512x_setup_arch();
 }
 
 static void __init mpc5121_ads_init_IRQ(void)
@@ -69,7 +67,7 @@ define_machine(mpc5121_ads) {
 	.probe			= mpc5121_ads_probe,
 	.setup_arch		= mpc5121_ads_setup_arch,
 	.init			= mpc512x_init,
-	.init_early		= mpc512x_init_diu,
+	.init_early		= mpc512x_init_early,
 	.init_IRQ		= mpc5121_ads_init_IRQ,
 	.get_irq		= ipic_get_irq,
 	.calibrate_decr		= generic_calibrate_decr,
diff --git a/arch/powerpc/platforms/512x/mpc512x.h b/arch/powerpc/platforms/512x/mpc512x.h
index 0a8e6002394..cc97f022d02 100644
--- a/arch/powerpc/platforms/512x/mpc512x.h
+++ b/arch/powerpc/platforms/512x/mpc512x.h
@@ -12,18 +12,12 @@
 #ifndef __MPC512X_H__
 #define __MPC512X_H__
 extern void __init mpc512x_init_IRQ(void);
+extern void __init mpc512x_init_early(void);
 extern void __init mpc512x_init(void);
+extern void __init mpc512x_setup_arch(void);
 extern int __init mpc5121_clk_init(void);
-void __init mpc512x_declare_of_platform_devices(void);
 extern const char *mpc512x_select_psc_compat(void);
+extern const char *mpc512x_select_reset_compat(void);
 extern void mpc512x_restart(char *cmd);
 
-#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE)
-void mpc512x_init_diu(void);
-void mpc512x_setup_diu(void);
-#else
-#define mpc512x_init_diu NULL
-#define mpc512x_setup_diu NULL
-#endif
-
 #endif				/* __MPC512X_H__ */
diff --git a/arch/powerpc/platforms/512x/mpc512x_generic.c b/arch/powerpc/platforms/512x/mpc512x_generic.c
index 5fb919b3092..ce71408781a 100644
--- a/arch/powerpc/platforms/512x/mpc512x_generic.c
+++ b/arch/powerpc/platforms/512x/mpc512x_generic.c
@@ -45,8 +45,8 @@ define_machine(mpc512x_generic) {
 	.name			= "MPC512x generic",
 	.probe			= mpc512x_generic_probe,
 	.init			= mpc512x_init,
-	.init_early		= mpc512x_init_diu,
-	.setup_arch		= mpc512x_setup_diu,
+	.init_early		= mpc512x_init_early,
+	.setup_arch		= mpc512x_setup_arch,
 	.init_IRQ		= mpc512x_init_IRQ,
 	.get_irq		= ipic_get_irq,
 	.calibrate_decr		= generic_calibrate_decr,
diff --git a/arch/powerpc/platforms/512x/mpc512x_shared.c b/arch/powerpc/platforms/512x/mpc512x_shared.c
index 6eb94ab99d3..adb95f03d4d 100644
--- a/arch/powerpc/platforms/512x/mpc512x_shared.c
+++ b/arch/powerpc/platforms/512x/mpc512x_shared.c
@@ -12,6 +12,7 @@
  * (at your option) any later version.
  */
 
+#include <linux/clk.h>
 #include <linux/kernel.h>
 #include <linux/io.h>
 #include <linux/irq.h>
@@ -35,8 +36,10 @@ static struct mpc512x_reset_module __iomem *reset_module_base;
 static void __init mpc512x_restart_init(void)
 {
 	struct device_node *np;
+	const char *reset_compat;
 
-	np = of_find_compatible_node(NULL, NULL, "fsl,mpc5121-reset");
+	reset_compat = mpc512x_select_reset_compat();
+	np = of_find_compatible_node(NULL, NULL, reset_compat);
 	if (!np)
 		return;
 
@@ -58,8 +61,6 @@ void mpc512x_restart(char *cmd)
 		;
 }
 
-#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE)
-
 struct fsl_diu_shared_fb {
 	u8		gamma[0x300];	/* 32-bit aligned! */
 	struct diu_ad	ad0;		/* 32-bit aligned! */
@@ -68,101 +69,115 @@ struct fsl_diu_shared_fb {
 	bool		in_use;
 };
 
-#define DIU_DIV_MASK	0x000000ff
-void mpc512x_set_pixel_clock(unsigned int pixclock)
+/* receives a pixel clock spec in picoseconds, adjusts the DIU clock rate */
+static void mpc512x_set_pixel_clock(unsigned int pixclock)
 {
-	unsigned long bestval, bestfreq, speed, busfreq;
-	unsigned long minpixclock, maxpixclock, pixval;
-	struct mpc512x_ccm __iomem *ccm;
 	struct device_node *np;
-	u32 temp;
-	long err;
-	int i;
+	struct clk *clk_diu;
+	unsigned long epsilon, minpixclock, maxpixclock;
+	unsigned long offset, want, got, delta;
 
-	np = of_find_compatible_node(NULL, NULL, "fsl,mpc5121-clock");
+	/* lookup and enable the DIU clock */
+	np = of_find_compatible_node(NULL, NULL, "fsl,mpc5121-diu");
 	if (!np) {
-		pr_err("Can't find clock control module.\n");
+		pr_err("Could not find DIU device tree node.\n");
 		return;
 	}
-
-	ccm = of_iomap(np, 0);
+	clk_diu = of_clk_get(np, 0);
+	if (IS_ERR(clk_diu)) {
+		/* backwards compat with device trees that lack clock specs */
+		clk_diu = clk_get_sys(np->name, "ipg");
+	}
 	of_node_put(np);
-	if (!ccm) {
-		pr_err("Can't map clock control module reg.\n");
+	if (IS_ERR(clk_diu)) {
+		pr_err("Could not lookup DIU clock.\n");
 		return;
 	}
-
-	np = of_find_node_by_type(NULL, "cpu");
-	if (np) {
-		const unsigned int *prop =
-			of_get_property(np, "bus-frequency", NULL);
-
-		of_node_put(np);
-		if (prop) {
-			busfreq = *prop;
-		} else {
-			pr_err("Can't get bus-frequency property\n");
-			return;
-		}
-	} else {
-		pr_err("Can't find 'cpu' node.\n");
+	if (clk_prepare_enable(clk_diu)) {
+		pr_err("Could not enable DIU clock.\n");
 		return;
 	}
 
-	/* Pixel Clock configuration */
-	pr_debug("DIU: Bus Frequency = %lu\n", busfreq);
-	speed = busfreq * 4; /* DIU_DIV ratio is 4 * CSB_CLK / DIU_CLK */
-
-	/* Calculate the pixel clock with the smallest error */
-	/* calculate the following in steps to avoid overflow */
-	pr_debug("DIU pixclock in ps - %d\n", pixclock);
-	temp = (1000000000 / pixclock) * 1000;
-	pixclock = temp;
-	pr_debug("DIU pixclock freq - %u\n", pixclock);
-
-	temp = temp / 20; /* pixclock * 0.05 */
-	pr_debug("deviation = %d\n", temp);
-	minpixclock = pixclock - temp;
-	maxpixclock = pixclock + temp;
-	pr_debug("DIU minpixclock - %lu\n", minpixclock);
-	pr_debug("DIU maxpixclock - %lu\n", maxpixclock);
-	pixval = speed/pixclock;
-	pr_debug("DIU pixval = %lu\n", pixval);
-
-	err = LONG_MAX;
-	bestval = pixval;
-	pr_debug("DIU bestval = %lu\n", bestval);
-
-	bestfreq = 0;
-	for (i = -1; i <= 1; i++) {
-		temp = speed / (pixval+i);
-		pr_debug("DIU test pixval i=%d, pixval=%lu, temp freq. = %u\n",
-			i, pixval, temp);
-		if ((temp < minpixclock) || (temp > maxpixclock))
-			pr_debug("DIU exceeds monitor range (%lu to %lu)\n",
-				minpixclock, maxpixclock);
-		else if (abs(temp - pixclock) < err) {
-			pr_debug("Entered the else if block %d\n", i);
-			err = abs(temp - pixclock);
-			bestval = pixval + i;
-			bestfreq = temp;
-		}
+	/*
+	 * convert the picoseconds spec into the desired clock rate,
+	 * determine the acceptable clock range for the monitor (+/- 5%),
+	 * do the calculation in steps to avoid integer overflow
+	 */
+	pr_debug("DIU pixclock in ps - %u\n", pixclock);
+	pixclock = (1000000000 / pixclock) * 1000;
+	pr_debug("DIU pixclock freq  - %u\n", pixclock);
+	epsilon = pixclock / 20; /* pixclock * 0.05 */
+	pr_debug("DIU deviation      - %lu\n", epsilon);
+	minpixclock = pixclock - epsilon;
+	maxpixclock = pixclock + epsilon;
+	pr_debug("DIU minpixclock    - %lu\n", minpixclock);
+	pr_debug("DIU maxpixclock    - %lu\n", maxpixclock);
+
+	/*
+	 * check whether the DIU supports the desired pixel clock
+	 *
+	 * - simply request the desired clock and see what the
+	 *   platform's clock driver will make of it, assuming that it
+	 *   will setup the best approximation of the requested value
+	 * - try other candidate frequencies in the order of decreasing
+	 *   preference (i.e. with increasing distance from the desired
+	 *   pixel clock, and checking the lower frequency before the
+	 *   higher frequency to not overload the hardware) until the
+	 *   first match is found -- any potential subsequent match
+	 *   would only be as good as the former match or typically
+	 *   would be less preferable
+	 *
+	 * the offset increment of pixelclock divided by 64 is an
+	 * arbitrary choice -- it's simple to calculate, in the typical
+	 * case we expect the first check to succeed already, in the
+	 * worst case seven frequencies get tested (the exact center and
+	 * three more values each to the left and to the right) before
+	 * the 5% tolerance window is exceeded, resulting in fast enough
+	 * execution yet high enough probability of finding a suitable
+	 * value, while the error rate will be in the order of single
+	 * percents
+	 */
+	for (offset = 0; offset <= epsilon; offset += pixclock / 64) {
+		want = pixclock - offset;
+		pr_debug("DIU checking clock - %lu\n", want);
+		clk_set_rate(clk_diu, want);
+		got = clk_get_rate(clk_diu);
+		delta = abs(pixclock - got);
+		if (delta < epsilon)
+			break;
+		if (!offset)
+			continue;
+		want = pixclock + offset;
+		pr_debug("DIU checking clock - %lu\n", want);
+		clk_set_rate(clk_diu, want);
+		got = clk_get_rate(clk_diu);
+		delta = abs(pixclock - got);
+		if (delta < epsilon)
+			break;
+	}
+	if (offset <= epsilon) {
+		pr_debug("DIU clock accepted - %lu\n", want);
+		pr_debug("DIU pixclock want %u, got %lu, delta %lu, eps %lu\n",
+			 pixclock, got, delta, epsilon);
+		return;
 	}
+	pr_warn("DIU pixclock auto search unsuccessful\n");
 
-	pr_debug("DIU chose = %lx\n", bestval);
-	pr_debug("DIU error = %ld\n NomPixClk ", err);
-	pr_debug("DIU: Best Freq = %lx\n", bestfreq);
-	/* Modify DIU_DIV in CCM SCFR1 */
-	temp = in_be32(&ccm->scfr1);
-	pr_debug("DIU: Current value of SCFR1: 0x%08x\n", temp);
-	temp &= ~DIU_DIV_MASK;
-	temp |= (bestval & DIU_DIV_MASK);
-	out_be32(&ccm->scfr1, temp);
-	pr_debug("DIU: Modified value of SCFR1: 0x%08x\n", temp);
-	iounmap(ccm);
+	/*
+	 * what is the most appropriate action to take when the search
+	 * for an available pixel clock which is acceptable to the
+	 * monitor has failed?  disable the DIU (clock) or just provide
+	 * a "best effort"?  we go with the latter
+	 */
+	pr_warn("DIU pixclock best effort fallback (backend's choice)\n");
+	clk_set_rate(clk_diu, pixclock);
+	got = clk_get_rate(clk_diu);
+	delta = abs(pixclock - got);
+	pr_debug("DIU pixclock want %u, got %lu, delta %lu, eps %lu\n",
+		 pixclock, got, delta, epsilon);
 }
 
-enum fsl_diu_monitor_port
+static enum fsl_diu_monitor_port
 mpc512x_valid_monitor_port(enum fsl_diu_monitor_port port)
 {
 	return FSL_DIU_PORT_DVI;
@@ -177,7 +192,7 @@ static inline void mpc512x_free_bootmem(struct page *page)
 	free_reserved_page(page);
 }
 
-void mpc512x_release_bootmem(void)
+static void mpc512x_release_bootmem(void)
 {
 	unsigned long addr = diu_shared_fb.fb_phys & PAGE_MASK;
 	unsigned long size = diu_shared_fb.fb_len;
@@ -203,7 +218,7 @@ void mpc512x_release_bootmem(void)
  * address range will be reserved in setup_arch() after bootmem
  * allocator is up.
  */
-void __init mpc512x_init_diu(void)
+static void __init mpc512x_init_diu(void)
 {
 	struct device_node *np;
 	struct diu __iomem *diu_reg;
@@ -272,7 +287,7 @@ out:
 	iounmap(diu_reg);
 }
 
-void __init mpc512x_setup_diu(void)
+static void __init mpc512x_setup_diu(void)
 {
 	int ret;
 
@@ -301,8 +316,6 @@ void __init mpc512x_setup_diu(void)
 	diu_ops.release_bootmem		= mpc512x_release_bootmem;
 }
 
-#endif
-
 void __init mpc512x_init_IRQ(void)
 {
 	struct device_node *np;
@@ -335,7 +348,7 @@ static struct of_device_id __initdata of_bus_ids[] = {
 	{},
 };
 
-void __init mpc512x_declare_of_platform_devices(void)
+static void __init mpc512x_declare_of_platform_devices(void)
 {
 	if (of_platform_bus_probe(NULL, of_bus_ids, NULL))
 		printk(KERN_ERR __FILE__ ": "
@@ -355,6 +368,17 @@ const char *mpc512x_select_psc_compat(void)
 	return NULL;
 }
 
+const char *mpc512x_select_reset_compat(void)
+{
+	if (of_machine_is_compatible("fsl,mpc5121"))
+		return "fsl,mpc5121-reset";
+
+	if (of_machine_is_compatible("fsl,mpc5125"))
+		return "fsl,mpc5125-reset";
+
+	return NULL;
+}
+
 static unsigned int __init get_fifo_size(struct device_node *np,
 					 char *prop_name)
 {
@@ -374,7 +398,7 @@ static unsigned int __init get_fifo_size(struct device_node *np,
 		    ((u32)(_base) + sizeof(struct mpc52xx_psc)))
 
 /* Init PSC FIFO space for TX and RX slices */
-void __init mpc512x_psc_fifo_init(void)
+static void __init mpc512x_psc_fifo_init(void)
 {
 	struct device_node *np;
 	void __iomem *psc;
@@ -436,14 +460,26 @@ void __init mpc512x_psc_fifo_init(void)
 	}
 }
 
+void __init mpc512x_init_early(void)
+{
+	mpc512x_restart_init();
+	if (IS_ENABLED(CONFIG_FB_FSL_DIU))
+		mpc512x_init_diu();
+}
+
 void __init mpc512x_init(void)
 {
 	mpc5121_clk_init();
 	mpc512x_declare_of_platform_devices();
-	mpc512x_restart_init();
 	mpc512x_psc_fifo_init();
 }
 
+void __init mpc512x_setup_arch(void)
+{
+	if (IS_ENABLED(CONFIG_FB_FSL_DIU))
+		mpc512x_setup_diu();
+}
+
 /**
  * mpc512x_cs_config - Setup chip select configuration
  * @cs: chip select number
diff --git a/arch/powerpc/platforms/512x/pdm360ng.c b/arch/powerpc/platforms/512x/pdm360ng.c
index 0575e858291..116f2325b20 100644
--- a/arch/powerpc/platforms/512x/pdm360ng.c
+++ b/arch/powerpc/platforms/512x/pdm360ng.c
@@ -14,6 +14,8 @@
 
 #include <linux/kernel.h>
 #include <linux/io.h>
+#include <linux/of_address.h>
+#include <linux/of_fdt.h>
 #include <linux/of_platform.h>
 
 #include <asm/machdep.h>
@@ -119,9 +121,9 @@ static int __init pdm360ng_probe(void)
 define_machine(pdm360ng) {
 	.name			= "PDM360NG",
 	.probe			= pdm360ng_probe,
-	.setup_arch		= mpc512x_setup_diu,
+	.setup_arch		= mpc512x_setup_arch,
 	.init			= pdm360ng_init,
-	.init_early		= mpc512x_init_diu,
+	.init_early		= mpc512x_init_early,
 	.init_IRQ		= mpc512x_init_IRQ,
 	.get_irq		= ipic_get_irq,
 	.calibrate_decr		= generic_calibrate_decr,
diff --git a/arch/powerpc/platforms/52xx/Kconfig b/arch/powerpc/platforms/52xx/Kconfig
index 90f4496017e..b625a2c6f4f 100644
--- a/arch/powerpc/platforms/52xx/Kconfig
+++ b/arch/powerpc/platforms/52xx/Kconfig
@@ -1,7 +1,7 @@
 config PPC_MPC52xx
 	bool "52xx-based boards"
 	depends on 6xx
-	select PPC_CLOCK
+	select COMMON_CLK
 	select PPC_PCI_CHOICE
 
 config PPC_MPC5200_SIMPLE
@@ -57,5 +57,5 @@ config PPC_MPC5200_BUGFIX
 
 config PPC_MPC5200_LPBFIFO
 	tristate "MPC5200 LocalPlus bus FIFO driver"
-	depends on PPC_MPC52xx
+	depends on PPC_MPC52xx && PPC_BESTCOMM
 	select PPC_BESTCOMM_GEN_BD
diff --git a/arch/powerpc/platforms/52xx/efika.c b/arch/powerpc/platforms/52xx/efika.c
index 18c10482019..6e19b0ad5d2 100644
--- a/arch/powerpc/platforms/52xx/efika.c
+++ b/arch/powerpc/platforms/52xx/efika.c
@@ -199,8 +199,8 @@ static void __init efika_setup_arch(void)
 
 static int __init efika_probe(void)
 {
-	char *model = of_get_flat_dt_prop(of_get_flat_dt_root(),
-					  "model", NULL);
+	const char *model = of_get_flat_dt_prop(of_get_flat_dt_root(),
+						"model", NULL);
 
 	if (model == NULL)
 		return 0;
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c b/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c
index be7b1aa4d54..37f7a89c10f 100644
--- a/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c
+++ b/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c
@@ -245,7 +245,7 @@ static irqreturn_t mpc52xx_lpbfifo_irq(int irq, void *dev_id)
 
 	if (dma && !write) {
 		spin_unlock_irqrestore(&lpbfifo.lock, flags);
-		pr_err("bogus LPBFIFO IRQ (dma and not writting)\n");
+		pr_err("bogus LPBFIFO IRQ (dma and not writing)\n");
 		return IRQ_HANDLED;
 	}
 
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_pic.c b/arch/powerpc/platforms/52xx/mpc52xx_pic.c
index b89ef65392d..2898b737deb 100644
--- a/arch/powerpc/platforms/52xx/mpc52xx_pic.c
+++ b/arch/powerpc/platforms/52xx/mpc52xx_pic.c
@@ -340,7 +340,7 @@ static int mpc52xx_irqhost_map(struct irq_domain *h, unsigned int virq,
 {
 	int l1irq;
 	int l2irq;
-	struct irq_chip *irqchip;
+	struct irq_chip *uninitialized_var(irqchip);
 	void *hndlr;
 	int type;
 	u32 reg;
diff --git a/arch/powerpc/platforms/82xx/mpc8272_ads.c b/arch/powerpc/platforms/82xx/mpc8272_ads.c
index 30394b409b3..6a14cf50f4a 100644
--- a/arch/powerpc/platforms/82xx/mpc8272_ads.c
+++ b/arch/powerpc/platforms/82xx/mpc8272_ads.c
@@ -16,6 +16,8 @@
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/fsl_devices.h>
+#include <linux/of_address.h>
+#include <linux/of_fdt.h>
 #include <linux/of_platform.h>
 #include <linux/io.h>
 
diff --git a/arch/powerpc/platforms/82xx/pq2fads.c b/arch/powerpc/platforms/82xx/pq2fads.c
index e1dceeec499..e5f82ec8df1 100644
--- a/arch/powerpc/platforms/82xx/pq2fads.c
+++ b/arch/powerpc/platforms/82xx/pq2fads.c
@@ -15,6 +15,8 @@
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/fsl_devices.h>
+#include <linux/of_address.h>
+#include <linux/of_fdt.h>
 #include <linux/of_platform.h>
 
 #include <asm/io.h>
diff --git a/arch/powerpc/platforms/83xx/Kconfig b/arch/powerpc/platforms/83xx/Kconfig
index 670a033264c..2bdc8c862c4 100644
--- a/arch/powerpc/platforms/83xx/Kconfig
+++ b/arch/powerpc/platforms/83xx/Kconfig
@@ -99,7 +99,6 @@ config SBC834x
 config ASP834x
 	bool "Analogue & Micro ASP 834x"
 	select PPC_MPC834x
-	select REDBOOT
 	help
 	  This enables support for the Analogue & Micro ASP 83xx
 	  board.
diff --git a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
index 624cb51d19c..e238b6a55b1 100644
--- a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
+++ b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
@@ -11,7 +11,6 @@
  * (at your option) any later version.
  */
 
-#include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/device.h>
@@ -204,7 +203,6 @@ static int mcu_remove(struct i2c_client *client)
 	ret = mcu_gpiochip_remove(mcu);
 	if (ret)
 		return ret;
-	i2c_set_clientdata(client, NULL);
 	kfree(mcu);
 	return 0;
 }
@@ -231,17 +229,7 @@ static struct i2c_driver mcu_driver = {
 	.id_table = mcu_ids,
 };
 
-static int __init mcu_init(void)
-{
-	return i2c_add_driver(&mcu_driver);
-}
-module_init(mcu_init);
-
-static void __exit mcu_exit(void)
-{
-	i2c_del_driver(&mcu_driver);
-}
-module_exit(mcu_exit);
+module_i2c_driver(mcu_driver);
 
 MODULE_DESCRIPTION("Power Management and GPIO expander driver for "
 		   "MPC8349E-mITX-compatible MCU");
diff --git a/arch/powerpc/platforms/83xx/suspend.c b/arch/powerpc/platforms/83xx/suspend.c
index 1d769a29249..4b4c081df94 100644
--- a/arch/powerpc/platforms/83xx/suspend.c
+++ b/arch/powerpc/platforms/83xx/suspend.c
@@ -10,7 +10,6 @@
  * by the Free Software Foundation.
  */
 
-#include <linux/init.h>
 #include <linux/pm.h>
 #include <linux/types.h>
 #include <linux/ioport.h>
@@ -20,6 +19,8 @@
 #include <linux/freezer.h>
 #include <linux/suspend.h>
 #include <linux/fsl_devices.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
 #include <linux/of_platform.h>
 #include <linux/export.h>
 
diff --git a/arch/powerpc/platforms/85xx/Kconfig b/arch/powerpc/platforms/85xx/Kconfig
index efdd37c775a..f442120e003 100644
--- a/arch/powerpc/platforms/85xx/Kconfig
+++ b/arch/powerpc/platforms/85xx/Kconfig
@@ -32,6 +32,21 @@ config BSC9131_RDB
 	  StarCore SC3850 DSP
 	  Manufacturer : Freescale Semiconductor, Inc
 
+config C293_PCIE
+	bool "Freescale C293PCIE"
+	select DEFAULT_UIMAGE
+	help
+	  This option enables support for the C293PCIE board
+
+config BSC9132_QDS
+	bool "Freescale BSC9132QDS"
+	select DEFAULT_UIMAGE
+	help
+	  This option enables support for the Freescale BSC9132 QDS board.
+	  BSC9132 is a heterogeneous SoC containing dual e500v2 powerpc cores
+	  and dual StarCore SC3850 DSP cores.
+	  Manufacturer : Freescale Semiconductor, Inc
+
 config MPC8540_ADS
 	bool "Freescale MPC8540 ADS"
 	select DEFAULT_UIMAGE
@@ -111,11 +126,17 @@ config P1022_RDK
 	  This option enables support for the Freescale / iVeia P1022RDK
 	  reference board.
 
-config P1023_RDS
-	bool "Freescale P1023 RDS"
+config P1023_RDB
+	bool "Freescale P1023 RDB"
 	select DEFAULT_UIMAGE
 	help
-	  This option enables support for the P1023 RDS board
+	  This option enables support for the P1023 RDB board.
+
+config TWR_P102x
+	bool "Freescale TWR-P102x"
+	select DEFAULT_UIMAGE
+	help
+	  This option enables support for the TWR-P1025 board.
 
 config SOCRATES
 	bool "Socrates"
@@ -212,83 +233,16 @@ config GE_IMP3A
 	  This board is a 3U CompactPCI Single Board Computer with a Freescale
 	  P2020 processor.
 
-config P2041_RDB
-	bool "Freescale P2041 RDB"
-	select DEFAULT_UIMAGE
-	select PPC_E500MC
-	select PHYS_64BIT
-	select SWIOTLB
-	select ARCH_REQUIRE_GPIOLIB
-	select GPIO_MPC8XXX
-	select HAS_RAPIDIO
-	select PPC_EPAPR_HV_PIC
-	help
-	  This option enables support for the P2041 RDB board
-
-config P3041_DS
-	bool "Freescale P3041 DS"
-	select DEFAULT_UIMAGE
-	select PPC_E500MC
-	select PHYS_64BIT
-	select SWIOTLB
-	select ARCH_REQUIRE_GPIOLIB
-	select GPIO_MPC8XXX
-	select HAS_RAPIDIO
-	select PPC_EPAPR_HV_PIC
-	help
-	  This option enables support for the P3041 DS board
-
-config P4080_DS
-	bool "Freescale P4080 DS"
-	select DEFAULT_UIMAGE
-	select PPC_E500MC
-	select PHYS_64BIT
-	select SWIOTLB
-	select ARCH_REQUIRE_GPIOLIB
-	select GPIO_MPC8XXX
-	select HAS_RAPIDIO
-	select PPC_EPAPR_HV_PIC
-	help
-	  This option enables support for the P4080 DS board
-
 config SGY_CTS1000
 	tristate "Servergy CTS-1000 support"
 	select GPIOLIB
 	select OF_GPIO
-	depends on P4080_DS
+	depends on CORENET_GENERIC
 	help
 	  Enable this to support functionality in Servergy's CTS-1000 systems.
 
 endif # PPC32
 
-config P5020_DS
-	bool "Freescale P5020 DS"
-	select DEFAULT_UIMAGE
-	select E500
-	select PPC_E500MC
-	select PHYS_64BIT
-	select SWIOTLB
-	select ARCH_REQUIRE_GPIOLIB
-	select GPIO_MPC8XXX
-	select HAS_RAPIDIO
-	select PPC_EPAPR_HV_PIC
-	help
-	  This option enables support for the P5020 DS board
-
-config P5040_DS
-	bool "Freescale P5040 DS"
-	select DEFAULT_UIMAGE
-	select E500
-	select PPC_E500MC
-	select PHYS_64BIT
-	select SWIOTLB
-	select ARCH_REQUIRE_GPIOLIB
-	select GPIO_MPC8XXX
-	select HAS_RAPIDIO
-	select PPC_EPAPR_HV_PIC
-	help
-	  This option enables support for the P5040 DS board
-
 config PPC_QEMU_E500
 	bool "QEMU generic e500 platform"
 	select DEFAULT_UIMAGE
@@ -304,10 +258,8 @@ config PPC_QEMU_E500
 	  unset based on the emulated CPU (or actual host CPU in the case
 	  of KVM).
 
-if PPC64
-
-config T4240_QDS
-	bool "Freescale T4240 QDS"
+config CORENET_GENERIC
+	bool "Freescale CoreNet Generic"
 	select DEFAULT_UIMAGE
 	select E500
 	select PPC_E500MC
@@ -318,26 +270,14 @@ config T4240_QDS
 	select HAS_RAPIDIO
 	select PPC_EPAPR_HV_PIC
 	help
-	  This option enables support for the T4240 QDS board
-
-config B4_QDS
-	bool "Freescale B4 QDS"
-	select DEFAULT_UIMAGE
-	select E500
-	select PPC_E500MC
-	select PHYS_64BIT
-	select SWIOTLB
-	select GPIOLIB
-	select ARCH_REQUIRE_GPIOLIB
-	select HAS_RAPIDIO
-	select PPC_EPAPR_HV_PIC
-	help
-	  This option enables support for the B4 QDS board
-	  The B4 application development system B4 QDS is a complete
-	  debugging environment intended for engineers developing
-	  applications for the B4.
+	  This option enables support for the FSL CoreNet based boards.
+	  For 32bit kernel, the following boards are supported:
+	    P2041 RDB, P3041 DS, P4080 DS, kmcoge4, and OCA4080
+	  For 64bit kernel, the following boards are supported:
+	    T4240 QDS and B4 QDS
+	  The following boards are supported for both 32bit and 64bit kernel:
+	    P5020 DS, P5040 DS and T104xQDS
 
-endif
 endif # FSL_SOC_BOOKE
 
 config TQM85xx
diff --git a/arch/powerpc/platforms/85xx/Makefile b/arch/powerpc/platforms/85xx/Makefile
index 2eab37ea4a9..73032604662 100644
--- a/arch/powerpc/platforms/85xx/Makefile
+++ b/arch/powerpc/platforms/85xx/Makefile
@@ -6,6 +6,8 @@ obj-$(CONFIG_SMP) += smp.o
 obj-y += common.o
 
 obj-$(CONFIG_BSC9131_RDB) += bsc913x_rdb.o
+obj-$(CONFIG_BSC9132_QDS) += bsc913x_qds.o
+obj-$(CONFIG_C293_PCIE)   += c293pcie.o
 obj-$(CONFIG_MPC8540_ADS) += mpc85xx_ads.o
 obj-$(CONFIG_MPC8560_ADS) += mpc85xx_ads.o
 obj-$(CONFIG_MPC85xx_CDS) += mpc85xx_cds.o
@@ -16,14 +18,9 @@ obj-$(CONFIG_MPC85xx_RDB) += mpc85xx_rdb.o
 obj-$(CONFIG_P1010_RDB)   += p1010rdb.o
 obj-$(CONFIG_P1022_DS)    += p1022_ds.o
 obj-$(CONFIG_P1022_RDK)   += p1022_rdk.o
-obj-$(CONFIG_P1023_RDS)   += p1023_rds.o
-obj-$(CONFIG_P2041_RDB)   += p2041_rdb.o corenet_ds.o
-obj-$(CONFIG_P3041_DS)    += p3041_ds.o corenet_ds.o
-obj-$(CONFIG_P4080_DS)    += p4080_ds.o corenet_ds.o
-obj-$(CONFIG_P5020_DS)    += p5020_ds.o corenet_ds.o
-obj-$(CONFIG_P5040_DS)    += p5040_ds.o corenet_ds.o
-obj-$(CONFIG_T4240_QDS)   += t4240_qds.o corenet_ds.o
-obj-$(CONFIG_B4_QDS)	  += b4_qds.o corenet_ds.o
+obj-$(CONFIG_P1023_RDB)   += p1023_rdb.o
+obj-$(CONFIG_TWR_P102x)   += twr_p102x.o
+obj-$(CONFIG_CORENET_GENERIC)   += corenet_generic.o
 obj-$(CONFIG_STX_GP3)	  += stx_gp3.o
 obj-$(CONFIG_TQM85xx)	  += tqm85xx.o
 obj-$(CONFIG_SBC8548)     += sbc8548.o
diff --git a/arch/powerpc/platforms/85xx/b4_qds.c b/arch/powerpc/platforms/85xx/b4_qds.c
deleted file mode 100644
index 0c6702f8b88..00000000000
--- a/arch/powerpc/platforms/85xx/b4_qds.c
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * B4 QDS Setup
- * Should apply for QDS platform of B4860 and it's personalities.
- * viz B4860/B4420/B4220QDS
- *
- * Copyright 2012 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- */
-
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/kdev_t.h>
-#include <linux/delay.h>
-#include <linux/interrupt.h>
-#include <linux/phy.h>
-
-#include <asm/time.h>
-#include <asm/machdep.h>
-#include <asm/pci-bridge.h>
-#include <mm/mmu_decl.h>
-#include <asm/prom.h>
-#include <asm/udbg.h>
-#include <asm/mpic.h>
-
-#include <linux/of_platform.h>
-#include <sysdev/fsl_soc.h>
-#include <sysdev/fsl_pci.h>
-#include <asm/ehv_pic.h>
-
-#include "corenet_ds.h"
-
-/*
- * Called very early, device-tree isn't unflattened
- */
-static int __init b4_qds_probe(void)
-{
-	unsigned long root = of_get_flat_dt_root();
-#ifdef CONFIG_SMP
-	extern struct smp_ops_t smp_85xx_ops;
-#endif
-
-	if ((of_flat_dt_is_compatible(root, "fsl,B4860QDS")) ||
-		(of_flat_dt_is_compatible(root, "fsl,B4420QDS")) ||
-			(of_flat_dt_is_compatible(root, "fsl,B4220QDS")))
-		return 1;
-
-	/* Check if we're running under the Freescale hypervisor */
-	if ((of_flat_dt_is_compatible(root, "fsl,B4860QDS-hv")) ||
-		(of_flat_dt_is_compatible(root, "fsl,B4420QDS-hv")) ||
-			(of_flat_dt_is_compatible(root, "fsl,B4220QDS-hv"))) {
-		ppc_md.init_IRQ = ehv_pic_init;
-		ppc_md.get_irq = ehv_pic_get_irq;
-		ppc_md.restart = fsl_hv_restart;
-		ppc_md.power_off = fsl_hv_halt;
-		ppc_md.halt = fsl_hv_halt;
-#ifdef CONFIG_SMP
-		/*
-		 * Disable the timebase sync operations because we can't write
-		 * to the timebase registers under the hypervisor.
-		  */
-		smp_85xx_ops.give_timebase = NULL;
-		smp_85xx_ops.take_timebase = NULL;
-#endif
-		return 1;
-	}
-
-	return 0;
-}
-
-define_machine(b4_qds) {
-	.name			= "B4 QDS",
-	.probe			= b4_qds_probe,
-	.setup_arch		= corenet_ds_setup_arch,
-	.init_IRQ		= corenet_ds_pic_init,
-#ifdef CONFIG_PCI
-	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
-#endif
-/* coreint doesn't play nice with lazy EE, use legacy mpic for now */
-#ifdef CONFIG_PPC64
-	.get_irq		= mpic_get_irq,
-#else
-	.get_irq		= mpic_get_coreint_irq,
-#endif
-	.restart		= fsl_rstcr_restart,
-	.calibrate_decr		= generic_calibrate_decr,
-	.progress		= udbg_progress,
-#ifdef CONFIG_PPC64
-	.power_save		= book3e_idle,
-#else
-	.power_save		= e500_idle,
-#endif
-};
-
-machine_arch_initcall(b4_qds, corenet_ds_publish_devices);
-
-#ifdef CONFIG_SWIOTLB
-machine_arch_initcall(b4_qds, swiotlb_setup_bus_notifier);
-#endif
diff --git a/arch/powerpc/platforms/85xx/bsc913x_qds.c b/arch/powerpc/platforms/85xx/bsc913x_qds.c
new file mode 100644
index 00000000000..f0927e58af2
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/bsc913x_qds.c
@@ -0,0 +1,74 @@
+/*
+ * BSC913xQDS Board Setup
+ *
+ * Author:
+ *   Harninder Rai <harninder.rai@freescale.com>
+ *   Priyanka Jain <Priyanka.Jain@freescale.com>
+ *
+ * Copyright 2014 Freescale Semiconductor Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/of_platform.h>
+#include <linux/pci.h>
+#include <asm/mpic.h>
+#include <sysdev/fsl_soc.h>
+#include <asm/udbg.h>
+
+#include "mpc85xx.h"
+#include "smp.h"
+
+/* Allocate and initialise the MPIC; an allocation failure is only logged. */
+static void __init bsc913x_qds_pic_init(void)
+{
+	struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN |
+	  MPIC_SINGLE_DEST_CPU, 0, 256, " OpenPIC  ");
+
+	if (!mpic)
+		pr_err("bsc913x: Failed to allocate MPIC structure\n");
+	else
+		mpic_init(mpic);
+}
+
+/*
+ * Setup the architecture
+ */
+static void __init bsc913x_qds_setup_arch(void)
+{
+	if (ppc_md.progress)
+		ppc_md.progress("bsc913x_qds_setup_arch()", 0);
+
+#if defined(CONFIG_SMP)
+	mpc85xx_smp_init();
+#endif
+
+	pr_info("bsc913x board from Freescale Semiconductor\n");
+}
+
+machine_device_initcall(bsc9132_qds, mpc85xx_common_publish_devices);
+
+/*
+ * Called very early, device-tree isn't unflattened
+ */
+
+static int __init bsc9132_qds_probe(void)
+{
+	unsigned long root = of_get_flat_dt_root();
+
+	return of_flat_dt_is_compatible(root, "fsl,bsc9132qds");
+}
+
+define_machine(bsc9132_qds) {
+	.name			= "BSC9132 QDS",
+	.probe			= bsc9132_qds_probe,
+	.setup_arch		= bsc913x_qds_setup_arch,
+	.init_IRQ		= bsc913x_qds_pic_init,
+	.get_irq		= mpic_get_irq,
+	.restart		= fsl_rstcr_restart,
+	.calibrate_decr		= generic_calibrate_decr,
+	.progress		= udbg_progress,
+};
diff --git a/arch/powerpc/platforms/85xx/c293pcie.c b/arch/powerpc/platforms/85xx/c293pcie.c
new file mode 100644
index 00000000000..84476b64600
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/c293pcie.c
@@ -0,0 +1,77 @@
+/*
+ * C293PCIE Board Setup
+ *
+ * Copyright 2013 Freescale Semiconductor Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/of_fdt.h>
+#include <linux/of_platform.h>
+
+#include <asm/machdep.h>
+#include <asm/udbg.h>
+#include <asm/mpic.h>
+
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+
+#include "mpc85xx.h"
+
+/* Allocate and initialise the MPIC; BUG()s if the allocation fails. */
+static void __init c293_pcie_pic_init(void)
+{
+	struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN |
+	  MPIC_SINGLE_DEST_CPU, 0, 256, " OpenPIC  ");
+
+	BUG_ON(mpic == NULL);
+
+	mpic_init(mpic);
+}
+
+/*
+ * Setup the architecture
+ */
+static void __init c293_pcie_setup_arch(void)
+{
+	if (ppc_md.progress)
+		ppc_md.progress("c293_pcie_setup_arch()", 0);
+
+	fsl_pci_assign_primary();
+
+	printk(KERN_INFO "C293 PCIE board from Freescale Semiconductor\n");
+}
+
+machine_arch_initcall(c293_pcie, mpc85xx_common_publish_devices);
+
+/*
+ * Called very early, device-tree isn't unflattened
+ */
+static int __init c293_pcie_probe(void)
+{
+	unsigned long root = of_get_flat_dt_root();
+
+	if (of_flat_dt_is_compatible(root, "fsl,C293PCIE"))
+		return 1;
+	return 0;
+}
+
+define_machine(c293_pcie) {
+	.name			= "C293 PCIE",
+	.probe			= c293_pcie_probe,
+	.setup_arch		= c293_pcie_setup_arch,
+	.init_IRQ		= c293_pcie_pic_init,
+#ifdef CONFIG_PCI
+	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+	.pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
+#endif
+	.get_irq		= mpic_get_irq,
+	.restart		= fsl_rstcr_restart,
+	.calibrate_decr		= generic_calibrate_decr,
+	.progress		= udbg_progress,
+};
diff --git a/arch/powerpc/platforms/85xx/common.c b/arch/powerpc/platforms/85xx/common.c
index d0861a0d836..b564b5e23f7 100644
--- a/arch/powerpc/platforms/85xx/common.c
+++ b/arch/powerpc/platforms/85xx/common.c
@@ -5,8 +5,11 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
+
+#include <linux/of_irq.h>
 #include <linux/of_platform.h>
 
+#include <asm/qe.h>
 #include <sysdev/cpm2_pic.h>
 
 #include "mpc85xx.h"
@@ -80,3 +83,46 @@ void __init mpc85xx_cpm2_pic_init(void)
 	irq_set_chained_handler(irq, cpm2_cascade);
 }
 #endif
+
+#ifdef CONFIG_QUICC_ENGINE
+/*
+ * Reset the QUICC Engine if the device tree has an available QE node.
+ */
+void __init mpc85xx_qe_init(void)
+{
+	struct device_node *qe_node;
+
+	/* Prefer the compatible match; fall back to a node named "qe". */
+	qe_node = of_find_compatible_node(NULL, NULL, "fsl,qe");
+	if (qe_node == NULL)
+		qe_node = of_find_node_by_name(NULL, "qe");
+	if (qe_node == NULL) {
+		pr_err("%s: Could not find Quicc Engine node\n", __func__);
+		return;
+	}
+
+	/* Reset only when of_device_is_available() accepts the node. */
+	if (of_device_is_available(qe_node))
+		qe_reset();
+
+	/* Drop the reference taken by the lookup on every path. */
+	of_node_put(qe_node);
+}
+
+void __init mpc85xx_qe_par_io_init(void)
+{
+	struct device_node *par_io, *ucc;
+
+	par_io = of_find_node_by_name(NULL, "par_io");
+	if (par_io == NULL)
+		return;		/* no "par_io" node found */
+
+	par_io_init(par_io);
+	of_node_put(par_io);
+
+	/* Run par_io_of_config() on every node named "ucc". */
+	for_each_node_by_name(ucc, "ucc") {
+		par_io_of_config(ucc);
+	}
+}
+#endif
diff --git a/arch/powerpc/platforms/85xx/corenet_ds.c b/arch/powerpc/platforms/85xx/corenet_ds.c
deleted file mode 100644
index c59c617eee9..00000000000
--- a/arch/powerpc/platforms/85xx/corenet_ds.c
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * Corenet based SoC DS Setup
- *
- * Maintained by Kumar Gala (see MAINTAINERS for contact information)
- *
- * Copyright 2009-2011 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- */
-
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/kdev_t.h>
-#include <linux/delay.h>
-#include <linux/interrupt.h>
-
-#include <asm/time.h>
-#include <asm/machdep.h>
-#include <asm/pci-bridge.h>
-#include <asm/ppc-pci.h>
-#include <mm/mmu_decl.h>
-#include <asm/prom.h>
-#include <asm/udbg.h>
-#include <asm/mpic.h>
-
-#include <linux/of_platform.h>
-#include <sysdev/fsl_soc.h>
-#include <sysdev/fsl_pci.h>
-#include "smp.h"
-
-void __init corenet_ds_pic_init(void)
-{
-	struct mpic *mpic;
-	unsigned int flags = MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU |
-		MPIC_NO_RESET;
-
-	if (ppc_md.get_irq == mpic_get_coreint_irq)
-		flags |= MPIC_ENABLE_COREINT;
-
-	mpic = mpic_alloc(NULL, 0, flags, 0, 512, " OpenPIC  ");
-	BUG_ON(mpic == NULL);
-
-	mpic_init(mpic);
-}
-
-/*
- * Setup the architecture
- */
-void __init corenet_ds_setup_arch(void)
-{
-	mpc85xx_smp_init();
-
-#if defined(CONFIG_PCI) && defined(CONFIG_PPC64)
-	pci_devs_phb_init();
-#endif
-
-	fsl_pci_assign_primary();
-
-	swiotlb_detect_4g();
-
-	pr_info("%s board from Freescale Semiconductor\n", ppc_md.name);
-}
-
-static const struct of_device_id of_device_ids[] = {
-	{
-		.compatible	= "simple-bus"
-	},
-	{
-		.compatible	= "fsl,srio",
-	},
-	{
-		.compatible	= "fsl,p4080-pcie",
-	},
-	{
-		.compatible	= "fsl,qoriq-pcie-v2.2",
-	},
-	{
-		.compatible	= "fsl,qoriq-pcie-v2.3",
-	},
-	{
-		.compatible	= "fsl,qoriq-pcie-v2.4",
-	},
-	{
-		.compatible	= "fsl,qoriq-pcie-v3.0",
-	},
-	/* The following two are for the Freescale hypervisor */
-	{
-		.name		= "hypervisor",
-	},
-	{
-		.name		= "handles",
-	},
-	{}
-};
-
-int __init corenet_ds_publish_devices(void)
-{
-	return of_platform_bus_probe(NULL, of_device_ids, NULL);
-}
diff --git a/arch/powerpc/platforms/85xx/corenet_ds.h b/arch/powerpc/platforms/85xx/corenet_ds.h
deleted file mode 100644
index ddd700b2303..00000000000
--- a/arch/powerpc/platforms/85xx/corenet_ds.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/*
- * Corenet based SoC DS Setup
- *
- * Copyright 2009 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- */
-
-#ifndef CORENET_DS_H
-#define CORENET_DS_H
-
-extern void __init corenet_ds_pic_init(void);
-extern void __init corenet_ds_setup_arch(void);
-extern int __init corenet_ds_publish_devices(void);
-
-#endif
diff --git a/arch/powerpc/platforms/85xx/corenet_generic.c b/arch/powerpc/platforms/85xx/corenet_generic.c
new file mode 100644
index 00000000000..5db1e117fdd
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/corenet_generic.c
@@ -0,0 +1,206 @@
+/*
+ * Corenet based SoC DS Setup
+ *
+ * Maintained by Kumar Gala (see MAINTAINERS for contact information)
+ *
+ * Copyright 2009-2011 Freescale Semiconductor Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/kdev_t.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+
+#include <asm/time.h>
+#include <asm/machdep.h>
+#include <asm/pci-bridge.h>
+#include <asm/ppc-pci.h>
+#include <mm/mmu_decl.h>
+#include <asm/prom.h>
+#include <asm/udbg.h>
+#include <asm/mpic.h>
+#include <asm/ehv_pic.h>
+#include <asm/qe_ic.h>
+
+#include <linux/of_platform.h>
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+#include "smp.h"
+#include "mpc85xx.h"
+
+void __init corenet_gen_pic_init(void)
+{
+	unsigned int mpic_flags;
+	struct device_node *qe_ic;
+	struct mpic *pic;
+
+	mpic_flags = MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU | MPIC_NO_RESET;
+	/* Add coreint only when get_irq is the coreint variant. */
+	if (ppc_md.get_irq == mpic_get_coreint_irq)
+		mpic_flags |= MPIC_ENABLE_COREINT;
+
+	pic = mpic_alloc(NULL, 0, mpic_flags, 0, 512, " OpenPIC  ");
+	BUG_ON(!pic);
+	mpic_init(pic);
+
+	/* Hook up the QE interrupt controller when the tree has one. */
+	qe_ic = of_find_compatible_node(NULL, NULL, "fsl,qe-ic");
+	if (!qe_ic)
+		return;
+
+	qe_ic_init(qe_ic, 0, qe_ic_cascade_low_mpic, qe_ic_cascade_high_mpic);
+	of_node_put(qe_ic);
+}
+
+/*
+ * setup_arch hook: SMP init, swiotlb detection, board banner, then QE init
+ */
+void __init corenet_gen_setup_arch(void)
+{
+	mpc85xx_smp_init();
+
+	swiotlb_detect_4g();
+
+	pr_info("%s board\n", ppc_md.name);
+
+	mpc85xx_qe_init();	/* empty stub unless CONFIG_QUICC_ENGINE */
+}
+
+static const struct of_device_id of_device_ids[] = {
+	{
+		.compatible	= "simple-bus"
+	},
+	{
+		.compatible	= "fsl,srio",
+	},
+	{
+		.compatible	= "fsl,p4080-pcie",
+	},
+	{
+		.compatible	= "fsl,qoriq-pcie-v2.2",
+	},
+	{
+		.compatible	= "fsl,qoriq-pcie-v2.3",
+	},
+	{
+		.compatible	= "fsl,qoriq-pcie-v2.4",
+	},
+	{
+		.compatible	= "fsl,qoriq-pcie-v3.0",
+	},
+	{
+		.compatible	= "fsl,qe",
+	},
+	/* The following two are matched by node name, for the Freescale hypervisor */
+	{
+		.name		= "hypervisor",
+	},
+	{
+		.name		= "handles",
+	},
+	{}	/* empty sentinel entry ends the table */
+};
+/* Probe the platform buses and devices matched by of_device_ids. */
+int __init corenet_gen_publish_devices(void)
+{
+	return of_platform_bus_probe(NULL, of_device_ids, NULL);
+}
+
+static const char * const boards[] __initconst = {
+	"fsl,P2041RDB",
+	"fsl,P3041DS",
+	"fsl,OCA4080",
+	"fsl,P4080DS",
+	"fsl,P5020DS",
+	"fsl,P5040DS",
+	"fsl,T4240QDS",
+	"fsl,B4860QDS",
+	"fsl,B4420QDS",
+	"fsl,B4220QDS",
+	"fsl,T1040QDS",
+	"fsl,T1042QDS",
+	"keymile,kmcoge4",	/* only entry with no "-hv" twin in hv_boards */
+	NULL			/* list terminator */
+};
+
+static const char * const hv_boards[] __initconst = {
+	"fsl,P2041RDB-hv",
+	"fsl,P3041DS-hv",
+	"fsl,OCA4080-hv",
+	"fsl,P4080DS-hv",
+	"fsl,P5020DS-hv",
+	"fsl,P5040DS-hv",
+	"fsl,T4240QDS-hv",
+	"fsl,B4860QDS-hv",
+	"fsl,B4420QDS-hv",
+	"fsl,B4220QDS-hv",
+	"fsl,T1040QDS-hv",
+	"fsl,T1042QDS-hv",
+	NULL			/* list terminator */
+};
+
+/*
+ * Machine probe: runs very early, while the device tree is still flattened
+ */
+static int __init corenet_generic_probe(void)
+{
+#ifdef CONFIG_SMP
+	extern struct smp_ops_t smp_85xx_ops;
+#endif
+	unsigned long dt_root = of_get_flat_dt_root();
+
+	if (of_flat_dt_match(dt_root, boards))
+		return 1;
+
+	/* Anything else is accepted only if it is a listed "-hv" board. */
+	if (!of_flat_dt_match(dt_root, hv_boards))
+		return 0;
+
+	/* Running under the Freescale hypervisor: override the ppc_md hooks. */
+	ppc_md.init_IRQ = ehv_pic_init;
+	ppc_md.get_irq = ehv_pic_get_irq;
+	ppc_md.restart = fsl_hv_restart;
+	ppc_md.power_off = fsl_hv_halt;
+	ppc_md.halt = fsl_hv_halt;
+#ifdef CONFIG_SMP
+	/*
+	 * Disable the timebase sync operations because we can't write
+	 * to the timebase registers under the hypervisor.
+	 */
+	smp_85xx_ops.give_timebase = NULL;
+	smp_85xx_ops.take_timebase = NULL;
+#endif
+	return 1;
+}
+
+define_machine(corenet_generic) {
+	.name			= "CoreNet Generic",
+	.probe			= corenet_generic_probe,
+	.setup_arch		= corenet_gen_setup_arch,
+	.init_IRQ		= corenet_gen_pic_init,
+	.get_irq		= mpic_get_coreint_irq,
+	.restart		= fsl_rstcr_restart,
+	.calibrate_decr		= generic_calibrate_decr,
+	.progress		= udbg_progress,
+#ifdef CONFIG_PPC64
+	.power_save		= book3e_idle,
+#else
+	.power_save		= e500_idle,
+#endif /* CONFIG_PPC64 */
+#ifdef CONFIG_PCI
+	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+	.pcibios_fixup_phb	= fsl_pcibios_fixup_phb,
+#endif /* CONFIG_PCI */
+};
+
+machine_arch_initcall(corenet_generic, corenet_gen_publish_devices);
+
+#ifdef CONFIG_SWIOTLB
+machine_arch_initcall(corenet_generic, swiotlb_setup_bus_notifier);
+#endif /* CONFIG_SWIOTLB */
diff --git a/arch/powerpc/platforms/85xx/ge_imp3a.c b/arch/powerpc/platforms/85xx/ge_imp3a.c
index e6285ae6f42..11790e074c8 100644
--- a/arch/powerpc/platforms/85xx/ge_imp3a.c
+++ b/arch/powerpc/platforms/85xx/ge_imp3a.c
@@ -215,6 +215,7 @@ define_machine(ge_imp3a) {
 	.show_cpuinfo		= ge_imp3a_show_cpuinfo,
 #ifdef CONFIG_PCI
 	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+	.pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
 #endif
 	.get_irq		= mpic_get_irq,
 	.restart		= fsl_rstcr_restart,
diff --git a/arch/powerpc/platforms/85xx/mpc8536_ds.c b/arch/powerpc/platforms/85xx/mpc8536_ds.c
index 15ce4b55f11..a378ba3519e 100644
--- a/arch/powerpc/platforms/85xx/mpc8536_ds.c
+++ b/arch/powerpc/platforms/85xx/mpc8536_ds.c
@@ -76,6 +76,7 @@ define_machine(mpc8536_ds) {
 	.init_IRQ		= mpc8536_ds_pic_init,
 #ifdef CONFIG_PCI
 	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+	.pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
 #endif
 	.get_irq		= mpic_get_irq,
 	.restart		= fsl_rstcr_restart,
diff --git a/arch/powerpc/platforms/85xx/mpc85xx.h b/arch/powerpc/platforms/85xx/mpc85xx.h
index 2aa7c5dc2c7..39056f6befe 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx.h
+++ b/arch/powerpc/platforms/85xx/mpc85xx.h
@@ -8,4 +8,12 @@ extern void mpc85xx_cpm2_pic_init(void);
 static inline void __init mpc85xx_cpm2_pic_init(void) {}
 #endif /* CONFIG_CPM2 */
 
+#ifdef CONFIG_QUICC_ENGINE
+extern void mpc85xx_qe_init(void);
+extern void mpc85xx_qe_par_io_init(void);
+#else
+static inline void __init mpc85xx_qe_init(void) {}
+static inline void __init mpc85xx_qe_par_io_init(void) {}
+#endif /* CONFIG_QUICC_ENGINE */
+
 #endif
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_cds.c b/arch/powerpc/platforms/85xx/mpc85xx_cds.c
index 7a31a0e1df2..b0753e22208 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_cds.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_cds.c
@@ -385,6 +385,7 @@ define_machine(mpc85xx_cds) {
 #ifdef CONFIG_PCI
 	.restart	= mpc85xx_cds_restart,
 	.pcibios_fixup_bus	= mpc85xx_cds_fixup_bus,
+	.pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
 #else
 	.restart	= fsl_rstcr_restart,
 #endif
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_ds.c b/arch/powerpc/platforms/85xx/mpc85xx_ds.c
index 9ebb91ed96a..ffdf02121a7 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_ds.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_ds.c
@@ -209,6 +209,7 @@ define_machine(mpc8544_ds) {
 	.init_IRQ		= mpc85xx_ds_pic_init,
 #ifdef CONFIG_PCI
 	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+	.pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
 #endif
 	.get_irq		= mpic_get_irq,
 	.restart		= fsl_rstcr_restart,
@@ -223,6 +224,7 @@ define_machine(mpc8572_ds) {
 	.init_IRQ		= mpc85xx_ds_pic_init,
 #ifdef CONFIG_PCI
 	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+	.pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
 #endif
 	.get_irq		= mpic_get_irq,
 	.restart		= fsl_rstcr_restart,
@@ -237,6 +239,7 @@ define_machine(p2020_ds) {
 	.init_IRQ		= mpc85xx_ds_pic_init,
 #ifdef CONFIG_PCI
 	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+	.pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
 #endif
 	.get_irq		= mpic_get_irq,
 	.restart		= fsl_rstcr_restart,
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_mds.c b/arch/powerpc/platforms/85xx/mpc85xx_mds.c
index a7b3621a8df..a392e94a07f 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_mds.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_mds.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2006-2010, 2012 Freescale Semiconductor, Inc.
+ * Copyright (C) 2006-2010, 2012-2013 Freescale Semiconductor, Inc.
  * All rights reserved.
  *
  * Author: Andy Fleming <afleming@freescale.com>
@@ -238,32 +238,8 @@ static void __init mpc85xx_mds_qe_init(void)
 {
 	struct device_node *np;
 
-	np = of_find_compatible_node(NULL, NULL, "fsl,qe");
-	if (!np) {
-		np = of_find_node_by_name(NULL, "qe");
-		if (!np)
-			return;
-	}
-
-	if (!of_device_is_available(np)) {
-		of_node_put(np);
-		return;
-	}
-
-	qe_reset();
-	of_node_put(np);
-
-	np = of_find_node_by_name(NULL, "par_io");
-	if (np) {
-		struct device_node *ucc;
-
-		par_io_init(np);
-		of_node_put(np);
-
-		for_each_node_by_name(ucc, "ucc")
-			par_io_of_config(ucc);
-	}
-
+	mpc85xx_qe_init();
+	mpc85xx_qe_par_io_init();
 	mpc85xx_mds_reset_ucc_phys();
 
 	if (machine_is(p1021_mds)) {
@@ -416,6 +392,7 @@ define_machine(mpc8568_mds) {
 	.progress	= udbg_progress,
 #ifdef CONFIG_PCI
 	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+	.pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
 #endif
 };
 
@@ -437,6 +414,7 @@ define_machine(mpc8569_mds) {
 	.progress	= udbg_progress,
 #ifdef CONFIG_PCI
 	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+	.pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
 #endif
 };
 
@@ -459,6 +437,7 @@ define_machine(p1021_mds) {
 	.progress	= udbg_progress,
 #ifdef CONFIG_PCI
 	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+	.pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
 #endif
 };
 
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_rdb.c b/arch/powerpc/platforms/85xx/mpc85xx_rdb.c
index ede8771d6f0..e358bed66d0 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_rdb.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_rdb.c
@@ -1,7 +1,7 @@
 /*
  * MPC85xx RDB Board Setup
  *
- * Copyright 2009,2012 Freescale Semiconductor Inc.
+ * Copyright 2009,2012-2013 Freescale Semiconductor Inc.
  *
  * This program is free software; you can redistribute  it and/or modify it
  * under  the terms of  the GNU General  Public License as published by the
@@ -86,10 +86,6 @@ void __init mpc85xx_rdb_pic_init(void)
  */
 static void __init mpc85xx_rdb_setup_arch(void)
 {
-#ifdef CONFIG_QUICC_ENGINE
-	struct device_node *np;
-#endif
-
 	if (ppc_md.progress)
 		ppc_md.progress("mpc85xx_rdb_setup_arch()", 0);
 
@@ -98,28 +94,11 @@ static void __init mpc85xx_rdb_setup_arch(void)
 	fsl_pci_assign_primary();
 
 #ifdef CONFIG_QUICC_ENGINE
-	np = of_find_compatible_node(NULL, NULL, "fsl,qe");
-	if (!np) {
-		pr_err("%s: Could not find Quicc Engine node\n", __func__);
-		goto qe_fail;
-	}
-
-	qe_reset();
-	of_node_put(np);
-
-	np = of_find_node_by_name(NULL, "par_io");
-	if (np) {
-		struct device_node *ucc;
-
-		par_io_init(np);
-		of_node_put(np);
-
-		for_each_node_by_name(ucc, "ucc")
-			par_io_of_config(ucc);
-
-	}
+	mpc85xx_qe_init();
+	mpc85xx_qe_par_io_init();
 #if defined(CONFIG_UCC_GETH) || defined(CONFIG_SERIAL_QE)
 	if (machine_is(p1025_rdb)) {
+		struct device_node *np;
 
 		struct ccsr_guts __iomem *guts;
 
@@ -148,8 +127,6 @@ static void __init mpc85xx_rdb_setup_arch(void)
 
 	}
 #endif
-
-qe_fail:
 #endif	/* CONFIG_QUICC_ENGINE */
 
 	printk(KERN_INFO "MPC85xx RDB board from Freescale Semiconductor\n");
@@ -160,6 +137,7 @@ machine_arch_initcall(p2020_rdb_pc, mpc85xx_common_publish_devices);
 machine_arch_initcall(p1020_mbg_pc, mpc85xx_common_publish_devices);
 machine_arch_initcall(p1020_rdb, mpc85xx_common_publish_devices);
 machine_arch_initcall(p1020_rdb_pc, mpc85xx_common_publish_devices);
+machine_arch_initcall(p1020_rdb_pd, mpc85xx_common_publish_devices);
 machine_arch_initcall(p1020_utm_pc, mpc85xx_common_publish_devices);
 machine_arch_initcall(p1021_rdb_pc, mpc85xx_common_publish_devices);
 machine_arch_initcall(p1025_rdb, mpc85xx_common_publish_devices);
@@ -193,6 +171,13 @@ static int __init p1020_rdb_pc_probe(void)
 	return of_flat_dt_is_compatible(root, "fsl,P1020RDB-PC");
 }
 
+static int __init p1020_rdb_pd_probe(void)
+{
+	/* Report whether the flat DT root is compatible with the -PD board. */
+	return of_flat_dt_is_compatible(of_get_flat_dt_root(),
+					"fsl,P1020RDB-PD");
+}
+
 static int __init p1021_rdb_pc_probe(void)
 {
 	unsigned long root = of_get_flat_dt_root();
@@ -246,6 +231,7 @@ define_machine(p2020_rdb) {
 	.init_IRQ		= mpc85xx_rdb_pic_init,
 #ifdef CONFIG_PCI
 	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+	.pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
 #endif
 	.get_irq		= mpic_get_irq,
 	.restart		= fsl_rstcr_restart,
@@ -260,6 +246,7 @@ define_machine(p1020_rdb) {
 	.init_IRQ		= mpc85xx_rdb_pic_init,
 #ifdef CONFIG_PCI
 	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+	.pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
 #endif
 	.get_irq		= mpic_get_irq,
 	.restart		= fsl_rstcr_restart,
@@ -274,6 +261,7 @@ define_machine(p1021_rdb_pc) {
 	.init_IRQ		= mpc85xx_rdb_pic_init,
 #ifdef CONFIG_PCI
 	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+	.pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
 #endif
 	.get_irq		= mpic_get_irq,
 	.restart		= fsl_rstcr_restart,
@@ -288,6 +276,7 @@ define_machine(p2020_rdb_pc) {
 	.init_IRQ		= mpc85xx_rdb_pic_init,
 #ifdef CONFIG_PCI
 	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+	.pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
 #endif
 	.get_irq		= mpic_get_irq,
 	.restart		= fsl_rstcr_restart,
@@ -302,6 +291,7 @@ define_machine(p1025_rdb) {
 	.init_IRQ		= mpc85xx_rdb_pic_init,
 #ifdef CONFIG_PCI
 	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+	.pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
 #endif
 	.get_irq		= mpic_get_irq,
 	.restart		= fsl_rstcr_restart,
@@ -316,6 +306,7 @@ define_machine(p1020_mbg_pc) {
 	.init_IRQ		= mpc85xx_rdb_pic_init,
 #ifdef CONFIG_PCI
 	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+	.pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
 #endif
 	.get_irq		= mpic_get_irq,
 	.restart		= fsl_rstcr_restart,
@@ -330,6 +321,7 @@ define_machine(p1020_utm_pc) {
 	.init_IRQ		= mpc85xx_rdb_pic_init,
 #ifdef CONFIG_PCI
 	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+	.pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
 #endif
 	.get_irq		= mpic_get_irq,
 	.restart		= fsl_rstcr_restart,
@@ -344,6 +336,22 @@ define_machine(p1020_rdb_pc) {
 	.init_IRQ		= mpc85xx_rdb_pic_init,
 #ifdef CONFIG_PCI
 	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+	.pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
+#endif
+	.get_irq		= mpic_get_irq,
+	.restart		= fsl_rstcr_restart,
+	.calibrate_decr		= generic_calibrate_decr,
+	.progress		= udbg_progress,
+};
+
+define_machine(p1020_rdb_pd) {
+	.name			= "P1020RDB-PD",
+	.probe			= p1020_rdb_pd_probe,
+	.setup_arch		= mpc85xx_rdb_setup_arch,
+	.init_IRQ		= mpc85xx_rdb_pic_init,
+#ifdef CONFIG_PCI
+	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+	.pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
 #endif
 	.get_irq		= mpic_get_irq,
 	.restart		= fsl_rstcr_restart,
@@ -358,6 +366,7 @@ define_machine(p1024_rdb) {
 	.init_IRQ		= mpc85xx_rdb_pic_init,
 #ifdef CONFIG_PCI
 	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+	.pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
 #endif
 	.get_irq		= mpic_get_irq,
 	.restart		= fsl_rstcr_restart,
diff --git a/arch/powerpc/platforms/85xx/p1010rdb.c b/arch/powerpc/platforms/85xx/p1010rdb.c
index 0252961392d..ad1a3d438a9 100644
--- a/arch/powerpc/platforms/85xx/p1010rdb.c
+++ b/arch/powerpc/platforms/85xx/p1010rdb.c
@@ -66,6 +66,8 @@ static int __init p1010_rdb_probe(void)
 
 	if (of_flat_dt_is_compatible(root, "fsl,P1010RDB"))
 		return 1;
+	if (of_flat_dt_is_compatible(root, "fsl,P1010RDB-PB"))
+		return 1;
 	return 0;
 }
 
@@ -76,6 +78,7 @@ define_machine(p1010_rdb) {
 	.init_IRQ		= p1010_rdb_pic_init,
 #ifdef CONFIG_PCI
 	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+	.pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
 #endif
 	.get_irq		= mpic_get_irq,
 	.restart		= fsl_rstcr_restart,
diff --git a/arch/powerpc/platforms/85xx/p1022_ds.c b/arch/powerpc/platforms/85xx/p1022_ds.c
index e611e79f23c..6ac986d3f8a 100644
--- a/arch/powerpc/platforms/85xx/p1022_ds.c
+++ b/arch/powerpc/platforms/85xx/p1022_ds.c
@@ -567,6 +567,7 @@ define_machine(p1022_ds) {
 	.init_IRQ		= p1022_ds_pic_init,
 #ifdef CONFIG_PCI
 	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+	.pcibios_fixup_phb	= fsl_pcibios_fixup_phb,
 #endif
 	.get_irq		= mpic_get_irq,
 	.restart		= fsl_rstcr_restart,
diff --git a/arch/powerpc/platforms/85xx/p1022_rdk.c b/arch/powerpc/platforms/85xx/p1022_rdk.c
index 8c9297112b3..7a180f0308d 100644
--- a/arch/powerpc/platforms/85xx/p1022_rdk.c
+++ b/arch/powerpc/platforms/85xx/p1022_rdk.c
@@ -147,6 +147,7 @@ define_machine(p1022_rdk) {
 	.init_IRQ		= p1022_rdk_pic_init,
 #ifdef CONFIG_PCI
 	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+	.pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
 #endif
 	.get_irq		= mpic_get_irq,
 	.restart		= fsl_rstcr_restart,
diff --git a/arch/powerpc/platforms/85xx/p1023_rds.c b/arch/powerpc/platforms/85xx/p1023_rdb.c
index 9cc60a73883..d5b7509825d 100644
--- a/arch/powerpc/platforms/85xx/p1023_rds.c
+++ b/arch/powerpc/platforms/85xx/p1023_rdb.c
@@ -1,10 +1,10 @@
 /*
- * Copyright 2010-2011 Freescale Semiconductor, Inc.
+ * Copyright 2010-2011, 2013 Freescale Semiconductor, Inc.
  *
  * Author: Roy Zang <tie-fei.zang@freescale.com>
  *
  * Description:
- * P1023 RDS Board Setup
+ * P1023 RDB Board Setup
  *
  * This program is free software; you can redistribute  it and/or modify it
  * under  the terms of  the GNU General  Public License as published by the
@@ -41,12 +41,12 @@
  * Setup the architecture
  *
  */
-static void __init mpc85xx_rds_setup_arch(void)
+static void __init mpc85xx_rdb_setup_arch(void)
 {
 	struct device_node *np;
 
 	if (ppc_md.progress)
-		ppc_md.progress("p1023_rds_setup_arch()", 0);
+		ppc_md.progress("p1023_rdb_setup_arch()", 0);
 
 	/* Map BCSR area */
 	np = of_find_node_by_name(NULL, "bcsr");
@@ -85,9 +85,9 @@ static void __init mpc85xx_rds_setup_arch(void)
 	fsl_pci_assign_primary();
 }
 
-machine_arch_initcall(p1023_rds, mpc85xx_common_publish_devices);
+machine_arch_initcall(p1023_rdb, mpc85xx_common_publish_devices);
 
-static void __init mpc85xx_rds_pic_init(void)
+static void __init mpc85xx_rdb_pic_init(void)
 {
 	struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN |
 		MPIC_SINGLE_DEST_CPU,
@@ -98,25 +98,25 @@ static void __init mpc85xx_rds_pic_init(void)
 	mpic_init(mpic);
 }
 
-static int __init p1023_rds_probe(void)
+static int __init p1023_rdb_probe(void)
 {
 	unsigned long root = of_get_flat_dt_root();
 
-	return of_flat_dt_is_compatible(root, "fsl,P1023RDS");
+	return of_flat_dt_is_compatible(root, "fsl,P1023RDB");
 
 }
 
-define_machine(p1023_rds) {
-	.name			= "P1023 RDS",
-	.probe			= p1023_rds_probe,
-	.setup_arch		= mpc85xx_rds_setup_arch,
-	.init_IRQ		= mpc85xx_rds_pic_init,
+define_machine(p1023_rdb) {
+	.name			= "P1023 RDB",
+	.probe			= p1023_rdb_probe,
+	.setup_arch		= mpc85xx_rdb_setup_arch,
+	.init_IRQ		= mpc85xx_rdb_pic_init,
 	.get_irq		= mpic_get_irq,
 	.restart		= fsl_rstcr_restart,
 	.calibrate_decr		= generic_calibrate_decr,
 	.progress		= udbg_progress,
 #ifdef CONFIG_PCI
 	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+	.pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
 #endif
 };
-
diff --git a/arch/powerpc/platforms/85xx/p2041_rdb.c b/arch/powerpc/platforms/85xx/p2041_rdb.c
deleted file mode 100644
index 000c0892fc4..00000000000
--- a/arch/powerpc/platforms/85xx/p2041_rdb.c
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * P2041 RDB Setup
- *
- * Copyright 2011 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- */
-
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/kdev_t.h>
-#include <linux/delay.h>
-#include <linux/interrupt.h>
-#include <linux/phy.h>
-
-#include <asm/time.h>
-#include <asm/machdep.h>
-#include <asm/pci-bridge.h>
-#include <mm/mmu_decl.h>
-#include <asm/prom.h>
-#include <asm/udbg.h>
-#include <asm/mpic.h>
-
-#include <linux/of_platform.h>
-#include <sysdev/fsl_soc.h>
-#include <sysdev/fsl_pci.h>
-#include <asm/ehv_pic.h>
-
-#include "corenet_ds.h"
-
-/*
- * Called very early, device-tree isn't unflattened
- */
-static int __init p2041_rdb_probe(void)
-{
-	unsigned long root = of_get_flat_dt_root();
-#ifdef CONFIG_SMP
-	extern struct smp_ops_t smp_85xx_ops;
-#endif
-
-	if (of_flat_dt_is_compatible(root, "fsl,P2041RDB"))
-		return 1;
-
-	/* Check if we're running under the Freescale hypervisor */
-	if (of_flat_dt_is_compatible(root, "fsl,P2041RDB-hv")) {
-		ppc_md.init_IRQ = ehv_pic_init;
-		ppc_md.get_irq = ehv_pic_get_irq;
-		ppc_md.restart = fsl_hv_restart;
-		ppc_md.power_off = fsl_hv_halt;
-		ppc_md.halt = fsl_hv_halt;
-#ifdef CONFIG_SMP
-		/*
-		 * Disable the timebase sync operations because we can't write
-		 * to the timebase registers under the hypervisor.
-		  */
-		smp_85xx_ops.give_timebase = NULL;
-		smp_85xx_ops.take_timebase = NULL;
-#endif
-		return 1;
-	}
-
-	return 0;
-}
-
-define_machine(p2041_rdb) {
-	.name			= "P2041 RDB",
-	.probe			= p2041_rdb_probe,
-	.setup_arch		= corenet_ds_setup_arch,
-	.init_IRQ		= corenet_ds_pic_init,
-#ifdef CONFIG_PCI
-	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
-#endif
-	.get_irq		= mpic_get_coreint_irq,
-	.restart		= fsl_rstcr_restart,
-	.calibrate_decr		= generic_calibrate_decr,
-	.progress		= udbg_progress,
-	.power_save		= e500_idle,
-};
-
-machine_arch_initcall(p2041_rdb, corenet_ds_publish_devices);
-
-#ifdef CONFIG_SWIOTLB
-machine_arch_initcall(p2041_rdb, swiotlb_setup_bus_notifier);
-#endif
diff --git a/arch/powerpc/platforms/85xx/p3041_ds.c b/arch/powerpc/platforms/85xx/p3041_ds.c
deleted file mode 100644
index b3edc205daa..00000000000
--- a/arch/powerpc/platforms/85xx/p3041_ds.c
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * P3041 DS Setup
- *
- * Maintained by Kumar Gala (see MAINTAINERS for contact information)
- *
- * Copyright 2009-2010 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- */
-
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/kdev_t.h>
-#include <linux/delay.h>
-#include <linux/interrupt.h>
-#include <linux/phy.h>
-
-#include <asm/time.h>
-#include <asm/machdep.h>
-#include <asm/pci-bridge.h>
-#include <mm/mmu_decl.h>
-#include <asm/prom.h>
-#include <asm/udbg.h>
-#include <asm/mpic.h>
-
-#include <linux/of_platform.h>
-#include <sysdev/fsl_soc.h>
-#include <sysdev/fsl_pci.h>
-#include <asm/ehv_pic.h>
-
-#include "corenet_ds.h"
-
-/*
- * Called very early, device-tree isn't unflattened
- */
-static int __init p3041_ds_probe(void)
-{
-	unsigned long root = of_get_flat_dt_root();
-#ifdef CONFIG_SMP
-	extern struct smp_ops_t smp_85xx_ops;
-#endif
-
-	if (of_flat_dt_is_compatible(root, "fsl,P3041DS"))
-		return 1;
-
-	/* Check if we're running under the Freescale hypervisor */
-	if (of_flat_dt_is_compatible(root, "fsl,P3041DS-hv")) {
-		ppc_md.init_IRQ = ehv_pic_init;
-		ppc_md.get_irq = ehv_pic_get_irq;
-		ppc_md.restart = fsl_hv_restart;
-		ppc_md.power_off = fsl_hv_halt;
-		ppc_md.halt = fsl_hv_halt;
-#ifdef CONFIG_SMP
-		/*
-		 * Disable the timebase sync operations because we can't write
-		 * to the timebase registers under the hypervisor.
-		  */
-		smp_85xx_ops.give_timebase = NULL;
-		smp_85xx_ops.take_timebase = NULL;
-#endif
-		return 1;
-	}
-
-	return 0;
-}
-
-define_machine(p3041_ds) {
-	.name			= "P3041 DS",
-	.probe			= p3041_ds_probe,
-	.setup_arch		= corenet_ds_setup_arch,
-	.init_IRQ		= corenet_ds_pic_init,
-#ifdef CONFIG_PCI
-	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
-#endif
-	.get_irq		= mpic_get_coreint_irq,
-	.restart		= fsl_rstcr_restart,
-	.calibrate_decr		= generic_calibrate_decr,
-	.progress		= udbg_progress,
-	.power_save		= e500_idle,
-};
-
-machine_arch_initcall(p3041_ds, corenet_ds_publish_devices);
-
-#ifdef CONFIG_SWIOTLB
-machine_arch_initcall(p3041_ds, swiotlb_setup_bus_notifier);
-#endif
diff --git a/arch/powerpc/platforms/85xx/p4080_ds.c b/arch/powerpc/platforms/85xx/p4080_ds.c
deleted file mode 100644
index 54df10632ae..00000000000
--- a/arch/powerpc/platforms/85xx/p4080_ds.c
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * P4080 DS Setup
- *
- * Maintained by Kumar Gala (see MAINTAINERS for contact information)
- *
- * Copyright 2009 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- */
-
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/kdev_t.h>
-#include <linux/delay.h>
-#include <linux/interrupt.h>
-
-#include <asm/time.h>
-#include <asm/machdep.h>
-#include <asm/pci-bridge.h>
-#include <mm/mmu_decl.h>
-#include <asm/prom.h>
-#include <asm/udbg.h>
-#include <asm/mpic.h>
-
-#include <linux/of_platform.h>
-#include <sysdev/fsl_soc.h>
-#include <sysdev/fsl_pci.h>
-#include <asm/ehv_pic.h>
-
-#include "corenet_ds.h"
-
-/*
- * Called very early, device-tree isn't unflattened
- */
-static int __init p4080_ds_probe(void)
-{
-	unsigned long root = of_get_flat_dt_root();
-#ifdef CONFIG_SMP
-	extern struct smp_ops_t smp_85xx_ops;
-#endif
-
-	if (of_flat_dt_is_compatible(root, "fsl,P4080DS"))
-		return 1;
-
-	/* Check if we're running under the Freescale hypervisor */
-	if (of_flat_dt_is_compatible(root, "fsl,P4080DS-hv")) {
-		ppc_md.init_IRQ = ehv_pic_init;
-		ppc_md.get_irq = ehv_pic_get_irq;
-		ppc_md.restart = fsl_hv_restart;
-		ppc_md.power_off = fsl_hv_halt;
-		ppc_md.halt = fsl_hv_halt;
-#ifdef CONFIG_SMP
-		/*
-		 * Disable the timebase sync operations because we can't write
-		 * to the timebase registers under the hypervisor.
-		  */
-		smp_85xx_ops.give_timebase = NULL;
-		smp_85xx_ops.take_timebase = NULL;
-#endif
-		return 1;
-	}
-
-	return 0;
-}
-
-define_machine(p4080_ds) {
-	.name			= "P4080 DS",
-	.probe			= p4080_ds_probe,
-	.setup_arch		= corenet_ds_setup_arch,
-	.init_IRQ		= corenet_ds_pic_init,
-#ifdef CONFIG_PCI
-	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
-#endif
-	.get_irq		= mpic_get_coreint_irq,
-	.restart		= fsl_rstcr_restart,
-	.calibrate_decr		= generic_calibrate_decr,
-	.progress		= udbg_progress,
-	.power_save		= e500_idle,
-};
-
-machine_arch_initcall(p4080_ds, corenet_ds_publish_devices);
-#ifdef CONFIG_SWIOTLB
-machine_arch_initcall(p4080_ds, swiotlb_setup_bus_notifier);
-#endif
diff --git a/arch/powerpc/platforms/85xx/p5020_ds.c b/arch/powerpc/platforms/85xx/p5020_ds.c
deleted file mode 100644
index 753a42c29d4..00000000000
--- a/arch/powerpc/platforms/85xx/p5020_ds.c
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * P5020 DS Setup
- *
- * Maintained by Kumar Gala (see MAINTAINERS for contact information)
- *
- * Copyright 2009-2010 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- */
-
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/kdev_t.h>
-#include <linux/delay.h>
-#include <linux/interrupt.h>
-#include <linux/phy.h>
-
-#include <asm/time.h>
-#include <asm/machdep.h>
-#include <asm/pci-bridge.h>
-#include <mm/mmu_decl.h>
-#include <asm/prom.h>
-#include <asm/udbg.h>
-#include <asm/mpic.h>
-
-#include <linux/of_platform.h>
-#include <sysdev/fsl_soc.h>
-#include <sysdev/fsl_pci.h>
-#include <asm/ehv_pic.h>
-
-#include "corenet_ds.h"
-
-/*
- * Called very early, device-tree isn't unflattened
- */
-static int __init p5020_ds_probe(void)
-{
-	unsigned long root = of_get_flat_dt_root();
-#ifdef CONFIG_SMP
-	extern struct smp_ops_t smp_85xx_ops;
-#endif
-
-	if (of_flat_dt_is_compatible(root, "fsl,P5020DS"))
-		return 1;
-
-	/* Check if we're running under the Freescale hypervisor */
-	if (of_flat_dt_is_compatible(root, "fsl,P5020DS-hv")) {
-		ppc_md.init_IRQ = ehv_pic_init;
-		ppc_md.get_irq = ehv_pic_get_irq;
-		ppc_md.restart = fsl_hv_restart;
-		ppc_md.power_off = fsl_hv_halt;
-		ppc_md.halt = fsl_hv_halt;
-#ifdef CONFIG_SMP
-		/*
-		 * Disable the timebase sync operations because we can't write
-		 * to the timebase registers under the hypervisor.
-		  */
-		smp_85xx_ops.give_timebase = NULL;
-		smp_85xx_ops.take_timebase = NULL;
-#endif
-		return 1;
-	}
-
-	return 0;
-}
-
-define_machine(p5020_ds) {
-	.name			= "P5020 DS",
-	.probe			= p5020_ds_probe,
-	.setup_arch		= corenet_ds_setup_arch,
-	.init_IRQ		= corenet_ds_pic_init,
-#ifdef CONFIG_PCI
-	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
-#endif
-/* coreint doesn't play nice with lazy EE, use legacy mpic for now */
-#ifdef CONFIG_PPC64
-	.get_irq		= mpic_get_irq,
-#else
-	.get_irq		= mpic_get_coreint_irq,
-#endif
-	.restart		= fsl_rstcr_restart,
-	.calibrate_decr		= generic_calibrate_decr,
-	.progress		= udbg_progress,
-#ifdef CONFIG_PPC64
-	.power_save		= book3e_idle,
-#else
-	.power_save		= e500_idle,
-#endif
-};
-
-machine_arch_initcall(p5020_ds, corenet_ds_publish_devices);
-
-#ifdef CONFIG_SWIOTLB
-machine_arch_initcall(p5020_ds, swiotlb_setup_bus_notifier);
-#endif
diff --git a/arch/powerpc/platforms/85xx/p5040_ds.c b/arch/powerpc/platforms/85xx/p5040_ds.c
deleted file mode 100644
index 11381851828..00000000000
--- a/arch/powerpc/platforms/85xx/p5040_ds.c
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * P5040 DS Setup
- *
- * Copyright 2009-2010 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- */
-
-#include <linux/kernel.h>
-#include <linux/pci.h>
-
-#include <asm/machdep.h>
-#include <asm/udbg.h>
-#include <asm/mpic.h>
-
-#include <linux/of_fdt.h>
-
-#include <sysdev/fsl_soc.h>
-#include <sysdev/fsl_pci.h>
-#include <asm/ehv_pic.h>
-
-#include "corenet_ds.h"
-
-/*
- * Called very early, device-tree isn't unflattened
- */
-static int __init p5040_ds_probe(void)
-{
-	unsigned long root = of_get_flat_dt_root();
-#ifdef CONFIG_SMP
-	extern struct smp_ops_t smp_85xx_ops;
-#endif
-
-	if (of_flat_dt_is_compatible(root, "fsl,P5040DS"))
-		return 1;
-
-	/* Check if we're running under the Freescale hypervisor */
-	if (of_flat_dt_is_compatible(root, "fsl,P5040DS-hv")) {
-		ppc_md.init_IRQ = ehv_pic_init;
-		ppc_md.get_irq = ehv_pic_get_irq;
-		ppc_md.restart = fsl_hv_restart;
-		ppc_md.power_off = fsl_hv_halt;
-		ppc_md.halt = fsl_hv_halt;
-#ifdef CONFIG_SMP
-		/*
-		 * Disable the timebase sync operations because we can't write
-		 * to the timebase registers under the hypervisor.
-		  */
-		smp_85xx_ops.give_timebase = NULL;
-		smp_85xx_ops.take_timebase = NULL;
-#endif
-		return 1;
-	}
-
-	return 0;
-}
-
-define_machine(p5040_ds) {
-	.name			= "P5040 DS",
-	.probe			= p5040_ds_probe,
-	.setup_arch		= corenet_ds_setup_arch,
-	.init_IRQ		= corenet_ds_pic_init,
-#ifdef CONFIG_PCI
-	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
-#endif
-/* coreint doesn't play nice with lazy EE, use legacy mpic for now */
-#ifdef CONFIG_PPC64
-	.get_irq		= mpic_get_irq,
-#else
-	.get_irq		= mpic_get_coreint_irq,
-#endif
-	.restart		= fsl_rstcr_restart,
-	.calibrate_decr		= generic_calibrate_decr,
-	.progress		= udbg_progress,
-#ifdef CONFIG_PPC64
-	.power_save		= book3e_idle,
-#else
-	.power_save		= e500_idle,
-#endif
-};
-
-machine_arch_initcall(p5040_ds, corenet_ds_publish_devices);
-
-#ifdef CONFIG_SWIOTLB
-machine_arch_initcall(p5040_ds, swiotlb_setup_bus_notifier);
-#endif
diff --git a/arch/powerpc/platforms/85xx/ppa8548.c b/arch/powerpc/platforms/85xx/ppa8548.c
index 6a7704b92c3..3daff7c6356 100644
--- a/arch/powerpc/platforms/85xx/ppa8548.c
+++ b/arch/powerpc/platforms/85xx/ppa8548.c
@@ -19,6 +19,7 @@
 #include <linux/init.h>
 #include <linux/reboot.h>
 #include <linux/seq_file.h>
+#include <linux/of_fdt.h>
 #include <linux/of_platform.h>
 
 #include <asm/machdep.h>
diff --git a/arch/powerpc/platforms/85xx/qemu_e500.c b/arch/powerpc/platforms/85xx/qemu_e500.c
index 5cefc5a9a14..7f267329354 100644
--- a/arch/powerpc/platforms/85xx/qemu_e500.c
+++ b/arch/powerpc/platforms/85xx/qemu_e500.c
@@ -66,6 +66,7 @@ define_machine(qemu_e500) {
 	.init_IRQ		= qemu_e500_pic_init,
 #ifdef CONFIG_PCI
 	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+	.pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
 #endif
 	.get_irq		= mpic_get_coreint_irq,
 	.restart		= fsl_rstcr_restart,
diff --git a/arch/powerpc/platforms/85xx/sbc8548.c b/arch/powerpc/platforms/85xx/sbc8548.c
index f6212182591..b07214666d6 100644
--- a/arch/powerpc/platforms/85xx/sbc8548.c
+++ b/arch/powerpc/platforms/85xx/sbc8548.c
@@ -135,6 +135,7 @@ define_machine(sbc8548) {
 	.restart	= fsl_rstcr_restart,
 #ifdef CONFIG_PCI
 	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+	.pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
 #endif
 	.calibrate_decr = generic_calibrate_decr,
 	.progress	= udbg_progress,
diff --git a/arch/powerpc/platforms/85xx/sgy_cts1000.c b/arch/powerpc/platforms/85xx/sgy_cts1000.c
index 7179726ba5c..bb75add6708 100644
--- a/arch/powerpc/platforms/85xx/sgy_cts1000.c
+++ b/arch/powerpc/platforms/85xx/sgy_cts1000.c
@@ -14,8 +14,8 @@
 #include <linux/platform_device.h>
 #include <linux/device.h>
 #include <linux/module.h>
-#include <linux/init.h>
 #include <linux/of_gpio.h>
+#include <linux/of_irq.h>
 #include <linux/workqueue.h>
 #include <linux/reboot.h>
 #include <linux/interrupt.h>
diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c
index 6a1759939c6..ba093f55367 100644
--- a/arch/powerpc/platforms/85xx/smp.c
+++ b/arch/powerpc/platforms/85xx/smp.c
@@ -15,6 +15,7 @@
 #include <linux/init.h>
 #include <linux/delay.h>
 #include <linux/of.h>
+#include <linux/of_address.h>
 #include <linux/kexec.h>
 #include <linux/highmem.h>
 #include <linux/cpu.h>
@@ -26,6 +27,7 @@
 #include <asm/cacheflush.h>
 #include <asm/dbell.h>
 #include <asm/fsl_guts.h>
+#include <asm/code-patching.h>
 
 #include <sysdev/fsl_soc.h>
 #include <sysdev/mpic.h>
@@ -69,7 +71,32 @@ static void mpc85xx_give_timebase(void)
 	tb_req = 0;
 
 	mpc85xx_timebase_freeze(1);
+#ifdef CONFIG_PPC64
+	/*
+	 * e5500/e6500 have a workaround for erratum A-006958 in place
+	 * that will reread the timebase until TBL is non-zero.
+	 * That would be a bad thing when the timebase is frozen.
+	 *
+	 * Thus, we read it manually, and instead of checking that
+	 * TBL is non-zero, we ensure that TB does not change.  We don't
+	 * do that for the main mftb implementation, because it requires
+	 * a scratch register.
+	 */
+	{
+		u64 prev;
+
+		asm volatile("mfspr %0, %1" : "=r" (timebase) :
+			     "i" (SPRN_TBRL));
+
+		do {
+			prev = timebase;
+			asm volatile("mfspr %0, %1" : "=r" (timebase) :
+				     "i" (SPRN_TBRL));
+		} while (prev != timebase);
+	}
+#else
 	timebase = get_tb();
+#endif
 	mb();
 	tb_valid = 1;
 
@@ -99,7 +126,7 @@ static void mpc85xx_take_timebase(void)
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
-static void __cpuinit smp_85xx_mach_cpu_die(void)
+static void smp_85xx_mach_cpu_die(void)
 {
 	unsigned int cpu = smp_processor_id();
 	u32 tmp;
@@ -141,7 +168,7 @@ static inline u32 read_spin_table_addr_l(void *spin_table)
 	return in_be32(&((struct epapr_spin_table *)spin_table)->addr_l);
 }
 
-static int __cpuinit smp_85xx_kick_cpu(int nr)
+static int smp_85xx_kick_cpu(int nr)
 {
 	unsigned long flags;
 	const u64 *cpu_rel_addr;
@@ -241,7 +268,7 @@ out:
 	flush_spin_table(spin_table);
 	out_be32(&spin_table->pir, hw_cpu);
 	out_be64((u64 *)(&spin_table->addr_h),
-	  __pa((u64)*((unsigned long long *)generic_secondary_smp_init)));
+		__pa(ppc_function_entry(generic_secondary_smp_init)));
 	flush_spin_table(spin_table);
 #endif
 
@@ -255,6 +282,7 @@ out:
 
 struct smp_ops_t smp_85xx_ops = {
 	.kick_cpu = smp_85xx_kick_cpu,
+	.cpu_bootable = smp_generic_cpu_bootable,
 #ifdef CONFIG_HOTPLUG_CPU
 	.cpu_disable	= generic_cpu_disable,
 	.cpu_die	= generic_cpu_die,
@@ -362,15 +390,18 @@ static void mpc85xx_smp_machine_kexec(struct kimage *image)
 }
 #endif /* CONFIG_KEXEC */
 
-static void __cpuinit smp_85xx_setup_cpu(int cpu_nr)
+static void smp_85xx_basic_setup(int cpu_nr)
 {
-	if (smp_85xx_ops.probe == smp_mpic_probe)
-		mpic_setup_this_cpu();
-
 	if (cpu_has_feature(CPU_FTR_DBELL))
 		doorbell_setup_this_cpu();
 }
 
+static void smp_85xx_setup_cpu(int cpu_nr)
+{
+	mpic_setup_this_cpu();
+	smp_85xx_basic_setup(cpu_nr);
+}
+
 static const struct of_device_id mpc85xx_smp_guts_ids[] = {
 	{ .compatible = "fsl,mpc8572-guts", },
 	{ .compatible = "fsl,p1020-guts", },
@@ -385,13 +416,14 @@ void __init mpc85xx_smp_init(void)
 {
 	struct device_node *np;
 
-	smp_85xx_ops.setup_cpu = smp_85xx_setup_cpu;
 
 	np = of_find_node_by_type(NULL, "open-pic");
 	if (np) {
 		smp_85xx_ops.probe = smp_mpic_probe;
+		smp_85xx_ops.setup_cpu = smp_85xx_setup_cpu;
 		smp_85xx_ops.message_pass = smp_mpic_message_pass;
-	}
+	} else
+		smp_85xx_ops.setup_cpu = smp_85xx_basic_setup;
 
 	if (cpu_has_feature(CPU_FTR_DBELL)) {
 		/*
@@ -400,6 +432,7 @@ void __init mpc85xx_smp_init(void)
 		 */
 		smp_85xx_ops.message_pass = NULL;
 		smp_85xx_ops.cause_ipi = doorbell_cause_ipi;
+		smp_85xx_ops.probe = NULL;
 	}
 
 	np = of_find_matching_node(NULL, mpc85xx_smp_guts_ids);
diff --git a/arch/powerpc/platforms/85xx/socrates_fpga_pic.c b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c
index 3bbbf748948..55a9682b952 100644
--- a/arch/powerpc/platforms/85xx/socrates_fpga_pic.c
+++ b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c
@@ -9,6 +9,8 @@
  */
 
 #include <linux/irq.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
 #include <linux/of_platform.h>
 #include <linux/io.h>
 
diff --git a/arch/powerpc/platforms/85xx/t4240_qds.c b/arch/powerpc/platforms/85xx/t4240_qds.c
deleted file mode 100644
index 5998e9f3330..00000000000
--- a/arch/powerpc/platforms/85xx/t4240_qds.c
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * T4240 QDS Setup
- *
- * Maintained by Kumar Gala (see MAINTAINERS for contact information)
- *
- * Copyright 2012 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- */
-
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/kdev_t.h>
-#include <linux/delay.h>
-#include <linux/interrupt.h>
-#include <linux/phy.h>
-
-#include <asm/time.h>
-#include <asm/machdep.h>
-#include <asm/pci-bridge.h>
-#include <mm/mmu_decl.h>
-#include <asm/prom.h>
-#include <asm/udbg.h>
-#include <asm/mpic.h>
-
-#include <linux/of_platform.h>
-#include <sysdev/fsl_soc.h>
-#include <sysdev/fsl_pci.h>
-#include <asm/ehv_pic.h>
-
-#include "corenet_ds.h"
-
-/*
- * Called very early, device-tree isn't unflattened
- */
-static int __init t4240_qds_probe(void)
-{
-	unsigned long root = of_get_flat_dt_root();
-#ifdef CONFIG_SMP
-	extern struct smp_ops_t smp_85xx_ops;
-#endif
-
-	if (of_flat_dt_is_compatible(root, "fsl,T4240QDS"))
-		return 1;
-
-	/* Check if we're running under the Freescale hypervisor */
-	if (of_flat_dt_is_compatible(root, "fsl,T4240QDS-hv")) {
-		ppc_md.init_IRQ = ehv_pic_init;
-		ppc_md.get_irq = ehv_pic_get_irq;
-		ppc_md.restart = fsl_hv_restart;
-		ppc_md.power_off = fsl_hv_halt;
-		ppc_md.halt = fsl_hv_halt;
-#ifdef CONFIG_SMP
-		/*
-		 * Disable the timebase sync operations because we can't write
-		 * to the timebase registers under the hypervisor.
-		  */
-		smp_85xx_ops.give_timebase = NULL;
-		smp_85xx_ops.take_timebase = NULL;
-#endif
-		return 1;
-	}
-
-	return 0;
-}
-
-define_machine(t4240_qds) {
-	.name			= "T4240 QDS",
-	.probe			= t4240_qds_probe,
-	.setup_arch		= corenet_ds_setup_arch,
-	.init_IRQ		= corenet_ds_pic_init,
-#ifdef CONFIG_PCI
-	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
-#endif
-/* coreint doesn't play nice with lazy EE, use legacy mpic for now */
-#ifdef CONFIG_PPC64
-	.get_irq		= mpic_get_irq,
-#else
-	.get_irq		= mpic_get_coreint_irq,
-#endif
-	.restart		= fsl_rstcr_restart,
-	.calibrate_decr		= generic_calibrate_decr,
-	.progress		= udbg_progress,
-#ifdef CONFIG_PPC64
-	.power_save		= book3e_idle,
-#else
-	.power_save		= e500_idle,
-#endif
-};
-
-machine_arch_initcall(t4240_qds, corenet_ds_publish_devices);
-
-#ifdef CONFIG_SWIOTLB
-machine_arch_initcall(t4240_qds, swiotlb_setup_bus_notifier);
-#endif
diff --git a/arch/powerpc/platforms/85xx/twr_p102x.c b/arch/powerpc/platforms/85xx/twr_p102x.c
new file mode 100644
index 00000000000..1eadb6d0dc6
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/twr_p102x.c
@@ -0,0 +1,148 @@
+/*
+ * Copyright 2010-2011, 2013 Freescale Semiconductor, Inc.
+ *
+ * Author: Michael Johnston <michael.johnston@freescale.com>
+ *
+ * Description:
+ * TWR-P102x Board Setup
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/pci.h>
+#include <linux/of_platform.h>
+
+#include <asm/pci-bridge.h>
+#include <asm/udbg.h>
+#include <asm/mpic.h>
+#include <asm/qe.h>
+#include <asm/qe_ic.h>
+#include <asm/fsl_guts.h>
+
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+#include "smp.h"
+
+#include "mpc85xx.h"
+
+/* Set up the MPIC and, if configured, the QUICC Engine PIC. */
+static void __init twr_p1025_pic_init(void)
+{
+#ifdef CONFIG_QUICC_ENGINE
+	struct device_node *qe_ic_node;
+#endif
+	struct mpic *mpic = mpic_alloc(NULL, 0,
+			MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU,
+			0, 256, " OpenPIC  ");
+
+	BUG_ON(!mpic);
+	mpic_init(mpic);
+
+#ifdef CONFIG_QUICC_ENGINE
+	/* Initialise the QE PIC with its low/high MPIC cascade handlers. */
+	qe_ic_node = of_find_compatible_node(NULL, NULL, "fsl,qe-ic");
+	if (!qe_ic_node) {
+		pr_err("Could not find qe-ic node\n");
+		return;
+	}
+	qe_ic_init(qe_ic_node, 0, qe_ic_cascade_low_mpic,
+			qe_ic_cascade_high_mpic);
+	of_node_put(qe_ic_node);
+#endif
+}
+
+/*
+ * Set up the architecture.
+ *
+ * Reports progress, then calls mpc85xx_smp_init() and
+ * fsl_pci_assign_primary(). With CONFIG_QUICC_ENGINE it also initialises the
+ * QE and its parallel I/O and, when a QE Ethernet or UART driver is
+ * configured, sets the QE pin-mux bits in PMUXCR and configures board pins.
+ */
+static void __init twr_p1025_setup_arch(void)
+{
+	if (ppc_md.progress)
+		ppc_md.progress("twr_p1025_setup_arch()", 0);
+
+	mpc85xx_smp_init();
+
+	fsl_pci_assign_primary();
+
+#ifdef CONFIG_QUICC_ENGINE
+	mpc85xx_qe_init();
+	mpc85xx_qe_par_io_init();
+
+#if defined(CONFIG_UCC_GETH) || defined(CONFIG_SERIAL_QE)
+	if (machine_is(twr_p1025)) {
+		struct ccsr_guts __iomem *guts;
+		struct device_node *np;
+
+		np = of_find_compatible_node(NULL, NULL, "fsl,p1021-guts");
+		if (np) {
+			guts = of_iomap(np, 0);
+			if (!guts)
+				pr_err("twr_p1025: could not map global utilities register\n");
+			else {
+			/* P1025 has pins muxed for QE and other functions. To
+			 * enable QE UEC mode, we need to set bit QE0 for UCC1
+			 * in Eth mode, QE0 and QE3 for UCC5 in Eth mode, QE9
+			 * and QE12 for QE MII management signals in PMUXCR
+			 * register.
+			 * Set QE mux bits in PMUXCR */
+			setbits32(&guts->pmuxcr, MPC85xx_PMUXCR_QE(0) |
+					MPC85xx_PMUXCR_QE(3) |
+					MPC85xx_PMUXCR_QE(9) |
+					MPC85xx_PMUXCR_QE(12));
+			iounmap(guts);
+
+#if defined(CONFIG_SERIAL_QE)
+			/* On the P1025TWR board UCC7 acts as a UART port.
+			 * However, the UCC7 CTS pin is low by default,
+			 * which would disturb transmission in full-duplex
+			 * communication, so disable flow-control pin PA18.
+			 * The UCC7 UART can then only use the RXD and TXD pins.
+			 */
+			par_io_config_pin(0, 18, 0, 0, 0, 0);
+#endif
+			/* Drive PB29 to CPLD low - CPLD will then change
+			 * muxing from LBC to QE */
+			par_io_config_pin(1, 29, 1, 0, 0, 0);
+			par_io_data_set(1, 29, 0);
+			}
+			of_node_put(np);
+		}
+	}
+#endif
+#endif	/* CONFIG_QUICC_ENGINE */
+
+	pr_info("TWR-P1025 board from Freescale Semiconductor\n");
+}
+
+machine_arch_initcall(twr_p1025, mpc85xx_common_publish_devices);
+
+static int __init twr_p1025_probe(void)
+{
+	/* Match the flat device tree root against this board's compatible. */
+	return of_flat_dt_is_compatible(of_get_flat_dt_root(),
+					"fsl,TWR-P1025");
+}
+
+define_machine(twr_p1025) {
+	.name			= "TWR-P1025",
+	.probe			= twr_p1025_probe,
+	.setup_arch		= twr_p1025_setup_arch,
+	.init_IRQ		= twr_p1025_pic_init,
+#ifdef CONFIG_PCI
+	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+#endif /* CONFIG_PCI */
+	.get_irq		= mpic_get_irq,
+	.restart		= fsl_rstcr_restart,
+	.calibrate_decr		= generic_calibrate_decr,
+	.progress		= udbg_progress,
+};
diff --git a/arch/powerpc/platforms/85xx/xes_mpc85xx.c b/arch/powerpc/platforms/85xx/xes_mpc85xx.c
index dcbf7e42dce..1a9c1085855 100644
--- a/arch/powerpc/platforms/85xx/xes_mpc85xx.c
+++ b/arch/powerpc/platforms/85xx/xes_mpc85xx.c
@@ -170,6 +170,7 @@ define_machine(xes_mpc8572) {
 	.init_IRQ		= xes_mpc85xx_pic_init,
 #ifdef CONFIG_PCI
 	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+	.pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
 #endif
 	.get_irq		= mpic_get_irq,
 	.restart		= fsl_rstcr_restart,
@@ -184,6 +185,7 @@ define_machine(xes_mpc8548) {
 	.init_IRQ		= xes_mpc85xx_pic_init,
 #ifdef CONFIG_PCI
 	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+	.pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
 #endif
 	.get_irq		= mpic_get_irq,
 	.restart		= fsl_rstcr_restart,
@@ -198,6 +200,7 @@ define_machine(xes_mpc8540) {
 	.init_IRQ		= xes_mpc85xx_pic_init,
 #ifdef CONFIG_PCI
 	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+	.pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
 #endif
 	.get_irq		= mpic_get_irq,
 	.restart		= fsl_rstcr_restart,
diff --git a/arch/powerpc/platforms/86xx/pic.c b/arch/powerpc/platforms/86xx/pic.c
index 9982f57c98b..d5b98c0f958 100644
--- a/arch/powerpc/platforms/86xx/pic.c
+++ b/arch/powerpc/platforms/86xx/pic.c
@@ -10,6 +10,7 @@
 #include <linux/stddef.h>
 #include <linux/kernel.h>
 #include <linux/interrupt.h>
+#include <linux/of_irq.h>
 #include <linux/of_platform.h>
 
 #include <asm/mpic.h>
diff --git a/arch/powerpc/platforms/8xx/Kconfig b/arch/powerpc/platforms/8xx/Kconfig
index 8dec3c0911a..bd6f1a1cf92 100644
--- a/arch/powerpc/platforms/8xx/Kconfig
+++ b/arch/powerpc/platforms/8xx/Kconfig
@@ -45,7 +45,6 @@ config PPC_EP88XC
 config PPC_ADDER875
 	bool "Analogue & Micro Adder 875"
 	select CPM1
-	select REDBOOT
 	help
 	  This enables support for the Analogue & Micro Adder 875
 	  board.
diff --git a/arch/powerpc/platforms/8xx/ep88xc.c b/arch/powerpc/platforms/8xx/ep88xc.c
index 7d9ac6040d6..e62166681d0 100644
--- a/arch/powerpc/platforms/8xx/ep88xc.c
+++ b/arch/powerpc/platforms/8xx/ep88xc.c
@@ -10,6 +10,8 @@
  */
 
 #include <linux/init.h>
+#include <linux/of_address.h>
+#include <linux/of_fdt.h>
 #include <linux/of_platform.h>
 
 #include <asm/machdep.h>
diff --git a/arch/powerpc/platforms/8xx/m8xx_setup.c b/arch/powerpc/platforms/8xx/m8xx_setup.c
index 1e121088826..587a2828b06 100644
--- a/arch/powerpc/platforms/8xx/m8xx_setup.c
+++ b/arch/powerpc/platforms/8xx/m8xx_setup.c
@@ -43,6 +43,7 @@ static irqreturn_t timebase_interrupt(int irq, void *dev)
 
 static struct irqaction tbint_irqaction = {
 	.handler = timebase_interrupt,
+	.flags = IRQF_NO_THREAD,
 	.name = "tbint",
 };
 
@@ -218,19 +219,12 @@ void mpc8xx_restart(char *cmd)
 
 static void cpm_cascade(unsigned int irq, struct irq_desc *desc)
 {
-	struct irq_chip *chip;
-	int cascade_irq;
-
-	if ((cascade_irq = cpm_get_irq()) >= 0) {
-		struct irq_desc *cdesc = irq_to_desc(cascade_irq);
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	int cascade_irq = cpm_get_irq();
 
+	if (cascade_irq >= 0)
 		generic_handle_irq(cascade_irq);
 
-		chip = irq_desc_get_chip(cdesc);
-		chip->irq_eoi(&cdesc->irq_data);
-	}
-
-	chip = irq_desc_get_chip(desc);
 	chip->irq_eoi(&desc->irq_data);
 }
 
diff --git a/arch/powerpc/platforms/8xx/mpc86xads_setup.c b/arch/powerpc/platforms/8xx/mpc86xads_setup.c
index 866feff83c9..63084640c5c 100644
--- a/arch/powerpc/platforms/8xx/mpc86xads_setup.c
+++ b/arch/powerpc/platforms/8xx/mpc86xads_setup.c
@@ -15,6 +15,8 @@
  */
 
 #include <linux/init.h>
+#include <linux/of_address.h>
+#include <linux/of_fdt.h>
 #include <linux/of_platform.h>
 
 #include <asm/io.h>
diff --git a/arch/powerpc/platforms/8xx/mpc885ads_setup.c b/arch/powerpc/platforms/8xx/mpc885ads_setup.c
index 5d98398c2f5..c1262581b63 100644
--- a/arch/powerpc/platforms/8xx/mpc885ads_setup.c
+++ b/arch/powerpc/platforms/8xx/mpc885ads_setup.c
@@ -25,6 +25,8 @@
 #include <linux/fs_uart_pd.h>
 #include <linux/fsl_devices.h>
 #include <linux/mii.h>
+#include <linux/of_address.h>
+#include <linux/of_fdt.h>
 #include <linux/of_platform.h>
 
 #include <asm/delay.h>
diff --git a/arch/powerpc/platforms/8xx/tqm8xx_setup.c b/arch/powerpc/platforms/8xx/tqm8xx_setup.c
index 8d21ab70e06..251aba8759e 100644
--- a/arch/powerpc/platforms/8xx/tqm8xx_setup.c
+++ b/arch/powerpc/platforms/8xx/tqm8xx_setup.c
@@ -28,6 +28,7 @@
 #include <linux/fs_uart_pd.h>
 #include <linux/fsl_devices.h>
 #include <linux/mii.h>
+#include <linux/of_fdt.h>
 #include <linux/of_platform.h>
 
 #include <asm/delay.h>
@@ -48,7 +49,7 @@ struct cpm_pin {
 	int port, pin, flags;
 };
 
-static struct __initdata cpm_pin tqm8xx_pins[] = {
+static struct cpm_pin tqm8xx_pins[] __initdata = {
 	/* SMC1 */
 	{CPM_PORTB, 24, CPM_PIN_INPUT}, /* RX */
 	{CPM_PORTB, 25, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, /* TX */
@@ -63,7 +64,7 @@ static struct __initdata cpm_pin tqm8xx_pins[] = {
 	{CPM_PORTC, 11, CPM_PIN_INPUT | CPM_PIN_SECONDARY | CPM_PIN_GPIO},
 };
 
-static struct __initdata cpm_pin tqm8xx_fec_pins[] = {
+static struct cpm_pin tqm8xx_fec_pins[] __initdata = {
 	/* MII */
 	{CPM_PORTD, 3, CPM_PIN_OUTPUT},
 	{CPM_PORTD, 4, CPM_PIN_OUTPUT},
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index b62aab3e22e..391b3f6b54a 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -19,7 +19,6 @@ source "arch/powerpc/platforms/embedded6xx/Kconfig"
 source "arch/powerpc/platforms/44x/Kconfig"
 source "arch/powerpc/platforms/40x/Kconfig"
 source "arch/powerpc/platforms/amigaone/Kconfig"
-source "arch/powerpc/platforms/wsp/Kconfig"
 
 config KVM_GUEST
 	bool "KVM Guest support"
@@ -86,6 +85,27 @@ config MPIC
 	bool
 	default n
 
+config MPIC_TIMER
+	bool "MPIC Global Timer"
+	depends on MPIC && FSL_SOC
+	default n
+	help
+	  The MPIC global timer is a hardware timer inside the
+	  Freescale PIC, which complies with the OpenPIC standard.
+	  When the specified interval elapses, the timer generates
+	  an interrupt. The driver has only been tested on Freescale
+	  chips so far, but it can potentially support other global
+	  timers complying with the OpenPIC standard.
+
+config FSL_MPIC_TIMER_WAKEUP
+	tristate "Freescale MPIC global timer wakeup driver"
+	depends on FSL_SOC &&  MPIC_TIMER && PM
+	default n
+	help
+	  This driver provides a way to wake up the system using the
+	  MPIC global timer, e.g.
+	  "echo 5 > /sys/devices/system/mpic/timer_wakeup"
+
 config PPC_EPAPR_HV_PIC
 	bool
 	default n
@@ -164,6 +184,11 @@ config IBMEBUS
 	help
 	  Bus device driver for GX bus based adapters.
 
+config EEH
+	bool
+	depends on (PPC_POWERNV || PPC_PSERIES) && PCI
+	default y
+
 config PPC_MPC106
 	bool
 	default n
@@ -176,54 +201,18 @@ config PPC_P7_NAP
 	bool
 	default n
 
-config PPC_INDIRECT_IO
-	bool
-	select GENERIC_IOMAP
-
 config PPC_INDIRECT_PIO
 	bool
-	select PPC_INDIRECT_IO
+	select GENERIC_IOMAP
 
 config PPC_INDIRECT_MMIO
 	bool
-	select PPC_INDIRECT_IO
 
 config PPC_IO_WORKAROUNDS
 	bool
 
 source "drivers/cpufreq/Kconfig"
 
-menu "CPU Frequency drivers"
-	depends on CPU_FREQ
-
-config CPU_FREQ_PMAC
-	bool "Support for Apple PowerBooks"
-	depends on ADB_PMU && PPC32
-	select CPU_FREQ_TABLE
-	help
-	  This adds support for frequency switching on Apple PowerBooks,
-	  this currently includes some models of iBook & Titanium
-	  PowerBook.
-
-config CPU_FREQ_PMAC64
-	bool "Support for some Apple G5s"
-	depends on PPC_PMAC && PPC64
-	select CPU_FREQ_TABLE
-	help
-	  This adds support for frequency switching on Apple iMac G5,
-	  and some of the more recent desktop G5 machines as well.
-
-config PPC_PASEMI_CPUFREQ
-	bool "Support for PA Semi PWRficient"
-	depends on PPC_PASEMI
-	default y
-	select CPU_FREQ_TABLE
-	help
-	  This adds the support for frequency switching on PA Semi
-	  PWRficient processors.
-
-endmenu
-
 menu "CPUIdle driver"
 
 source "drivers/cpuidle/Kconfig"
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 54f3936001a..a41bd023647 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -71,6 +71,9 @@ config PPC_BOOK3S_64
 	select PPC_FPU
 	select PPC_HAVE_PMU_SUPPORT
 	select SYS_SUPPORTS_HUGETLBFS
+	select HAVE_ARCH_TRANSPARENT_HUGEPAGE if PPC_64K_PAGES
+	select ARCH_SUPPORTS_NUMA_BALANCING
+	select IRQ_WORK
 
 config PPC_BOOK3E_64
 	bool "Embedded processors"
@@ -92,21 +95,35 @@ choice
 
 config GENERIC_CPU
 	bool "Generic"
+	depends on !CPU_LITTLE_ENDIAN
 
 config CELL_CPU
 	bool "Cell Broadband Engine"
+	depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN
 
 config POWER4_CPU
 	bool "POWER4"
+	depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN
 
 config POWER5_CPU
 	bool "POWER5"
+	depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN
 
 config POWER6_CPU
 	bool "POWER6"
+	depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN
 
 config POWER7_CPU
 	bool "POWER7"
+	depends on PPC_BOOK3S_64
+
+config E5500_CPU
+	bool "Freescale e5500"
+	depends on E500
+
+config E6500_CPU
+	bool "Freescale e6500"
+	depends on E500
 
 endchoice
 
@@ -131,10 +148,6 @@ config POWER4
 	depends on PPC64 && PPC_BOOK3S
 	def_bool y
 
-config PPC_A2
-	bool
-	depends on PPC_BOOK3E_64
-
 config TUNE_CELL
 	bool "Optimize for Cell Broadband Engine"
 	depends on PPC64 && PPC_BOOK3S
@@ -158,6 +171,7 @@ config E500
 config PPC_E500MC
 	bool "e500mc Support"
 	select PPC_FPU
+	select COMMON_CLK
 	depends on E500
 	help
 	  This must be enabled for running on e500mc (and derivatives
@@ -262,7 +276,7 @@ config VSX
 
 config PPC_ICSWX
 	bool "Support for PowerPC icswx coprocessor instruction"
-	depends on POWER4 || PPC_A2
+	depends on POWER4
 	default n
 	---help---
 
@@ -387,3 +401,33 @@ config PPC_DOORBELL
 	default n
 
 endmenu
+
+choice
+	prompt "Endianness selection"
+	default CPU_BIG_ENDIAN
+	help
+	  This option selects whether a big endian or little endian kernel will
+	  be built.
+
+config CPU_BIG_ENDIAN
+	bool "Build big endian kernel"
+	help
+	  Build a big endian kernel.
+
+	  If unsure, select this option.
+
+config CPU_LITTLE_ENDIAN
+	bool "Build little endian kernel"
+	select PPC64_BOOT_WRAPPER
+	help
+	  Build a little endian kernel.
+
+	  Note that if cross compiling a little endian kernel,
+	  CROSS_COMPILE must point to a toolchain capable of targeting
+	  little endian powerpc.
+
+endchoice
+
+config PPC64_BOOT_WRAPPER
+	def_bool n
+	depends on CPU_LITTLE_ENDIAN
diff --git a/arch/powerpc/platforms/Makefile b/arch/powerpc/platforms/Makefile
index 879b4a44849..469ef170d21 100644
--- a/arch/powerpc/platforms/Makefile
+++ b/arch/powerpc/platforms/Makefile
@@ -22,4 +22,3 @@ obj-$(CONFIG_PPC_CELL)		+= cell/
 obj-$(CONFIG_PPC_PS3)		+= ps3/
 obj-$(CONFIG_EMBEDDED6xx)	+= embedded6xx/
 obj-$(CONFIG_AMIGAONE)		+= amigaone/
-obj-$(CONFIG_PPC_WSP)		+= wsp/
diff --git a/arch/powerpc/platforms/cell/beat_htab.c b/arch/powerpc/platforms/cell/beat_htab.c
index 246e1d8b3af..d4d245c0d78 100644
--- a/arch/powerpc/platforms/cell/beat_htab.c
+++ b/arch/powerpc/platforms/cell/beat_htab.c
@@ -111,7 +111,7 @@ static long beat_lpar_hpte_insert(unsigned long hpte_group,
 		DBG_LOW(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r);
 
 	if (rflags & _PAGE_NO_CACHE)
-		hpte_r &= ~_PAGE_COHERENT;
+		hpte_r &= ~HPTE_R_M;
 
 	raw_spin_lock(&beat_htab_lock);
 	lpar_rc = beat_read_mask(hpte_group);
@@ -185,7 +185,8 @@ static void beat_lpar_hptab_clear(void)
 static long beat_lpar_hpte_updatepp(unsigned long slot,
 				    unsigned long newpp,
 				    unsigned long vpn,
-				    int psize, int ssize, int local)
+				    int psize, int apsize,
+				    int ssize, int local)
 {
 	unsigned long lpar_rc;
 	u64 dummy0, dummy1;
@@ -274,7 +275,8 @@ static void beat_lpar_hpte_updateboltedpp(unsigned long newpp,
 }
 
 static void beat_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn,
-					 int psize, int ssize, int local)
+				      int psize, int apsize,
+				      int ssize, int local)
 {
 	unsigned long want_v;
 	unsigned long lpar_rc;
@@ -335,7 +337,7 @@ static long beat_lpar_hpte_insert_v3(unsigned long hpte_group,
 		DBG_LOW(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r);
 
 	if (rflags & _PAGE_NO_CACHE)
-		hpte_r &= ~_PAGE_COHERENT;
+		hpte_r &= ~HPTE_R_M;
 
 	/* insert into not-volted entry */
 	lpar_rc = beat_insert_htab_entry3(0, hpte_group, hpte_v, hpte_r,
@@ -364,9 +366,10 @@ static long beat_lpar_hpte_insert_v3(unsigned long hpte_group,
  * already zero.  For now I am paranoid.
  */
 static long beat_lpar_hpte_updatepp_v3(unsigned long slot,
-				    unsigned long newpp,
-				    unsigned long vpn,
-				    int psize, int ssize, int local)
+				       unsigned long newpp,
+				       unsigned long vpn,
+				       int psize, int apsize,
+				       int ssize, int local)
 {
 	unsigned long lpar_rc;
 	unsigned long want_v;
@@ -394,7 +397,8 @@ static long beat_lpar_hpte_updatepp_v3(unsigned long slot,
 }
 
 static void beat_lpar_hpte_invalidate_v3(unsigned long slot, unsigned long vpn,
-					 int psize, int ssize, int local)
+					 int psize, int apsize,
+					 int ssize, int local)
 {
 	unsigned long want_v;
 	unsigned long lpar_rc;
diff --git a/arch/powerpc/platforms/cell/beat_interrupt.c b/arch/powerpc/platforms/cell/beat_interrupt.c
index 8c6dc42ecf6..9e5dfbcc00a 100644
--- a/arch/powerpc/platforms/cell/beat_interrupt.c
+++ b/arch/powerpc/platforms/cell/beat_interrupt.c
@@ -239,7 +239,7 @@ void __init beatic_init_IRQ(void)
 	ppc_md.get_irq = beatic_get_irq;
 
 	/* Allocate an irq host */
-	beatic_host = irq_domain_add_nomap(NULL, 0, &beatic_pic_host_ops, NULL);
+	beatic_host = irq_domain_add_nomap(NULL, ~0, &beatic_pic_host_ops, NULL);
 	BUG_ON(beatic_host == NULL);
 	irq_set_default_host(beatic_host);
 }
diff --git a/arch/powerpc/platforms/cell/cbe_thermal.c b/arch/powerpc/platforms/cell/cbe_thermal.c
index 94560db788b..2c15ff09448 100644
--- a/arch/powerpc/platforms/cell/cbe_thermal.c
+++ b/arch/powerpc/platforms/cell/cbe_thermal.c
@@ -125,7 +125,7 @@ static ssize_t show_throttle(struct cbe_pmd_regs __iomem *pmd_regs, char *buf, i
 static ssize_t store_throttle(struct cbe_pmd_regs __iomem *pmd_regs, const char *buf, size_t size, int pos)
 {
 	u64 reg_value;
-	int temp;
+	unsigned int temp;
 	u64 new_value;
 	int ret;
 
diff --git a/arch/powerpc/platforms/cell/celleb_scc_pciex.c b/arch/powerpc/platforms/cell/celleb_scc_pciex.c
index 14be2bd358b..4278acfa2ed 100644
--- a/arch/powerpc/platforms/cell/celleb_scc_pciex.c
+++ b/arch/powerpc/platforms/cell/celleb_scc_pciex.c
@@ -486,7 +486,6 @@ static __init int celleb_setup_pciex(struct device_node *node,
 				     struct pci_controller *phb)
 {
 	struct resource	r;
-	struct of_irq oirq;
 	int virq;
 
 	/* SMMIO registers; used inside this file */
@@ -507,12 +506,11 @@ static __init int celleb_setup_pciex(struct device_node *node,
 	phb->ops = &scc_pciex_pci_ops;
 
 	/* internal interrupt handler */
-	if (of_irq_map_one(node, 1, &oirq)) {
+	virq = irq_of_parse_and_map(node, 1);
+	if (!virq) {
 		pr_err("PCIEXC:Failed to map irq\n");
 		goto error;
 	}
-	virq = irq_create_of_mapping(oirq.controller, oirq.specifier,
-				     oirq.size);
 	if (request_irq(virq, pciex_handle_internal_irq,
 			0, "pciex", (void *)phb)) {
 		pr_err("PCIEXC:Failed to request irq\n");
diff --git a/arch/powerpc/platforms/cell/celleb_scc_sio.c b/arch/powerpc/platforms/cell/celleb_scc_sio.c
index 9c339ec646f..c8eb5719382 100644
--- a/arch/powerpc/platforms/cell/celleb_scc_sio.c
+++ b/arch/powerpc/platforms/cell/celleb_scc_sio.c
@@ -45,7 +45,7 @@ static int __init txx9_serial_init(void)
 	struct device_node *node;
 	int i;
 	struct uart_port req;
-	struct of_irq irq;
+	struct of_phandle_args irq;
 	struct resource res;
 
 	for_each_compatible_node(node, "serial", "toshiba,sio-scc") {
@@ -53,7 +53,7 @@ static int __init txx9_serial_init(void)
 			if (!(txx9_serial_bitmap & (1<<i)))
 				continue;
 
-			if (of_irq_map_one(node, i, &irq))
+			if (of_irq_parse_one(node, i, &irq))
 				continue;
 			if (of_address_to_resource(node,
 				txx9_scc_tab[i].index, &res))
@@ -66,8 +66,7 @@ static int __init txx9_serial_init(void)
 #ifdef CONFIG_SERIAL_TXX9_CONSOLE
 			req.membase = ioremap(req.mapbase, 0x24);
 #endif
-			req.irq = irq_create_of_mapping(irq.controller,
-				irq.specifier, irq.size);
+			req.irq = irq_create_of_mapping(&irq);
 			req.flags |= UPF_IOREMAP | UPF_BUGGY_UART
 				/*HAVE_CTS_LINE*/;
 			req.uartclk = 83300000;
diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c
index 2d42f3bb66d..8a106b4172e 100644
--- a/arch/powerpc/platforms/cell/interrupt.c
+++ b/arch/powerpc/platforms/cell/interrupt.c
@@ -215,7 +215,7 @@ void iic_request_IPIs(void)
 {
 	iic_request_ipi(PPC_MSG_CALL_FUNCTION);
 	iic_request_ipi(PPC_MSG_RESCHEDULE);
-	iic_request_ipi(PPC_MSG_CALL_FUNC_SINGLE);
+	iic_request_ipi(PPC_MSG_TICK_BROADCAST);
 	iic_request_ipi(PPC_MSG_DEBUGGER_BREAK);
 }
 
diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c
index 946306b1bb4..2b90ff8a93b 100644
--- a/arch/powerpc/platforms/cell/iommu.c
+++ b/arch/powerpc/platforms/cell/iommu.c
@@ -197,7 +197,7 @@ static int tce_build_cell(struct iommu_table *tbl, long index, long npages,
 
 	io_pte = (unsigned long *)tbl->it_base + (index - tbl->it_offset);
 
-	for (i = 0; i < npages; i++, uaddr += IOMMU_PAGE_SIZE)
+	for (i = 0; i < npages; i++, uaddr += (1UL << tbl->it_page_shift))
 		io_pte[i] = base_pte | (__pa(uaddr) & CBE_IOPTE_RPN_Mask);
 
 	mb();
@@ -430,7 +430,7 @@ static void cell_iommu_setup_hardware(struct cbe_iommu *iommu,
 {
 	cell_iommu_setup_stab(iommu, base, size, 0, 0);
 	iommu->ptab = cell_iommu_alloc_ptab(iommu, base, size, 0, 0,
-					    IOMMU_PAGE_SHIFT);
+					    IOMMU_PAGE_SHIFT_4K);
 	cell_iommu_enable_hardware(iommu);
 }
 
@@ -487,8 +487,10 @@ cell_iommu_setup_window(struct cbe_iommu *iommu, struct device_node *np,
 	window->table.it_blocksize = 16;
 	window->table.it_base = (unsigned long)iommu->ptab;
 	window->table.it_index = iommu->nid;
-	window->table.it_offset = (offset >> IOMMU_PAGE_SHIFT) + pte_offset;
-	window->table.it_size = size >> IOMMU_PAGE_SHIFT;
+	window->table.it_page_shift = IOMMU_PAGE_SHIFT_4K;
+	window->table.it_offset =
+		(offset >> window->table.it_page_shift) + pte_offset;
+	window->table.it_size = size >> window->table.it_page_shift;
 
 	iommu_init_table(&window->table, iommu->nid);
 
@@ -697,7 +699,7 @@ static int __init cell_iommu_get_window(struct device_node *np,
 					 unsigned long *base,
 					 unsigned long *size)
 {
-	const void *dma_window;
+	const __be32 *dma_window;
 	unsigned long index;
 
 	/* Use ibm,dma-window if available, else, hard code ! */
@@ -773,7 +775,7 @@ static void __init cell_iommu_init_one(struct device_node *np,
 
 	/* Setup the iommu_table */
 	cell_iommu_setup_window(iommu, np, base, size,
-				offset >> IOMMU_PAGE_SHIFT);
+				offset >> IOMMU_PAGE_SHIFT_4K);
 }
 
 static void __init cell_disable_iommus(void)
@@ -1122,7 +1124,7 @@ static int __init cell_iommu_fixed_mapping_init(void)
 
 		cell_iommu_setup_stab(iommu, dbase, dsize, fbase, fsize);
 		iommu->ptab = cell_iommu_alloc_ptab(iommu, dbase, dsize, 0, 0,
-						    IOMMU_PAGE_SHIFT);
+						    IOMMU_PAGE_SHIFT_4K);
 		cell_iommu_setup_fixed_ptab(iommu, np, dbase, dsize,
 					     fbase, fsize);
 		cell_iommu_enable_hardware(iommu);
diff --git a/arch/powerpc/platforms/cell/ras.c b/arch/powerpc/platforms/cell/ras.c
index 5ec1e47a0d7..e865d748179 100644
--- a/arch/powerpc/platforms/cell/ras.c
+++ b/arch/powerpc/platforms/cell/ras.c
@@ -123,7 +123,8 @@ static int __init cbe_ptcal_enable_on_node(int nid, int order)
 
 	area->nid = nid;
 	area->order = order;
-	area->pages = alloc_pages_exact_node(area->nid, GFP_KERNEL|GFP_THISNODE,
+	area->pages = alloc_pages_exact_node(area->nid,
+						GFP_KERNEL|__GFP_THISNODE,
 						area->order);
 
 	if (!area->pages) {
diff --git a/arch/powerpc/platforms/cell/smp.c b/arch/powerpc/platforms/cell/smp.c
index d35dbbc8ec7..c8017a7bcab 100644
--- a/arch/powerpc/platforms/cell/smp.c
+++ b/arch/powerpc/platforms/cell/smp.c
@@ -40,6 +40,7 @@
 #include <asm/firmware.h>
 #include <asm/rtas.h>
 #include <asm/cputhreads.h>
+#include <asm/code-patching.h>
 
 #include "interrupt.h"
 #include <asm/udbg.h>
@@ -70,8 +71,8 @@ static cpumask_t of_spin_map;
 static inline int smp_startup_cpu(unsigned int lcpu)
 {
 	int status;
-	unsigned long start_here = __pa((u32)*((unsigned long *)
-					       generic_secondary_smp_init));
+	unsigned long start_here =
+			__pa(ppc_function_entry(generic_secondary_smp_init));
 	unsigned int pcpu;
 	int start_cpu;
 
@@ -136,25 +137,12 @@ static int smp_cell_kick_cpu(int nr)
 	return 0;
 }
 
-static int smp_cell_cpu_bootable(unsigned int nr)
-{
-	/* Special case - we inhibit secondary thread startup
-	 * during boot if the user requests it.  Odd-numbered
-	 * cpus are assumed to be secondary threads.
-	 */
-	if (system_state < SYSTEM_RUNNING &&
-	    cpu_has_feature(CPU_FTR_SMT) &&
-	    !smt_enabled_at_boot && cpu_thread_in_core(nr) != 0)
-		return 0;
-
-	return 1;
-}
 static struct smp_ops_t bpa_iic_smp_ops = {
 	.message_pass	= iic_message_pass,
 	.probe		= smp_iic_probe,
 	.kick_cpu	= smp_cell_kick_cpu,
 	.setup_cpu	= smp_cell_setup_cpu,
-	.cpu_bootable	= smp_cell_cpu_bootable,
+	.cpu_bootable	= smp_generic_cpu_bootable,
 };
 
 /* This is called very early */
diff --git a/arch/powerpc/platforms/cell/spider-pic.c b/arch/powerpc/platforms/cell/spider-pic.c
index 8e299447127..1f72f4ab635 100644
--- a/arch/powerpc/platforms/cell/spider-pic.c
+++ b/arch/powerpc/platforms/cell/spider-pic.c
@@ -235,12 +235,9 @@ static unsigned int __init spider_find_cascade_and_node(struct spider_pic *pic)
 	/* First, we check whether we have a real "interrupts" in the device
 	 * tree in case the device-tree is ever fixed
 	 */
-	struct of_irq oirq;
-	if (of_irq_map_one(pic->host->of_node, 0, &oirq) == 0) {
-		virq = irq_create_of_mapping(oirq.controller, oirq.specifier,
-					     oirq.size);
+	virq = irq_of_parse_and_map(pic->host->of_node, 0);
+	if (virq)
 		return virq;
-	}
 
 	/* Now do the horrible hacks */
 	tmp = of_get_property(pic->host->of_node, "#interrupt-cells", NULL);
diff --git a/arch/powerpc/platforms/cell/spu_manage.c b/arch/powerpc/platforms/cell/spu_manage.c
index 2bb6977c0a5..c3327f3d8cf 100644
--- a/arch/powerpc/platforms/cell/spu_manage.c
+++ b/arch/powerpc/platforms/cell/spu_manage.c
@@ -177,21 +177,20 @@ out:
 
 static int __init spu_map_interrupts(struct spu *spu, struct device_node *np)
 {
-	struct of_irq oirq;
+	struct of_phandle_args oirq;
 	int ret;
 	int i;
 
 	for (i=0; i < 3; i++) {
-		ret = of_irq_map_one(np, i, &oirq);
+		ret = of_irq_parse_one(np, i, &oirq);
 		if (ret) {
 			pr_debug("spu_new: failed to get irq %d\n", i);
 			goto err;
 		}
 		ret = -EINVAL;
-		pr_debug("  irq %d no 0x%x on %s\n", i, oirq.specifier[0],
-			 oirq.controller->full_name);
-		spu->irqs[i] = irq_create_of_mapping(oirq.controller,
-					oirq.specifier, oirq.size);
+		pr_debug("  irq %d no 0x%x on %s\n", i, oirq.args[0],
+			 oirq.np->full_name);
+		spu->irqs[i] = irq_create_of_mapping(&oirq);
 		if (spu->irqs[i] == NO_IRQ) {
 			pr_debug("spu_new: failed to map it !\n");
 			goto err;
@@ -200,7 +199,7 @@ static int __init spu_map_interrupts(struct spu *spu, struct device_node *np)
 	return 0;
 
 err:
-	pr_debug("failed to map irq %x for spu %s\n", *oirq.specifier,
+	pr_debug("failed to map irq %x for spu %s\n", *oirq.args,
 		spu->name);
 	for (; i >= 0; i--) {
 		if (spu->irqs[i] != NO_IRQ)
diff --git a/arch/powerpc/platforms/cell/spu_syscalls.c b/arch/powerpc/platforms/cell/spu_syscalls.c
index db4e638cf40..5e6e0bad6db 100644
--- a/arch/powerpc/platforms/cell/spu_syscalls.c
+++ b/arch/powerpc/platforms/cell/spu_syscalls.c
@@ -25,6 +25,7 @@
 #include <linux/module.h>
 #include <linux/syscalls.h>
 #include <linux/rcupdate.h>
+#include <linux/binfmts.h>
 
 #include <asm/spu.h>
 
@@ -110,6 +111,7 @@ asmlinkage long sys_spu_run(int fd, __u32 __user *unpc, __u32 __user *ustatus)
 	return ret;
 }
 
+#ifdef CONFIG_COREDUMP
 int elf_coredump_extra_notes_size(void)
 {
 	struct spufs_calls *calls;
@@ -126,7 +128,7 @@ int elf_coredump_extra_notes_size(void)
 	return ret;
 }
 
-int elf_coredump_extra_notes_write(struct file *file, loff_t *foffset)
+int elf_coredump_extra_notes_write(struct coredump_params *cprm)
 {
 	struct spufs_calls *calls;
 	int ret;
@@ -135,12 +137,13 @@ int elf_coredump_extra_notes_write(struct file *file, loff_t *foffset)
 	if (!calls)
 		return 0;
 
-	ret = calls->coredump_extra_notes_write(file, foffset);
+	ret = calls->coredump_extra_notes_write(cprm);
 
 	spufs_calls_put(calls);
 
 	return ret;
 }
+#endif
 
 void notify_spus_active(void)
 {
@@ -169,7 +172,7 @@ EXPORT_SYMBOL_GPL(register_spu_syscalls);
 void unregister_spu_syscalls(struct spufs_calls *calls)
 {
 	BUG_ON(spufs_calls->owner != calls->owner);
-	rcu_assign_pointer(spufs_calls, NULL);
+	RCU_INIT_POINTER(spufs_calls, NULL);
 	synchronize_rcu();
 }
 EXPORT_SYMBOL_GPL(unregister_spu_syscalls);
diff --git a/arch/powerpc/platforms/cell/spufs/Makefile b/arch/powerpc/platforms/cell/spufs/Makefile
index b9d5d678aa4..52a7d2596d3 100644
--- a/arch/powerpc/platforms/cell/spufs/Makefile
+++ b/arch/powerpc/platforms/cell/spufs/Makefile
@@ -1,8 +1,9 @@
 
 obj-$(CONFIG_SPU_FS) += spufs.o
-spufs-y += inode.o file.o context.o syscalls.o coredump.o
+spufs-y += inode.o file.o context.o syscalls.o
 spufs-y += sched.o backing_ops.o hw_ops.o run.o gang.o
 spufs-y += switch.o fault.o lscsa_alloc.o
+spufs-$(CONFIG_COREDUMP) += coredump.o
 
 # magic for the trace events
 CFLAGS_sched.o := -I$(src)
diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c
index c9500ea7be2..be6212ddbf0 100644
--- a/arch/powerpc/platforms/cell/spufs/coredump.c
+++ b/arch/powerpc/platforms/cell/spufs/coredump.c
@@ -27,6 +27,8 @@
 #include <linux/gfp.h>
 #include <linux/list.h>
 #include <linux/syscalls.h>
+#include <linux/coredump.h>
+#include <linux/binfmts.h>
 
 #include <asm/uaccess.h>
 
@@ -48,44 +50,6 @@ static ssize_t do_coredump_read(int num, struct spu_context *ctx, void *buffer,
 	return ++ret; /* count trailing NULL */
 }
 
-/*
- * These are the only things you should do on a core-file: use only these
- * functions to write out all the necessary info.
- */
-static int spufs_dump_write(struct file *file, const void *addr, int nr, loff_t *foffset)
-{
-	unsigned long limit = rlimit(RLIMIT_CORE);
-	ssize_t written;
-
-	if (*foffset + nr > limit)
-		return -EIO;
-
-	written = file->f_op->write(file, addr, nr, &file->f_pos);
-	*foffset += written;
-
-	if (written != nr)
-		return -EIO;
-
-	return 0;
-}
-
-static int spufs_dump_align(struct file *file, char *buf, loff_t new_off,
-			    loff_t *foffset)
-{
-	int rc, size;
-
-	size = min((loff_t)PAGE_SIZE, new_off - *foffset);
-	memset(buf, 0, size);
-
-	rc = 0;
-	while (rc == 0 && new_off > *foffset) {
-		size = min((loff_t)PAGE_SIZE, new_off - *foffset);
-		rc = spufs_dump_write(file, buf, size, foffset);
-	}
-
-	return rc;
-}
-
 static int spufs_ctx_note_size(struct spu_context *ctx, int dfd)
 {
 	int i, sz, total = 0;
@@ -165,10 +129,10 @@ int spufs_coredump_extra_notes_size(void)
 }
 
 static int spufs_arch_write_note(struct spu_context *ctx, int i,
-				  struct file *file, int dfd, loff_t *foffset)
+				  struct coredump_params *cprm, int dfd)
 {
 	loff_t pos = 0;
-	int sz, rc, nread, total = 0;
+	int sz, rc, total = 0;
 	const int bufsz = PAGE_SIZE;
 	char *name;
 	char fullname[80], *buf;
@@ -186,42 +150,39 @@ static int spufs_arch_write_note(struct spu_context *ctx, int i,
 	en.n_descsz = sz;
 	en.n_type = NT_SPU;
 
-	rc = spufs_dump_write(file, &en, sizeof(en), foffset);
-	if (rc)
-		goto out;
+	if (!dump_emit(cprm, &en, sizeof(en)))
+		goto Eio;
 
-	rc = spufs_dump_write(file, fullname, en.n_namesz, foffset);
-	if (rc)
-		goto out;
+	if (!dump_emit(cprm, fullname, en.n_namesz))
+		goto Eio;
 
-	rc = spufs_dump_align(file, buf, roundup(*foffset, 4), foffset);
-	if (rc)
-		goto out;
+	if (!dump_align(cprm, 4))
+		goto Eio;
 
 	do {
-		nread = do_coredump_read(i, ctx, buf, bufsz, &pos);
-		if (nread > 0) {
-			rc = spufs_dump_write(file, buf, nread, foffset);
-			if (rc)
-				goto out;
-			total += nread;
+		rc = do_coredump_read(i, ctx, buf, bufsz, &pos);
+		if (rc > 0) {
+			if (!dump_emit(cprm, buf, rc))
+				goto Eio;
+			total += rc;
 		}
-	} while (nread == bufsz && total < sz);
+	} while (rc == bufsz && total < sz);
 
-	if (nread < 0) {
-		rc = nread;
+	if (rc < 0)
 		goto out;
-	}
-
-	rc = spufs_dump_align(file, buf, roundup(*foffset - total + sz, 4),
-			      foffset);
 
+	/* Report success as 0; rc still holds the last read length here. */
+	rc = dump_skip(cprm, roundup(cprm->written - total + sz, 4) -
+				     cprm->written) ? 0 : -EIO;
 out:
 	free_page((unsigned long)buf);
 	return rc;
+Eio:
+	free_page((unsigned long)buf);
+	return -EIO;
 }
 
-int spufs_coredump_extra_notes_write(struct file *file, loff_t *foffset)
+int spufs_coredump_extra_notes_write(struct coredump_params *cprm)
 {
 	struct spu_context *ctx;
 	int fd, j, rc;
@@ -233,7 +194,7 @@ int spufs_coredump_extra_notes_write(struct file *file, loff_t *foffset)
 			return rc;
 
 		for (j = 0; spufs_coredump_read[j].name != NULL; j++) {
-			rc = spufs_arch_write_note(ctx, j, file, fd, foffset);
+			rc = spufs_arch_write_note(ctx, j, cprm, fd);
 			if (rc) {
 				spu_release_saved(ctx);
 				return rc;
diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index 35f77a42bed..87ba7cf99cd 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -238,7 +238,7 @@ const struct file_operations spufs_context_fops = {
 	.release	= spufs_dir_close,
 	.llseek		= dcache_dir_lseek,
 	.read		= generic_read_dir,
-	.readdir	= dcache_readdir,
+	.iterate	= dcache_readdir,
 	.fsync		= noop_fsync,
 };
 EXPORT_SYMBOL_GPL(spufs_context_fops);
@@ -620,12 +620,16 @@ spufs_parse_options(struct super_block *sb, char *options, struct inode *root)
 		case Opt_uid:
 			if (match_int(&args[0], &option))
 				return 0;
-			root->i_uid = option;
+			root->i_uid = make_kuid(current_user_ns(), option);
+			if (!uid_valid(root->i_uid))
+				return 0;
 			break;
 		case Opt_gid:
 			if (match_int(&args[0], &option))
 				return 0;
-			root->i_gid = option;
+			root->i_gid = make_kgid(current_user_ns(), option);
+			if (!gid_valid(root->i_gid))
+				return 0;
 			break;
 		case Opt_mode:
 			if (match_octal(&args[0], &option))
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
index 49318385d4f..4a0a64fe25d 100644
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -83,7 +83,6 @@ static struct timer_list spuloadavg_timer;
 #define MIN_SPU_TIMESLICE	max(5 * HZ / (1000 * SPUSCHED_TICK), 1)
 #define DEF_SPU_TIMESLICE	(100 * HZ / (1000 * SPUSCHED_TICK))
 
-#define MAX_USER_PRIO		(MAX_PRIO - MAX_RT_PRIO)
 #define SCALE_PRIO(x, prio) \
 	max(x * (MAX_PRIO - prio) / (MAX_USER_PRIO / 2), MIN_SPU_TIMESLICE)
 
diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h
index 67852ade4c0..bcfd6f063ef 100644
--- a/arch/powerpc/platforms/cell/spufs/spufs.h
+++ b/arch/powerpc/platforms/cell/spufs/spufs.h
@@ -35,7 +35,6 @@
 #define SPUFS_PS_MAP_SIZE	0x20000
 #define SPUFS_MFC_MAP_SIZE	0x1000
 #define SPUFS_CNTL_MAP_SIZE	0x1000
-#define SPUFS_CNTL_MAP_SIZE	0x1000
 #define SPUFS_SIGNAL_MAP_SIZE	PAGE_SIZE
 #define SPUFS_MSS_MAP_SIZE	0x1000
 
@@ -247,12 +246,13 @@ extern const struct spufs_tree_descr spufs_dir_debug_contents[];
 
 /* system call implementation */
 extern struct spufs_calls spufs_calls;
+struct coredump_params;
 long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *status);
 long spufs_create(struct path *nd, struct dentry *dentry, unsigned int flags,
 			umode_t mode, struct file *filp);
 /* ELF coredump callbacks for writing SPU ELF notes */
 extern int spufs_coredump_extra_notes_size(void);
-extern int spufs_coredump_extra_notes_write(struct file *file, loff_t *foffset);
+extern int spufs_coredump_extra_notes_write(struct coredump_params *cprm);
 
 extern const struct file_operations spufs_context_fops;
 
diff --git a/arch/powerpc/platforms/cell/spufs/syscalls.c b/arch/powerpc/platforms/cell/spufs/syscalls.c
index b045fdda484..a87200a535f 100644
--- a/arch/powerpc/platforms/cell/spufs/syscalls.c
+++ b/arch/powerpc/platforms/cell/spufs/syscalls.c
@@ -79,8 +79,10 @@ static long do_spu_create(const char __user *pathname, unsigned int flags,
 struct spufs_calls spufs_calls = {
 	.create_thread = do_spu_create,
 	.spu_run = do_spu_run,
-	.coredump_extra_notes_size = spufs_coredump_extra_notes_size,
-	.coredump_extra_notes_write = spufs_coredump_extra_notes_write,
 	.notify_spus_active = do_notify_spus_active,
 	.owner = THIS_MODULE,
+#ifdef CONFIG_COREDUMP
+	.coredump_extra_notes_size = spufs_coredump_extra_notes_size,
+	.coredump_extra_notes_write = spufs_coredump_extra_notes_write,
+#endif
 };
diff --git a/arch/powerpc/platforms/chrp/nvram.c b/arch/powerpc/platforms/chrp/nvram.c
index d3ceff04ffc..9ef8cc3378d 100644
--- a/arch/powerpc/platforms/chrp/nvram.c
+++ b/arch/powerpc/platforms/chrp/nvram.c
@@ -66,7 +66,7 @@ static void chrp_nvram_write(int addr, unsigned char val)
 void __init chrp_nvram_init(void)
 {
 	struct device_node *nvram;
-	const unsigned int *nbytes_p;
+	const __be32 *nbytes_p;
 	unsigned int proplen;
 
 	nvram = of_find_node_by_type(NULL, "nvram");
@@ -79,7 +79,7 @@ void __init chrp_nvram_init(void)
 		return;
 	}
 
-	nvram_size = *nbytes_p;
+	nvram_size = be32_to_cpup(nbytes_p);
 
 	printk(KERN_INFO "CHRP nvram contains %u bytes\n", nvram_size);
 	of_node_put(nvram);
diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c
index c665d7de6c9..7044fd36197 100644
--- a/arch/powerpc/platforms/chrp/setup.c
+++ b/arch/powerpc/platforms/chrp/setup.c
@@ -574,8 +574,8 @@ chrp_init2(void)
 
 static int __init chrp_probe(void)
 {
- 	char *dtype = of_get_flat_dt_prop(of_get_flat_dt_root(),
- 					  "device_type", NULL);
+	const char *dtype = of_get_flat_dt_prop(of_get_flat_dt_root(),
+						"device_type", NULL);
  	if (dtype == NULL)
  		return 0;
  	if (strcmp(dtype, "chrp"))
diff --git a/arch/powerpc/platforms/chrp/smp.c b/arch/powerpc/platforms/chrp/smp.c
index dead91b177b..b6c9a0dcc92 100644
--- a/arch/powerpc/platforms/chrp/smp.c
+++ b/arch/powerpc/platforms/chrp/smp.c
@@ -14,7 +14,6 @@
 #include <linux/interrupt.h>
 #include <linux/kernel_stat.h>
 #include <linux/delay.h>
-#include <linux/init.h>
 #include <linux/spinlock.h>
 
 #include <asm/ptrace.h>
diff --git a/arch/powerpc/platforms/embedded6xx/Kconfig b/arch/powerpc/platforms/embedded6xx/Kconfig
index 302ba43d73a..a25f496c2ef 100644
--- a/arch/powerpc/platforms/embedded6xx/Kconfig
+++ b/arch/powerpc/platforms/embedded6xx/Kconfig
@@ -34,7 +34,6 @@ config MPC7448HPC2
 	select TSI108_BRIDGE
 	select DEFAULT_UIMAGE
 	select PPC_UDBG_16550
-	select TSI108_BRIDGE
 	help
 	  Select MPC7448HPC2 if configuring for Freescale MPC7448HPC2 (Taiga)
 	  platform
@@ -44,19 +43,10 @@ config PPC_HOLLY
 	depends on EMBEDDED6xx
 	select TSI108_BRIDGE
 	select PPC_UDBG_16550
-	select TSI108_BRIDGE
 	help
 	  Select PPC_HOLLY if configuring for an IBM 750GX/CL Eval
 	  Board with TSI108/9 bridge (Hickory/Holly)
 
-config PPC_PRPMC2800
-	bool "Motorola-PrPMC2800"
-	depends on EMBEDDED6xx
-	select MV64X60
-	select NOT_COHERENT_CACHE
-	help
-	  This option enables support for the Motorola PrPMC2800 board
-
 config PPC_C2K
 	bool "SBS/GEFanuc C2K board"
 	depends on EMBEDDED6xx
@@ -67,6 +57,19 @@ config PPC_C2K
 	  This option enables support for the GE Fanuc C2K board (formerly
 	  an SBS board).
 
+config MVME5100
+	bool "Motorola/Emerson MVME5100"
+	depends on EMBEDDED6xx
+	select MPIC
+	select PCI
+	select PPC_INDIRECT_PCI
+	select PPC_I8259
+	select PPC_NATIVE
+	select PPC_UDBG_16550
+	help
+	  This option enables support for the Motorola (now Emerson) MVME5100
+	  board.
+
 config TSI108_BRIDGE
 	bool
 	select PCI
@@ -113,4 +116,3 @@ config WII
 	help
 	  Select WII if configuring for the Nintendo Wii.
 	  More information at: <http://gc-linux.sourceforge.net/>
-
diff --git a/arch/powerpc/platforms/embedded6xx/Makefile b/arch/powerpc/platforms/embedded6xx/Makefile
index 66c23e423f4..f126a2a0998 100644
--- a/arch/powerpc/platforms/embedded6xx/Makefile
+++ b/arch/powerpc/platforms/embedded6xx/Makefile
@@ -5,9 +5,9 @@ obj-$(CONFIG_MPC7448HPC2)	+= mpc7448_hpc2.o
 obj-$(CONFIG_LINKSTATION)	+= linkstation.o ls_uart.o
 obj-$(CONFIG_STORCENTER)	+= storcenter.o
 obj-$(CONFIG_PPC_HOLLY)		+= holly.o
-obj-$(CONFIG_PPC_PRPMC2800)	+= prpmc2800.o
 obj-$(CONFIG_PPC_C2K)		+= c2k.o
 obj-$(CONFIG_USBGECKO_UDBG)	+= usbgecko_udbg.o
 obj-$(CONFIG_GAMECUBE_COMMON)	+= flipper-pic.o
 obj-$(CONFIG_GAMECUBE)		+= gamecube.o
 obj-$(CONFIG_WII)		+= wii.o hlwd-pic.o
+obj-$(CONFIG_MVME5100)		+= mvme5100.o
diff --git a/arch/powerpc/platforms/embedded6xx/flipper-pic.c b/arch/powerpc/platforms/embedded6xx/flipper-pic.c
index 53d6eee0196..4cde8e7da4b 100644
--- a/arch/powerpc/platforms/embedded6xx/flipper-pic.c
+++ b/arch/powerpc/platforms/embedded6xx/flipper-pic.c
@@ -18,6 +18,7 @@
 #include <linux/init.h>
 #include <linux/irq.h>
 #include <linux/of.h>
+#include <linux/of_address.h>
 #include <asm/io.h>
 
 #include "flipper-pic.h"
diff --git a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c
index 3006b5117ec..c269caee58f 100644
--- a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c
+++ b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c
@@ -15,9 +15,10 @@
 #define pr_fmt(fmt) DRV_MODULE_NAME ": " fmt
 
 #include <linux/kernel.h>
-#include <linux/init.h>
 #include <linux/irq.h>
 #include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
 #include <asm/io.h>
 
 #include "hlwd-pic.h"
@@ -181,6 +182,7 @@ struct irq_domain *hlwd_pic_init(struct device_node *np)
 					   &hlwd_irq_domain_ops, io_base);
 	if (!irq_domain) {
 		pr_err("failed to allocate irq_domain\n");
+		iounmap(io_base);
 		return NULL;
 	}
 
diff --git a/arch/powerpc/platforms/embedded6xx/mvme5100.c b/arch/powerpc/platforms/embedded6xx/mvme5100.c
new file mode 100644
index 00000000000..25e3bfb64ef
--- /dev/null
+++ b/arch/powerpc/platforms/embedded6xx/mvme5100.c
@@ -0,0 +1,221 @@
+/*
+ * Board setup routines for the Motorola/Emerson MVME5100.
+ *
+ * Copyright 2013 CSC Australia Pty. Ltd.
+ *
+ * Based on earlier code by:
+ *
+ *    Matt Porter, MontaVista Software Inc.
+ *    Copyright 2001 MontaVista Software Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ * Author: Stephen Chivers <schivers@csc.com>
+ *
+ */
+
+#include <linux/of_platform.h>
+
+#include <asm/i8259.h>
+#include <asm/pci-bridge.h>
+#include <asm/mpic.h>
+#include <asm/prom.h>
+#include <mm/mmu_decl.h>
+#include <asm/udbg.h>
+
+#define HAWK_MPIC_SIZE		0x00040000U
+#define MVME5100_PCI_MEM_OFFSET 0x00000000
+
+/* Board register addresses. */
+#define BOARD_STATUS_REG	0xfef88080
+#define BOARD_MODFAIL_REG	0xfef88090
+#define BOARD_MODRST_REG	0xfef880a0
+#define BOARD_TBEN_REG		0xfef880c0
+#define BOARD_SW_READ_REG	0xfef880e0
+#define BOARD_GEO_ADDR_REG	0xfef880e8
+#define BOARD_EXT_FEATURE1_REG	0xfef880f0
+#define BOARD_EXT_FEATURE2_REG	0xfef88100
+
+static phys_addr_t pci_membase;
+static u_char *restart;
+
+static void mvme5100_8259_cascade(unsigned int irq, struct irq_desc *desc)
+{
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	unsigned int cascade_irq = i8259_irq();
+
+	if (cascade_irq != NO_IRQ)
+		generic_handle_irq(cascade_irq);
+	/* Always EOI the cascade line, even when no 8259 irq was pending. */
+	chip->irq_eoi(&desc->irq_data);
+}
+
+/* Set up the MPIC from the "open-pic" node, then cascade the i8259 into it. */
+static void __init mvme5100_pic_init(void)
+{
+	struct mpic *mpic;
+	struct device_node *np;
+	struct device_node *cp = NULL;
+	unsigned int cirq;
+	unsigned long intack = 0;
+	const u32 *prop = NULL;
+
+	np = of_find_node_by_type(NULL, "open-pic");
+	if (!np) {
+		pr_err("Could not find open-pic node\n");
+		return;
+	}
+
+	mpic = mpic_alloc(np, pci_membase, 0, 16, 256, " OpenPIC  ");
+	BUG_ON(mpic == NULL);
+	of_node_put(np);
+
+	mpic_assign_isu(mpic, 0, pci_membase + 0x10000);
+
+	mpic_init(mpic);
+
+	cp = of_find_compatible_node(NULL, NULL, "chrp,iic");
+	if (cp == NULL) {
+		pr_warn("mvme5100_pic_init: couldn't find i8259\n");
+		return;
+	}
+
+	cirq = irq_of_parse_and_map(cp, 0);
+	if (cirq == NO_IRQ) {
+		pr_warn("mvme5100_pic_init: no cascade interrupt?\n");
+		of_node_put(cp);	/* drop the lookup reference here too */
+		return;
+	}
+
+	/* Optional interrupt-acknowledge address for the 8259, 0 if absent. */
+	np = of_find_compatible_node(NULL, "pci", "mpc10x-pci");
+	if (np) {
+		prop = of_get_property(np, "8259-interrupt-acknowledge", NULL);
+		if (prop)
+			intack = prop[0];
+		of_node_put(np);
+	}
+
+	if (intack)
+		pr_debug("mvme5100_pic_init: PCI 8259 intack at 0x%016lx\n",
+		   intack);
+
+	i8259_init(cp, intack);
+	of_node_put(cp);
+	irq_set_chained_handler(cirq, mvme5100_8259_cascade);
+}
+
+/* Add the Hawk PCI host bridge @dev and record its BAR1 in pci_membase. */
+static int __init mvme5100_add_bridge(struct device_node *dev)
+{
+	const int		*bus_range;
+	int			len;
+	struct pci_controller	*hose;
+	unsigned short		devid;
+
+	pr_info("Adding PCI host bridge %s\n", dev->full_name);
+
+	/* Ignore a "bus-range" too short to hold both <first last> cells. */
+	bus_range = of_get_property(dev, "bus-range", &len);
+	if (bus_range && len < 2 * sizeof(int))
+		bus_range = NULL;
+
+	hose = pcibios_alloc_controller(dev);
+	if (hose == NULL)
+		return -ENOMEM;
+
+	hose->first_busno = bus_range ? bus_range[0] : 0;
+	hose->last_busno = bus_range ? bus_range[1] : 0xff;
+
+	setup_indirect_pci(hose, 0xfe000cf8, 0xfe000cfc, 0);
+	pci_process_bridge_OF_ranges(hose, dev, 1);
+
+	early_read_config_word(hose, 0, 0, PCI_DEVICE_ID, &devid);
+	if (devid != PCI_DEVICE_ID_MOTOROLA_HAWK) {
+		pr_err("HAWK PHB not present?\n");
+		return 0;
+	}
+
+	early_read_config_dword(hose, 0, 0, PCI_BASE_ADDRESS_1, &pci_membase);
+	if (pci_membase == 0) {
+		pr_err("HAWK PHB mibar not correctly set?\n");
+		return 0;
+	}
+
+	pr_info("mvme5100_pic_init: pci_membase: %x\n", pci_membase);
+	return 0;
+}
+
+static struct of_device_id mvme5100_of_bus_ids[] __initdata = {
+	{ .compatible = "hawk-bridge", }, /* used by probe_of_platform_devices() */
+	{},
+};
+
+/*
+ * Setup the architecture: add each "hawk-pci" bridge, map BOARD_MODRST_REG
+ */
+static void __init mvme5100_setup_arch(void)
+{
+	struct device_node *np;
+
+	if (ppc_md.progress)
+		ppc_md.progress("mvme5100_setup_arch()", 0);
+
+	for_each_compatible_node(np, "pci", "hawk-pci")
+		mvme5100_add_bridge(np);
+
+	restart = ioremap(BOARD_MODRST_REG, 4);
+}
+
+/* Emit the fixed vendor and machine lines for this board into @m. */
+static void mvme5100_show_cpuinfo(struct seq_file *m)
+{
+	seq_puts(m, "Vendor\t\t: Motorola/Emerson\n");
+	seq_puts(m, "Machine\t\t: MVME5100\n");
+}
+
+static void mvme5100_restart(char *cmd)
+{
+	/* With irqs off and MSR_IP set, write 0x01 to the mapped reset reg. */
+	local_irq_disable();
+	mtmsr(mfmsr() | MSR_IP);
+
+	out_8((u_char *) restart, 0x01);
+
+	while (1)
+		;
+}
+
+/*
+ * Called very early, device-tree isn't unflattened; matches root "MVME5100"
+ */
+static int __init mvme5100_probe(void)
+{
+	unsigned long root = of_get_flat_dt_root();
+
+	return of_flat_dt_is_compatible(root, "MVME5100");
+}
+
+static int __init probe_of_platform_devices(void)
+{
+	/* Probe buses listed in mvme5100_of_bus_ids; the result is ignored. */
+	of_platform_bus_probe(NULL, mvme5100_of_bus_ids, NULL);
+	return 0;
+}
+
+machine_device_initcall(mvme5100, probe_of_platform_devices);
+/* Machine description wiring up the board's probe/setup/IRQ/restart hooks. */
+define_machine(mvme5100) {
+	.name			= "MVME5100",
+	.probe			= mvme5100_probe,
+	.setup_arch		= mvme5100_setup_arch,
+	.init_IRQ		= mvme5100_pic_init,
+	.show_cpuinfo		= mvme5100_show_cpuinfo,
+	.get_irq		= mpic_get_irq,
+	.restart		= mvme5100_restart,
+	.calibrate_decr		= generic_calibrate_decr,
+	.progress		= udbg_progress,
+};
diff --git a/arch/powerpc/platforms/embedded6xx/prpmc2800.c b/arch/powerpc/platforms/embedded6xx/prpmc2800.c
deleted file mode 100644
index d455f08bea5..00000000000
--- a/arch/powerpc/platforms/embedded6xx/prpmc2800.c
+++ /dev/null
@@ -1,156 +0,0 @@
-/*
- * Board setup routines for the Motorola PrPMC2800
- *
- * Author: Dale Farnsworth <dale@farnsworth.org>
- *
- * 2007 (c) MontaVista, Software, Inc.  This file is licensed under
- * the terms of the GNU General Public License version 2.  This program
- * is licensed "as is" without any warranty of any kind, whether express
- * or implied.
- */
-
-#include <linux/stddef.h>
-#include <linux/kernel.h>
-#include <linux/delay.h>
-#include <linux/interrupt.h>
-#include <linux/seq_file.h>
-
-#include <asm/machdep.h>
-#include <asm/prom.h>
-#include <asm/time.h>
-
-#include <mm/mmu_decl.h>
-
-#include <sysdev/mv64x60.h>
-
-#define MV64x60_MPP_CNTL_0	0x0000
-#define MV64x60_MPP_CNTL_2	0x0008
-
-#define MV64x60_GPP_IO_CNTL	0x0000
-#define MV64x60_GPP_LEVEL_CNTL	0x0010
-#define MV64x60_GPP_VALUE_SET	0x0018
-
-#define PLATFORM_NAME_MAX	32
-
-static char prpmc2800_platform_name[PLATFORM_NAME_MAX];
-
-static void __iomem *mv64x60_mpp_reg_base;
-static void __iomem *mv64x60_gpp_reg_base;
-
-static void __init prpmc2800_setup_arch(void)
-{
-	struct device_node *np;
-	phys_addr_t paddr;
-	const unsigned int *reg;
-
-	/*
-	 * ioremap mpp and gpp registers in case they are later
-	 * needed by prpmc2800_reset_board().
-	 */
-	np = of_find_compatible_node(NULL, NULL, "marvell,mv64360-mpp");
-	reg = of_get_property(np, "reg", NULL);
-	paddr = of_translate_address(np, reg);
-	of_node_put(np);
-	mv64x60_mpp_reg_base = ioremap(paddr, reg[1]);
-
-	np = of_find_compatible_node(NULL, NULL, "marvell,mv64360-gpp");
-	reg = of_get_property(np, "reg", NULL);
-	paddr = of_translate_address(np, reg);
-	of_node_put(np);
-	mv64x60_gpp_reg_base = ioremap(paddr, reg[1]);
-
-#ifdef CONFIG_PCI
-	mv64x60_pci_init();
-#endif
-
-	printk("Motorola %s\n", prpmc2800_platform_name);
-}
-
-static void prpmc2800_reset_board(void)
-{
-	u32 temp;
-
-	local_irq_disable();
-
-	temp = in_le32(mv64x60_mpp_reg_base + MV64x60_MPP_CNTL_0);
-	temp &= 0xFFFF0FFF;
-	out_le32(mv64x60_mpp_reg_base + MV64x60_MPP_CNTL_0, temp);
-
-	temp = in_le32(mv64x60_gpp_reg_base + MV64x60_GPP_LEVEL_CNTL);
-	temp |= 0x00000004;
-	out_le32(mv64x60_gpp_reg_base + MV64x60_GPP_LEVEL_CNTL, temp);
-
-	temp = in_le32(mv64x60_gpp_reg_base + MV64x60_GPP_IO_CNTL);
-	temp |= 0x00000004;
-	out_le32(mv64x60_gpp_reg_base + MV64x60_GPP_IO_CNTL, temp);
-
-	temp = in_le32(mv64x60_mpp_reg_base + MV64x60_MPP_CNTL_2);
-	temp &= 0xFFFF0FFF;
-	out_le32(mv64x60_mpp_reg_base + MV64x60_MPP_CNTL_2, temp);
-
-	temp = in_le32(mv64x60_gpp_reg_base + MV64x60_GPP_LEVEL_CNTL);
-	temp |= 0x00080000;
-	out_le32(mv64x60_gpp_reg_base + MV64x60_GPP_LEVEL_CNTL, temp);
-
-	temp = in_le32(mv64x60_gpp_reg_base + MV64x60_GPP_IO_CNTL);
-	temp |= 0x00080000;
-	out_le32(mv64x60_gpp_reg_base + MV64x60_GPP_IO_CNTL, temp);
-
-	out_le32(mv64x60_gpp_reg_base + MV64x60_GPP_VALUE_SET, 0x00080004);
-}
-
-static void prpmc2800_restart(char *cmd)
-{
-	volatile ulong i = 10000000;
-
-	prpmc2800_reset_board();
-
-	while (i-- > 0);
-	panic("restart failed\n");
-}
-
-#ifdef CONFIG_NOT_COHERENT_CACHE
-#define PPRPM2800_COHERENCY_SETTING "off"
-#else
-#define PPRPM2800_COHERENCY_SETTING "on"
-#endif
-
-void prpmc2800_show_cpuinfo(struct seq_file *m)
-{
-	seq_printf(m, "Vendor\t\t: Motorola\n");
-	seq_printf(m, "coherency\t: %s\n", PPRPM2800_COHERENCY_SETTING);
-}
-
-/*
- * Called very early, device-tree isn't unflattened
- */
-static int __init prpmc2800_probe(void)
-{
-	unsigned long root = of_get_flat_dt_root();
-	unsigned long len = PLATFORM_NAME_MAX;
-	void *m;
-
-	if (!of_flat_dt_is_compatible(root, "motorola,PrPMC2800"))
-		return 0;
-
-	/* Update ppc_md.name with name from dt */
-	m = of_get_flat_dt_prop(root, "model", &len);
-	if (m)
-		strncpy(prpmc2800_platform_name, m,
-			min((int)len, PLATFORM_NAME_MAX - 1));
-
-	_set_L2CR(_get_L2CR() | L2CR_L2E);
-	return 1;
-}
-
-define_machine(prpmc2800){
-	.name			= prpmc2800_platform_name,
-	.probe			= prpmc2800_probe,
-	.setup_arch		= prpmc2800_setup_arch,
-	.init_early		= mv64x60_init_early,
-	.show_cpuinfo		= prpmc2800_show_cpuinfo,
-	.init_IRQ		= mv64x60_init_irq,
-	.get_irq		= mv64x60_get_irq,
-	.restart		= prpmc2800_restart,
-	.calibrate_decr		= generic_calibrate_decr,
-};
diff --git a/arch/powerpc/platforms/fsl_uli1575.c b/arch/powerpc/platforms/fsl_uli1575.c
index 92ac9b52b32..b97f6f3d3c5 100644
--- a/arch/powerpc/platforms/fsl_uli1575.c
+++ b/arch/powerpc/platforms/fsl_uli1575.c
@@ -321,8 +321,7 @@ static void hpcd_final_uli5288(struct pci_dev *dev)
 {
 	struct pci_controller *hose = pci_bus_to_host(dev->bus);
 	struct device_node *hosenode = hose ? hose->dn : NULL;
-	struct of_irq oirq;
-	int virq, pin = 2;
+	struct of_phandle_args oirq;
 	u32 laddr[3];
 
 	if (!machine_is(mpc86xx_hpcd))
@@ -331,12 +330,13 @@ static void hpcd_final_uli5288(struct pci_dev *dev)
 	if (!hosenode)
 		return;
 
+	oirq.np = hosenode;
+	oirq.args[0] = 2;
+	oirq.args_count = 1;
 	laddr[0] = (hose->first_busno << 16) | (PCI_DEVFN(31, 0) << 8);
 	laddr[1] = laddr[2] = 0;
-	of_irq_map_raw(hosenode, &pin, 1, laddr, &oirq);
-	virq = irq_create_of_mapping(oirq.controller, oirq.specifier,
-				     oirq.size);
-	dev->irq = virq;
+	of_irq_parse_raw(laddr, &oirq);
+	dev->irq = irq_create_of_mapping(&oirq);
 }
 
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AL, 0x1575, hpcd_quirk_uli1575);
diff --git a/arch/powerpc/platforms/pasemi/Makefile b/arch/powerpc/platforms/pasemi/Makefile
index ce6d789e074..8e8d4cae5eb 100644
--- a/arch/powerpc/platforms/pasemi/Makefile
+++ b/arch/powerpc/platforms/pasemi/Makefile
@@ -1,3 +1,2 @@
 obj-y	+= setup.o pci.o time.o idle.o powersave.o iommu.o dma_lib.o misc.o
 obj-$(CONFIG_PPC_PASEMI_MDIO)	+= gpio_mdio.o
-obj-$(CONFIG_PPC_PASEMI_CPUFREQ) += cpufreq.o
diff --git a/arch/powerpc/platforms/pasemi/cpufreq.c b/arch/powerpc/platforms/pasemi/cpufreq.c
deleted file mode 100644
index be1e7958909..00000000000
--- a/arch/powerpc/platforms/pasemi/cpufreq.c
+++ /dev/null
@@ -1,330 +0,0 @@
-/*
- * Copyright (C) 2007 PA Semi, Inc
- *
- * Authors: Egor Martovetsky <egor@pasemi.com>
- *	    Olof Johansson <olof@lixom.net>
- *
- * Maintained by: Olof Johansson <olof@lixom.net>
- *
- * Based on arch/powerpc/platforms/cell/cbe_cpufreq.c:
- * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-#include <linux/cpufreq.h>
-#include <linux/timer.h>
-#include <linux/module.h>
-
-#include <asm/hw_irq.h>
-#include <asm/io.h>
-#include <asm/prom.h>
-#include <asm/time.h>
-#include <asm/smp.h>
-
-#define SDCASR_REG		0x0100
-#define SDCASR_REG_STRIDE	0x1000
-#define SDCPWR_CFGA0_REG	0x0100
-#define SDCPWR_PWST0_REG	0x0000
-#define SDCPWR_GIZTIME_REG	0x0440
-
-/* SDCPWR_GIZTIME_REG fields */
-#define SDCPWR_GIZTIME_GR	0x80000000
-#define SDCPWR_GIZTIME_LONGLOCK	0x000000ff
-
-/* Offset of ASR registers from SDC base */
-#define SDCASR_OFFSET		0x120000
-
-static void __iomem *sdcpwr_mapbase;
-static void __iomem *sdcasr_mapbase;
-
-static DEFINE_MUTEX(pas_switch_mutex);
-
-/* Current astate, is used when waking up from power savings on
- * one core, in case the other core has switched states during
- * the idle time.
- */
-static int current_astate;
-
-/* We support 5(A0-A4) power states excluding turbo(A5-A6) modes */
-static struct cpufreq_frequency_table pas_freqs[] = {
-	{0,	0},
-	{1,	0},
-	{2,	0},
-	{3,	0},
-	{4,	0},
-	{0,	CPUFREQ_TABLE_END},
-};
-
-static struct freq_attr *pas_cpu_freqs_attr[] = {
-	&cpufreq_freq_attr_scaling_available_freqs,
-	NULL,
-};
-
-/*
- * hardware specific functions
- */
-
-static int get_astate_freq(int astate)
-{
-	u32 ret;
-	ret = in_le32(sdcpwr_mapbase + SDCPWR_CFGA0_REG + (astate * 0x10));
-
-	return ret & 0x3f;
-}
-
-static int get_cur_astate(int cpu)
-{
-	u32 ret;
-
-	ret = in_le32(sdcpwr_mapbase + SDCPWR_PWST0_REG);
-	ret = (ret >> (cpu * 4)) & 0x7;
-
-	return ret;
-}
-
-static int get_gizmo_latency(void)
-{
-	u32 giztime, ret;
-
-	giztime = in_le32(sdcpwr_mapbase + SDCPWR_GIZTIME_REG);
-
-	/* just provide the upper bound */
-	if (giztime & SDCPWR_GIZTIME_GR)
-		ret = (giztime & SDCPWR_GIZTIME_LONGLOCK) * 128000;
-	else
-		ret = (giztime & SDCPWR_GIZTIME_LONGLOCK) * 1000;
-
-	return ret;
-}
-
-static void set_astate(int cpu, unsigned int astate)
-{
-	unsigned long flags;
-
-	/* Return if called before init has run */
-	if (unlikely(!sdcasr_mapbase))
-		return;
-
-	local_irq_save(flags);
-
-	out_le32(sdcasr_mapbase + SDCASR_REG + SDCASR_REG_STRIDE*cpu, astate);
-
-	local_irq_restore(flags);
-}
-
-int check_astate(void)
-{
-	return get_cur_astate(hard_smp_processor_id());
-}
-
-void restore_astate(int cpu)
-{
-	set_astate(cpu, current_astate);
-}
-
-/*
- * cpufreq functions
- */
-
-static int pas_cpufreq_cpu_init(struct cpufreq_policy *policy)
-{
-	const u32 *max_freqp;
-	u32 max_freq;
-	int i, cur_astate;
-	struct resource res;
-	struct device_node *cpu, *dn;
-	int err = -ENODEV;
-
-	cpu = of_get_cpu_node(policy->cpu, NULL);
-
-	if (!cpu)
-		goto out;
-
-	dn = of_find_compatible_node(NULL, NULL, "1682m-sdc");
-	if (!dn)
-		dn = of_find_compatible_node(NULL, NULL,
-					     "pasemi,pwrficient-sdc");
-	if (!dn)
-		goto out;
-	err = of_address_to_resource(dn, 0, &res);
-	of_node_put(dn);
-	if (err)
-		goto out;
-	sdcasr_mapbase = ioremap(res.start + SDCASR_OFFSET, 0x2000);
-	if (!sdcasr_mapbase) {
-		err = -EINVAL;
-		goto out;
-	}
-
-	dn = of_find_compatible_node(NULL, NULL, "1682m-gizmo");
-	if (!dn)
-		dn = of_find_compatible_node(NULL, NULL,
-					     "pasemi,pwrficient-gizmo");
-	if (!dn) {
-		err = -ENODEV;
-		goto out_unmap_sdcasr;
-	}
-	err = of_address_to_resource(dn, 0, &res);
-	of_node_put(dn);
-	if (err)
-		goto out_unmap_sdcasr;
-	sdcpwr_mapbase = ioremap(res.start, 0x1000);
-	if (!sdcpwr_mapbase) {
-		err = -EINVAL;
-		goto out_unmap_sdcasr;
-	}
-
-	pr_debug("init cpufreq on CPU %d\n", policy->cpu);
-
-	max_freqp = of_get_property(cpu, "clock-frequency", NULL);
-	if (!max_freqp) {
-		err = -EINVAL;
-		goto out_unmap_sdcpwr;
-	}
-
-	/* we need the freq in kHz */
-	max_freq = *max_freqp / 1000;
-
-	pr_debug("max clock-frequency is at %u kHz\n", max_freq);
-	pr_debug("initializing frequency table\n");
-
-	/* initialize frequency table */
-	for (i=0; pas_freqs[i].frequency!=CPUFREQ_TABLE_END; i++) {
-		pas_freqs[i].frequency = get_astate_freq(pas_freqs[i].index) * 100000;
-		pr_debug("%d: %d\n", i, pas_freqs[i].frequency);
-	}
-
-	policy->cpuinfo.transition_latency = get_gizmo_latency();
-
-	cur_astate = get_cur_astate(policy->cpu);
-	pr_debug("current astate is at %d\n",cur_astate);
-
-	policy->cur = pas_freqs[cur_astate].frequency;
-	cpumask_copy(policy->cpus, cpu_online_mask);
-
-	ppc_proc_freq = policy->cur * 1000ul;
-
-	cpufreq_frequency_table_get_attr(pas_freqs, policy->cpu);
-
-	/* this ensures that policy->cpuinfo_min and policy->cpuinfo_max
-	 * are set correctly
-	 */
-	return cpufreq_frequency_table_cpuinfo(policy, pas_freqs);
-
-out_unmap_sdcpwr:
-	iounmap(sdcpwr_mapbase);
-
-out_unmap_sdcasr:
-	iounmap(sdcasr_mapbase);
-out:
-	return err;
-}
-
-static int pas_cpufreq_cpu_exit(struct cpufreq_policy *policy)
-{
-	/*
-	 * We don't support CPU hotplug. Don't unmap after the system
-	 * has already made it to a running state.
-	 */
-	if (system_state != SYSTEM_BOOTING)
-		return 0;
-
-	if (sdcasr_mapbase)
-		iounmap(sdcasr_mapbase);
-	if (sdcpwr_mapbase)
-		iounmap(sdcpwr_mapbase);
-
-	cpufreq_frequency_table_put_attr(policy->cpu);
-	return 0;
-}
-
-static int pas_cpufreq_verify(struct cpufreq_policy *policy)
-{
-	return cpufreq_frequency_table_verify(policy, pas_freqs);
-}
-
-static int pas_cpufreq_target(struct cpufreq_policy *policy,
-			      unsigned int target_freq,
-			      unsigned int relation)
-{
-	struct cpufreq_freqs freqs;
-	int pas_astate_new;
-	int i;
-
-	cpufreq_frequency_table_target(policy,
-				       pas_freqs,
-				       target_freq,
-				       relation,
-				       &pas_astate_new);
-
-	freqs.old = policy->cur;
-	freqs.new = pas_freqs[pas_astate_new].frequency;
-
-	mutex_lock(&pas_switch_mutex);
-	cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE);
-
-	pr_debug("setting frequency for cpu %d to %d kHz, 1/%d of max frequency\n",
-		 policy->cpu,
-		 pas_freqs[pas_astate_new].frequency,
-		 pas_freqs[pas_astate_new].index);
-
-	current_astate = pas_astate_new;
-
-	for_each_online_cpu(i)
-		set_astate(i, pas_astate_new);
-
-	cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE);
-	mutex_unlock(&pas_switch_mutex);
-
-	ppc_proc_freq = freqs.new * 1000ul;
-	return 0;
-}
-
-static struct cpufreq_driver pas_cpufreq_driver = {
-	.name		= "pas-cpufreq",
-	.owner		= THIS_MODULE,
-	.flags		= CPUFREQ_CONST_LOOPS,
-	.init		= pas_cpufreq_cpu_init,
-	.exit		= pas_cpufreq_cpu_exit,
-	.verify		= pas_cpufreq_verify,
-	.target		= pas_cpufreq_target,
-	.attr		= pas_cpu_freqs_attr,
-};
-
-/*
- * module init and destoy
- */
-
-static int __init pas_cpufreq_init(void)
-{
-	if (!of_machine_is_compatible("PA6T-1682M") &&
-	    !of_machine_is_compatible("pasemi,pwrficient"))
-		return -ENODEV;
-
-	return cpufreq_register_driver(&pas_cpufreq_driver);
-}
-
-static void __exit pas_cpufreq_exit(void)
-{
-	cpufreq_unregister_driver(&pas_cpufreq_driver);
-}
-
-module_init(pas_cpufreq_init);
-module_exit(pas_cpufreq_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Egor Martovetsky <egor@pasemi.com>, Olof Johansson <olof@lixom.net>");
diff --git a/arch/powerpc/platforms/pasemi/dma_lib.c b/arch/powerpc/platforms/pasemi/dma_lib.c
index f3defd8a280..aafa01ba062 100644
--- a/arch/powerpc/platforms/pasemi/dma_lib.c
+++ b/arch/powerpc/platforms/pasemi/dma_lib.c
@@ -18,7 +18,6 @@
  */
 
 #include <linux/kernel.h>
-#include <linux/init.h>
 #include <linux/export.h>
 #include <linux/pci.h>
 #include <linux/slab.h>
diff --git a/arch/powerpc/platforms/pasemi/gpio_mdio.c b/arch/powerpc/platforms/pasemi/gpio_mdio.c
index 0237ab782fb..15adee54463 100644
--- a/arch/powerpc/platforms/pasemi/gpio_mdio.c
+++ b/arch/powerpc/platforms/pasemi/gpio_mdio.c
@@ -30,6 +30,7 @@
 #include <linux/ioport.h>
 #include <linux/interrupt.h>
 #include <linux/phy.h>
+#include <linux/of_address.h>
 #include <linux/of_mdio.h>
 #include <linux/of_platform.h>
 
diff --git a/arch/powerpc/platforms/pasemi/iommu.c b/arch/powerpc/platforms/pasemi/iommu.c
index 7d2d036754b..2e576f2ae44 100644
--- a/arch/powerpc/platforms/pasemi/iommu.c
+++ b/arch/powerpc/platforms/pasemi/iommu.c
@@ -138,8 +138,11 @@ static void iommu_table_iobmap_setup(void)
 	pr_debug(" -> %s\n", __func__);
 	iommu_table_iobmap.it_busno = 0;
 	iommu_table_iobmap.it_offset = 0;
+	iommu_table_iobmap.it_page_shift = IOBMAP_PAGE_SHIFT;
+
 	/* it_size is in number of entries */
-	iommu_table_iobmap.it_size = 0x80000000 >> IOBMAP_PAGE_SHIFT;
+	iommu_table_iobmap.it_size =
+		0x80000000 >> iommu_table_iobmap.it_page_shift;
 
 	/* Initialize the common IOMMU code */
 	iommu_table_iobmap.it_base = (unsigned long)iob_l2_base;
diff --git a/arch/powerpc/platforms/pasemi/powersave.S b/arch/powerpc/platforms/pasemi/powersave.S
index 56f45adcd08..81ab555aa49 100644
--- a/arch/powerpc/platforms/pasemi/powersave.S
+++ b/arch/powerpc/platforms/pasemi/powersave.S
@@ -66,7 +66,7 @@ sleep_common:
 	std	r3, 48(r1)
 
 	/* Only do power savings when in astate 0 */
-	bl	.check_astate
+	bl	check_astate
 	cmpwi	r3,0
 	bne	1f
 
diff --git a/arch/powerpc/platforms/powermac/Makefile b/arch/powerpc/platforms/powermac/Makefile
index ea47df66fee..52c6ce1cc98 100644
--- a/arch/powerpc/platforms/powermac/Makefile
+++ b/arch/powerpc/platforms/powermac/Makefile
@@ -9,8 +9,6 @@ obj-y				+= pic.o setup.o time.o feature.o pci.o \
 				   sleep.o low_i2c.o cache.o pfunc_core.o \
 				   pfunc_base.o udbg_scc.o udbg_adb.o
 obj-$(CONFIG_PMAC_BACKLIGHT)	+= backlight.o
-obj-$(CONFIG_CPU_FREQ_PMAC)	+= cpufreq_32.o
-obj-$(CONFIG_CPU_FREQ_PMAC64)	+= cpufreq_64.o
 # CONFIG_NVRAM is an arch. independent tristate symbol, for pmac32 we really
 # need this to be a bool.  Cheat here and pretend CONFIG_NVRAM=m is really
 # CONFIG_NVRAM=y
diff --git a/arch/powerpc/platforms/powermac/cpufreq_32.c b/arch/powerpc/platforms/powermac/cpufreq_32.c
deleted file mode 100644
index 3104fad8248..00000000000
--- a/arch/powerpc/platforms/powermac/cpufreq_32.c
+++ /dev/null
@@ -1,721 +0,0 @@
-/*
- *  Copyright (C) 2002 - 2005 Benjamin Herrenschmidt <benh@kernel.crashing.org>
- *  Copyright (C) 2004        John Steele Scott <toojays@toojays.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * TODO: Need a big cleanup here. Basically, we need to have different
- * cpufreq_driver structures for the different type of HW instead of the
- * current mess. We also need to better deal with the detection of the
- * type of machine.
- *
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <linux/delay.h>
-#include <linux/sched.h>
-#include <linux/adb.h>
-#include <linux/pmu.h>
-#include <linux/cpufreq.h>
-#include <linux/init.h>
-#include <linux/device.h>
-#include <linux/hardirq.h>
-#include <asm/prom.h>
-#include <asm/machdep.h>
-#include <asm/irq.h>
-#include <asm/pmac_feature.h>
-#include <asm/mmu_context.h>
-#include <asm/sections.h>
-#include <asm/cputable.h>
-#include <asm/time.h>
-#include <asm/mpic.h>
-#include <asm/keylargo.h>
-#include <asm/switch_to.h>
-
-/* WARNING !!! This will cause calibrate_delay() to be called,
- * but this is an __init function ! So you MUST go edit
- * init/main.c to make it non-init before enabling DEBUG_FREQ
- */
-#undef DEBUG_FREQ
-
-extern void low_choose_7447a_dfs(int dfs);
-extern void low_choose_750fx_pll(int pll);
-extern void low_sleep_handler(void);
-
-/*
- * Currently, PowerMac cpufreq supports only high & low frequencies
- * that are set by the firmware
- */
-static unsigned int low_freq;
-static unsigned int hi_freq;
-static unsigned int cur_freq;
-static unsigned int sleep_freq;
-static unsigned long transition_latency;
-
-/*
- * Different models uses different mechanisms to switch the frequency
- */
-static int (*set_speed_proc)(int low_speed);
-static unsigned int (*get_speed_proc)(void);
-
-/*
- * Some definitions used by the various speedprocs
- */
-static u32 voltage_gpio;
-static u32 frequency_gpio;
-static u32 slew_done_gpio;
-static int no_schedule;
-static int has_cpu_l2lve;
-static int is_pmu_based;
-
-/* There are only two frequency states for each processor. Values
- * are in kHz for the time being.
- */
-#define CPUFREQ_HIGH                  0
-#define CPUFREQ_LOW                   1
-
-static struct cpufreq_frequency_table pmac_cpu_freqs[] = {
-	{CPUFREQ_HIGH, 		0},
-	{CPUFREQ_LOW,		0},
-	{0,			CPUFREQ_TABLE_END},
-};
-
-static struct freq_attr* pmac_cpu_freqs_attr[] = {
-	&cpufreq_freq_attr_scaling_available_freqs,
-	NULL,
-};
-
-static inline void local_delay(unsigned long ms)
-{
-	if (no_schedule)
-		mdelay(ms);
-	else
-		msleep(ms);
-}
-
-#ifdef DEBUG_FREQ
-static inline void debug_calc_bogomips(void)
-{
-	/* This will cause a recalc of bogomips and display the
-	 * result. We backup/restore the value to avoid affecting the
-	 * core cpufreq framework's own calculation.
-	 */
-	unsigned long save_lpj = loops_per_jiffy;
-	calibrate_delay();
-	loops_per_jiffy = save_lpj;
-}
-#endif /* DEBUG_FREQ */
-
-/* Switch CPU speed under 750FX CPU control
- */
-static int cpu_750fx_cpu_speed(int low_speed)
-{
-	u32 hid2;
-
-	if (low_speed == 0) {
-		/* ramping up, set voltage first */
-		pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, voltage_gpio, 0x05);
-		/* Make sure we sleep for at least 1ms */
-		local_delay(10);
-
-		/* tweak L2 for high voltage */
-		if (has_cpu_l2lve) {
-			hid2 = mfspr(SPRN_HID2);
-			hid2 &= ~0x2000;
-			mtspr(SPRN_HID2, hid2);
-		}
-	}
-#ifdef CONFIG_6xx
-	low_choose_750fx_pll(low_speed);
-#endif
-	if (low_speed == 1) {
-		/* tweak L2 for low voltage */
-		if (has_cpu_l2lve) {
-			hid2 = mfspr(SPRN_HID2);
-			hid2 |= 0x2000;
-			mtspr(SPRN_HID2, hid2);
-		}
-
-		/* ramping down, set voltage last */
-		pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, voltage_gpio, 0x04);
-		local_delay(10);
-	}
-
-	return 0;
-}
-
-static unsigned int cpu_750fx_get_cpu_speed(void)
-{
-	if (mfspr(SPRN_HID1) & HID1_PS)
-		return low_freq;
-	else
-		return hi_freq;
-}
-
-/* Switch CPU speed using DFS */
-static int dfs_set_cpu_speed(int low_speed)
-{
-	if (low_speed == 0) {
-		/* ramping up, set voltage first */
-		pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, voltage_gpio, 0x05);
-		/* Make sure we sleep for at least 1ms */
-		local_delay(1);
-	}
-
-	/* set frequency */
-#ifdef CONFIG_6xx
-	low_choose_7447a_dfs(low_speed);
-#endif
-	udelay(100);
-
-	if (low_speed == 1) {
-		/* ramping down, set voltage last */
-		pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, voltage_gpio, 0x04);
-		local_delay(1);
-	}
-
-	return 0;
-}
-
-static unsigned int dfs_get_cpu_speed(void)
-{
-	if (mfspr(SPRN_HID1) & HID1_DFS)
-		return low_freq;
-	else
-		return hi_freq;
-}
-
-
-/* Switch CPU speed using slewing GPIOs
- */
-static int gpios_set_cpu_speed(int low_speed)
-{
-	int gpio, timeout = 0;
-
-	/* If ramping up, set voltage first */
-	if (low_speed == 0) {
-		pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, voltage_gpio, 0x05);
-		/* Delay is way too big but it's ok, we schedule */
-		local_delay(10);
-	}
-
-	/* Set frequency */
-	gpio = 	pmac_call_feature(PMAC_FTR_READ_GPIO, NULL, frequency_gpio, 0);
-	if (low_speed == ((gpio & 0x01) == 0))
-		goto skip;
-
-	pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, frequency_gpio,
-			  low_speed ? 0x04 : 0x05);
-	udelay(200);
-	do {
-		if (++timeout > 100)
-			break;
-		local_delay(1);
-		gpio = pmac_call_feature(PMAC_FTR_READ_GPIO, NULL, slew_done_gpio, 0);
-	} while((gpio & 0x02) == 0);
- skip:
-	/* If ramping down, set voltage last */
-	if (low_speed == 1) {
-		pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, voltage_gpio, 0x04);
-		/* Delay is way too big but it's ok, we schedule */
-		local_delay(10);
-	}
-
-#ifdef DEBUG_FREQ
-	debug_calc_bogomips();
-#endif
-
-	return 0;
-}
-
-/* Switch CPU speed under PMU control
- */
-static int pmu_set_cpu_speed(int low_speed)
-{
-	struct adb_request req;
-	unsigned long save_l2cr;
-	unsigned long save_l3cr;
-	unsigned int pic_prio;
-	unsigned long flags;
-
-	preempt_disable();
-
-#ifdef DEBUG_FREQ
-	printk(KERN_DEBUG "HID1, before: %x\n", mfspr(SPRN_HID1));
-#endif
-	pmu_suspend();
-
-	/* Disable all interrupt sources on openpic */
- 	pic_prio = mpic_cpu_get_priority();
-	mpic_cpu_set_priority(0xf);
-
-	/* Make sure the decrementer won't interrupt us */
-	asm volatile("mtdec %0" : : "r" (0x7fffffff));
-	/* Make sure any pending DEC interrupt occurring while we did
-	 * the above didn't re-enable the DEC */
-	mb();
-	asm volatile("mtdec %0" : : "r" (0x7fffffff));
-
-	/* We can now disable MSR_EE */
-	local_irq_save(flags);
-
-	/* Giveup the FPU & vec */
-	enable_kernel_fp();
-
-#ifdef CONFIG_ALTIVEC
-	if (cpu_has_feature(CPU_FTR_ALTIVEC))
-		enable_kernel_altivec();
-#endif /* CONFIG_ALTIVEC */
-
-	/* Save & disable L2 and L3 caches */
-	save_l3cr = _get_L3CR();	/* (returns -1 if not available) */
-	save_l2cr = _get_L2CR();	/* (returns -1 if not available) */
-
-	/* Send the new speed command. My assumption is that this command
-	 * will cause PLL_CFG[0..3] to be changed next time CPU goes to sleep
-	 */
-	pmu_request(&req, NULL, 6, PMU_CPU_SPEED, 'W', 'O', 'O', 'F', low_speed);
-	while (!req.complete)
-		pmu_poll();
-
-	/* Prepare the northbridge for the speed transition */
-	pmac_call_feature(PMAC_FTR_SLEEP_STATE,NULL,1,1);
-
-	/* Call low level code to backup CPU state and recover from
-	 * hardware reset
-	 */
-	low_sleep_handler();
-
-	/* Restore the northbridge */
-	pmac_call_feature(PMAC_FTR_SLEEP_STATE,NULL,1,0);
-
-	/* Restore L2 cache */
-	if (save_l2cr != 0xffffffff && (save_l2cr & L2CR_L2E) != 0)
- 		_set_L2CR(save_l2cr);
-	/* Restore L3 cache */
-	if (save_l3cr != 0xffffffff && (save_l3cr & L3CR_L3E) != 0)
- 		_set_L3CR(save_l3cr);
-
-	/* Restore userland MMU context */
-	switch_mmu_context(NULL, current->active_mm);
-
-#ifdef DEBUG_FREQ
-	printk(KERN_DEBUG "HID1, after: %x\n", mfspr(SPRN_HID1));
-#endif
-
-	/* Restore low level PMU operations */
-	pmu_unlock();
-
-	/*
-	 * Restore decrementer; we'll take a decrementer interrupt
-	 * as soon as interrupts are re-enabled and the generic
-	 * clockevents code will reprogram it with the right value.
-	 */
-	set_dec(1);
-
-	/* Restore interrupts */
- 	mpic_cpu_set_priority(pic_prio);
-
-	/* Let interrupts flow again ... */
-	local_irq_restore(flags);
-
-#ifdef DEBUG_FREQ
-	debug_calc_bogomips();
-#endif
-
-	pmu_resume();
-
-	preempt_enable();
-
-	return 0;
-}
-
-static int do_set_cpu_speed(struct cpufreq_policy *policy, int speed_mode,
-		int notify)
-{
-	struct cpufreq_freqs freqs;
-	unsigned long l3cr;
-	static unsigned long prev_l3cr;
-
-	freqs.old = cur_freq;
-	freqs.new = (speed_mode == CPUFREQ_HIGH) ? hi_freq : low_freq;
-
-	if (freqs.old == freqs.new)
-		return 0;
-
-	if (notify)
-		cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE);
-	if (speed_mode == CPUFREQ_LOW &&
-	    cpu_has_feature(CPU_FTR_L3CR)) {
-		l3cr = _get_L3CR();
-		if (l3cr & L3CR_L3E) {
-			prev_l3cr = l3cr;
-			_set_L3CR(0);
-		}
-	}
-	set_speed_proc(speed_mode == CPUFREQ_LOW);
-	if (speed_mode == CPUFREQ_HIGH &&
-	    cpu_has_feature(CPU_FTR_L3CR)) {
-		l3cr = _get_L3CR();
-		if ((prev_l3cr & L3CR_L3E) && l3cr != prev_l3cr)
-			_set_L3CR(prev_l3cr);
-	}
-	if (notify)
-		cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE);
-	cur_freq = (speed_mode == CPUFREQ_HIGH) ? hi_freq : low_freq;
-
-	return 0;
-}
-
-static unsigned int pmac_cpufreq_get_speed(unsigned int cpu)
-{
-	return cur_freq;
-}
-
-static int pmac_cpufreq_verify(struct cpufreq_policy *policy)
-{
-	return cpufreq_frequency_table_verify(policy, pmac_cpu_freqs);
-}
-
-static int pmac_cpufreq_target(	struct cpufreq_policy *policy,
-					unsigned int target_freq,
-					unsigned int relation)
-{
-	unsigned int    newstate = 0;
-	int		rc;
-
-	if (cpufreq_frequency_table_target(policy, pmac_cpu_freqs,
-			target_freq, relation, &newstate))
-		return -EINVAL;
-
-	rc = do_set_cpu_speed(policy, newstate, 1);
-
-	ppc_proc_freq = cur_freq * 1000ul;
-	return rc;
-}
-
-static int pmac_cpufreq_cpu_init(struct cpufreq_policy *policy)
-{
-	if (policy->cpu != 0)
-		return -ENODEV;
-
-	policy->cpuinfo.transition_latency	= transition_latency;
-	policy->cur = cur_freq;
-
-	cpufreq_frequency_table_get_attr(pmac_cpu_freqs, policy->cpu);
-	return cpufreq_frequency_table_cpuinfo(policy, pmac_cpu_freqs);
-}
-
-static u32 read_gpio(struct device_node *np)
-{
-	const u32 *reg = of_get_property(np, "reg", NULL);
-	u32 offset;
-
-	if (reg == NULL)
-		return 0;
-	/* That works for all keylargos but shall be fixed properly
-	 * some day... The problem is that it seems we can't rely
-	 * on the "reg" property of the GPIO nodes, they are either
-	 * relative to the base of KeyLargo or to the base of the
-	 * GPIO space, and the device-tree doesn't help.
-	 */
-	offset = *reg;
-	if (offset < KEYLARGO_GPIO_LEVELS0)
-		offset += KEYLARGO_GPIO_LEVELS0;
-	return offset;
-}
-
-static int pmac_cpufreq_suspend(struct cpufreq_policy *policy)
-{
-	/* Ok, this could be made a bit smarter, but let's be robust for now. We
-	 * always force a speed change to high speed before sleep, to make sure
-	 * we have appropriate voltage and/or bus speed for the wakeup process,
-	 * and to make sure our loops_per_jiffies are "good enough", that is will
-	 * not cause too short delays if we sleep in low speed and wake in high
-	 * speed..
-	 */
-	no_schedule = 1;
-	sleep_freq = cur_freq;
-	if (cur_freq == low_freq && !is_pmu_based)
-		do_set_cpu_speed(policy, CPUFREQ_HIGH, 0);
-	return 0;
-}
-
-static int pmac_cpufreq_resume(struct cpufreq_policy *policy)
-{
-	/* If we resume, first check if we have a get() function */
-	if (get_speed_proc)
-		cur_freq = get_speed_proc();
-	else
-		cur_freq = 0;
-
-	/* We don't, hrm... we don't really know our speed here, best
-	 * is that we force a switch to whatever it was, which is
-	 * probably high speed due to our suspend() routine
-	 */
-	do_set_cpu_speed(policy, sleep_freq == low_freq ?
-			 CPUFREQ_LOW : CPUFREQ_HIGH, 0);
-
-	ppc_proc_freq = cur_freq * 1000ul;
-
-	no_schedule = 0;
-	return 0;
-}
-
-static struct cpufreq_driver pmac_cpufreq_driver = {
-	.verify 	= pmac_cpufreq_verify,
-	.target 	= pmac_cpufreq_target,
-	.get		= pmac_cpufreq_get_speed,
-	.init		= pmac_cpufreq_cpu_init,
-	.suspend	= pmac_cpufreq_suspend,
-	.resume		= pmac_cpufreq_resume,
-	.flags		= CPUFREQ_PM_NO_WARN,
-	.attr		= pmac_cpu_freqs_attr,
-	.name		= "powermac",
-	.owner		= THIS_MODULE,
-};
-
-
-static int pmac_cpufreq_init_MacRISC3(struct device_node *cpunode)
-{
-	struct device_node *volt_gpio_np = of_find_node_by_name(NULL,
-								"voltage-gpio");
-	struct device_node *freq_gpio_np = of_find_node_by_name(NULL,
-								"frequency-gpio");
-	struct device_node *slew_done_gpio_np = of_find_node_by_name(NULL,
-								     "slewing-done");
-	const u32 *value;
-
-	/*
-	 * Check to see if it's GPIO driven or PMU only
-	 *
-	 * The way we extract the GPIO address is slightly hackish, but it
-	 * works well enough for now. We need to abstract the whole GPIO
-	 * stuff sooner or later anyway
-	 */
-
-	if (volt_gpio_np)
-		voltage_gpio = read_gpio(volt_gpio_np);
-	if (freq_gpio_np)
-		frequency_gpio = read_gpio(freq_gpio_np);
-	if (slew_done_gpio_np)
-		slew_done_gpio = read_gpio(slew_done_gpio_np);
-
-	/* If we use the frequency GPIOs, calculate the min/max speeds based
-	 * on the bus frequencies
-	 */
-	if (frequency_gpio && slew_done_gpio) {
-		int lenp, rc;
-		const u32 *freqs, *ratio;
-
-		freqs = of_get_property(cpunode, "bus-frequencies", &lenp);
-		lenp /= sizeof(u32);
-		if (freqs == NULL || lenp != 2) {
-			printk(KERN_ERR "cpufreq: bus-frequencies incorrect or missing\n");
-			return 1;
-		}
-		ratio = of_get_property(cpunode, "processor-to-bus-ratio*2",
-						NULL);
-		if (ratio == NULL) {
-			printk(KERN_ERR "cpufreq: processor-to-bus-ratio*2 missing\n");
-			return 1;
-		}
-
-		/* Get the min/max bus frequencies */
-		low_freq = min(freqs[0], freqs[1]);
-		hi_freq = max(freqs[0], freqs[1]);
-
-		/* Grrrr.. It _seems_ that the device-tree is lying on the low bus
-		 * frequency, it claims it to be around 84Mhz on some models while
-		 * it appears to be approx. 101Mhz on all. Let's hack around here...
-		 * fortunately, we don't need to be too precise
-		 */
-		if (low_freq < 98000000)
-			low_freq = 101000000;
-
-		/* Convert those to CPU core clocks */
-		low_freq = (low_freq * (*ratio)) / 2000;
-		hi_freq = (hi_freq * (*ratio)) / 2000;
-
-		/* Now we get the frequencies, we read the GPIO to see what is out current
-		 * speed
-		 */
-		rc = pmac_call_feature(PMAC_FTR_READ_GPIO, NULL, frequency_gpio, 0);
-		cur_freq = (rc & 0x01) ? hi_freq : low_freq;
-
-		set_speed_proc = gpios_set_cpu_speed;
-		return 1;
-	}
-
-	/* If we use the PMU, look for the min & max frequencies in the
-	 * device-tree
-	 */
-	value = of_get_property(cpunode, "min-clock-frequency", NULL);
-	if (!value)
-		return 1;
-	low_freq = (*value) / 1000;
-	/* The PowerBook G4 12" (PowerBook6,1) has an error in the device-tree
-	 * here */
-	if (low_freq < 100000)
-		low_freq *= 10;
-
-	value = of_get_property(cpunode, "max-clock-frequency", NULL);
-	if (!value)
-		return 1;
-	hi_freq = (*value) / 1000;
-	set_speed_proc = pmu_set_cpu_speed;
-	is_pmu_based = 1;
-
-	return 0;
-}
-
-static int pmac_cpufreq_init_7447A(struct device_node *cpunode)
-{
-	struct device_node *volt_gpio_np;
-
-	if (of_get_property(cpunode, "dynamic-power-step", NULL) == NULL)
-		return 1;
-
-	volt_gpio_np = of_find_node_by_name(NULL, "cpu-vcore-select");
-	if (volt_gpio_np)
-		voltage_gpio = read_gpio(volt_gpio_np);
-	if (!voltage_gpio){
-		printk(KERN_ERR "cpufreq: missing cpu-vcore-select gpio\n");
-		return 1;
-	}
-
-	/* OF only reports the high frequency */
-	hi_freq = cur_freq;
-	low_freq = cur_freq/2;
-
-	/* Read actual frequency from CPU */
-	cur_freq = dfs_get_cpu_speed();
-	set_speed_proc = dfs_set_cpu_speed;
-	get_speed_proc = dfs_get_cpu_speed;
-
-	return 0;
-}
-
-static int pmac_cpufreq_init_750FX(struct device_node *cpunode)
-{
-	struct device_node *volt_gpio_np;
-	u32 pvr;
-	const u32 *value;
-
-	if (of_get_property(cpunode, "dynamic-power-step", NULL) == NULL)
-		return 1;
-
-	hi_freq = cur_freq;
-	value = of_get_property(cpunode, "reduced-clock-frequency", NULL);
-	if (!value)
-		return 1;
-	low_freq = (*value) / 1000;
-
-	volt_gpio_np = of_find_node_by_name(NULL, "cpu-vcore-select");
-	if (volt_gpio_np)
-		voltage_gpio = read_gpio(volt_gpio_np);
-
-	pvr = mfspr(SPRN_PVR);
-	has_cpu_l2lve = !((pvr & 0xf00) == 0x100);
-
-	set_speed_proc = cpu_750fx_cpu_speed;
-	get_speed_proc = cpu_750fx_get_cpu_speed;
-	cur_freq = cpu_750fx_get_cpu_speed();
-
-	return 0;
-}
-
-/* Currently, we support the following machines:
- *
- *  - Titanium PowerBook 1Ghz (PMU based, 667Mhz & 1Ghz)
- *  - Titanium PowerBook 800 (PMU based, 667Mhz & 800Mhz)
- *  - Titanium PowerBook 400 (PMU based, 300Mhz & 400Mhz)
- *  - Titanium PowerBook 500 (PMU based, 300Mhz & 500Mhz)
- *  - iBook2 500/600 (PMU based, 400Mhz & 500/600Mhz)
- *  - iBook2 700 (CPU based, 400Mhz & 700Mhz, support low voltage)
- *  - Recent MacRISC3 laptops
- *  - All new machines with 7447A CPUs
- */
-static int __init pmac_cpufreq_setup(void)
-{
-	struct device_node	*cpunode;
-	const u32		*value;
-
-	if (strstr(cmd_line, "nocpufreq"))
-		return 0;
-
-	/* Assume only one CPU */
-	cpunode = of_find_node_by_type(NULL, "cpu");
-	if (!cpunode)
-		goto out;
-
-	/* Get current cpu clock freq */
-	value = of_get_property(cpunode, "clock-frequency", NULL);
-	if (!value)
-		goto out;
-	cur_freq = (*value) / 1000;
-	transition_latency = CPUFREQ_ETERNAL;
-
-	/*  Check for 7447A based MacRISC3 */
-	if (of_machine_is_compatible("MacRISC3") &&
-	    of_get_property(cpunode, "dynamic-power-step", NULL) &&
-	    PVR_VER(mfspr(SPRN_PVR)) == 0x8003) {
-		pmac_cpufreq_init_7447A(cpunode);
-		transition_latency = 8000000;
-	/* Check for other MacRISC3 machines */
-	} else if (of_machine_is_compatible("PowerBook3,4") ||
-		   of_machine_is_compatible("PowerBook3,5") ||
-		   of_machine_is_compatible("MacRISC3")) {
-		pmac_cpufreq_init_MacRISC3(cpunode);
-	/* Else check for iBook2 500/600 */
-	} else if (of_machine_is_compatible("PowerBook4,1")) {
-		hi_freq = cur_freq;
-		low_freq = 400000;
-		set_speed_proc = pmu_set_cpu_speed;
-		is_pmu_based = 1;
-	}
-	/* Else check for TiPb 550 */
-	else if (of_machine_is_compatible("PowerBook3,3") && cur_freq == 550000) {
-		hi_freq = cur_freq;
-		low_freq = 500000;
-		set_speed_proc = pmu_set_cpu_speed;
-		is_pmu_based = 1;
-	}
-	/* Else check for TiPb 400 & 500 */
-	else if (of_machine_is_compatible("PowerBook3,2")) {
-		/* We only know about the 400 MHz and the 500Mhz model
-		 * they both have 300 MHz as low frequency
-		 */
-		if (cur_freq < 350000 || cur_freq > 550000)
-			goto out;
-		hi_freq = cur_freq;
-		low_freq = 300000;
-		set_speed_proc = pmu_set_cpu_speed;
-		is_pmu_based = 1;
-	}
-	/* Else check for 750FX */
-	else if (PVR_VER(mfspr(SPRN_PVR)) == 0x7000)
-		pmac_cpufreq_init_750FX(cpunode);
-out:
-	of_node_put(cpunode);
-	if (set_speed_proc == NULL)
-		return -ENODEV;
-
-	pmac_cpu_freqs[CPUFREQ_LOW].frequency = low_freq;
-	pmac_cpu_freqs[CPUFREQ_HIGH].frequency = hi_freq;
-	ppc_proc_freq = cur_freq * 1000ul;
-
-	printk(KERN_INFO "Registering PowerMac CPU frequency driver\n");
-	printk(KERN_INFO "Low: %d Mhz, High: %d Mhz, Boot: %d Mhz\n",
-	       low_freq/1000, hi_freq/1000, cur_freq/1000);
-
-	return cpufreq_register_driver(&pmac_cpufreq_driver);
-}
-
-module_init(pmac_cpufreq_setup);
-
diff --git a/arch/powerpc/platforms/powermac/cpufreq_64.c b/arch/powerpc/platforms/powermac/cpufreq_64.c
deleted file mode 100644
index 7ba423431cf..00000000000
--- a/arch/powerpc/platforms/powermac/cpufreq_64.c
+++ /dev/null
@@ -1,746 +0,0 @@
-/*
- *  Copyright (C) 2002 - 2005 Benjamin Herrenschmidt <benh@kernel.crashing.org>
- *  and                       Markus Demleitner <msdemlei@cl.uni-heidelberg.de>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This driver adds basic cpufreq support for SMU & 970FX based G5 Macs,
- * that is iMac G5 and latest single CPU desktop.
- */
-
-#undef DEBUG
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <linux/delay.h>
-#include <linux/sched.h>
-#include <linux/cpufreq.h>
-#include <linux/init.h>
-#include <linux/completion.h>
-#include <linux/mutex.h>
-#include <asm/prom.h>
-#include <asm/machdep.h>
-#include <asm/irq.h>
-#include <asm/sections.h>
-#include <asm/cputable.h>
-#include <asm/time.h>
-#include <asm/smu.h>
-#include <asm/pmac_pfunc.h>
-
-#define DBG(fmt...) pr_debug(fmt)
-
-/* see 970FX user manual */
-
-#define SCOM_PCR 0x0aa001			/* PCR scom addr */
-
-#define PCR_HILO_SELECT		0x80000000U	/* 1 = PCR, 0 = PCRH */
-#define PCR_SPEED_FULL		0x00000000U	/* 1:1 speed value */
-#define PCR_SPEED_HALF		0x00020000U	/* 1:2 speed value */
-#define PCR_SPEED_QUARTER	0x00040000U	/* 1:4 speed value */
-#define PCR_SPEED_MASK		0x000e0000U	/* speed mask */
-#define PCR_SPEED_SHIFT		17
-#define PCR_FREQ_REQ_VALID	0x00010000U	/* freq request valid */
-#define PCR_VOLT_REQ_VALID	0x00008000U	/* volt request valid */
-#define PCR_TARGET_TIME_MASK	0x00006000U	/* target time */
-#define PCR_STATLAT_MASK	0x00001f00U	/* STATLAT value */
-#define PCR_SNOOPLAT_MASK	0x000000f0U	/* SNOOPLAT value */
-#define PCR_SNOOPACC_MASK	0x0000000fU	/* SNOOPACC value */
-
-#define SCOM_PSR 0x408001			/* PSR scom addr */
-/* warning: PSR is a 64 bits register */
-#define PSR_CMD_RECEIVED	0x2000000000000000U   /* command received */
-#define PSR_CMD_COMPLETED	0x1000000000000000U   /* command completed */
-#define PSR_CUR_SPEED_MASK	0x0300000000000000U   /* current speed */
-#define PSR_CUR_SPEED_SHIFT	(56)
-
-/*
- * The G5 only supports two frequencies (Quarter speed is not supported)
- */
-#define CPUFREQ_HIGH                  0
-#define CPUFREQ_LOW                   1
-
-static struct cpufreq_frequency_table g5_cpu_freqs[] = {
-	{CPUFREQ_HIGH, 		0},
-	{CPUFREQ_LOW,		0},
-	{0,			CPUFREQ_TABLE_END},
-};
-
-static struct freq_attr* g5_cpu_freqs_attr[] = {
-	&cpufreq_freq_attr_scaling_available_freqs,
-	NULL,
-};
-
-/* Power mode data is an array of the 32 bits PCR values to use for
- * the various frequencies, retrieved from the device-tree
- */
-static int g5_pmode_cur;
-
-static void (*g5_switch_volt)(int speed_mode);
-static int (*g5_switch_freq)(int speed_mode);
-static int (*g5_query_freq)(void);
-
-static DEFINE_MUTEX(g5_switch_mutex);
-
-static unsigned long transition_latency;
-
-#ifdef CONFIG_PMAC_SMU
-
-static const u32 *g5_pmode_data;
-static int g5_pmode_max;
-
-static struct smu_sdbp_fvt *g5_fvt_table;	/* table of op. points */
-static int g5_fvt_count;			/* number of op. points */
-static int g5_fvt_cur;				/* current op. point */
-
-/*
- * SMU based voltage switching for Neo2 platforms
- */
-
-static void g5_smu_switch_volt(int speed_mode)
-{
-	struct smu_simple_cmd	cmd;
-
-	DECLARE_COMPLETION_ONSTACK(comp);
-	smu_queue_simple(&cmd, SMU_CMD_POWER_COMMAND, 8, smu_done_complete,
-			 &comp, 'V', 'S', 'L', 'E', 'W',
-			 0xff, g5_fvt_cur+1, speed_mode);
-	wait_for_completion(&comp);
-}
-
-/*
- * Platform function based voltage/vdnap switching for Neo2
- */
-
-static struct pmf_function *pfunc_set_vdnap0;
-static struct pmf_function *pfunc_vdnap0_complete;
-
-static void g5_vdnap_switch_volt(int speed_mode)
-{
-	struct pmf_args args;
-	u32 slew, done = 0;
-	unsigned long timeout;
-
-	slew = (speed_mode == CPUFREQ_LOW) ? 1 : 0;
-	args.count = 1;
-	args.u[0].p = &slew;
-
-	pmf_call_one(pfunc_set_vdnap0, &args);
-
-	/* It's an irq GPIO so we should be able to just block here,
-	 * I'll do that later after I've properly tested the IRQ code for
-	 * platform functions
-	 */
-	timeout = jiffies + HZ/10;
-	while(!time_after(jiffies, timeout)) {
-		args.count = 1;
-		args.u[0].p = &done;
-		pmf_call_one(pfunc_vdnap0_complete, &args);
-		if (done)
-			break;
-		msleep(1);
-	}
-	if (done == 0)
-		printk(KERN_WARNING "cpufreq: Timeout in clock slewing !\n");
-}
-
-
-/*
- * SCOM based frequency switching for 970FX rev3
- */
-static int g5_scom_switch_freq(int speed_mode)
-{
-	unsigned long flags;
-	int to;
-
-	/* If frequency is going up, first ramp up the voltage */
-	if (speed_mode < g5_pmode_cur)
-		g5_switch_volt(speed_mode);
-
-	local_irq_save(flags);
-
-	/* Clear PCR high */
-	scom970_write(SCOM_PCR, 0);
-	/* Clear PCR low */
-       	scom970_write(SCOM_PCR, PCR_HILO_SELECT | 0);
-	/* Set PCR low */
-	scom970_write(SCOM_PCR, PCR_HILO_SELECT |
-		      g5_pmode_data[speed_mode]);
-
-	/* Wait for completion */
-	for (to = 0; to < 10; to++) {
-		unsigned long psr = scom970_read(SCOM_PSR);
-
-		if ((psr & PSR_CMD_RECEIVED) == 0 &&
-		    (((psr >> PSR_CUR_SPEED_SHIFT) ^
-		      (g5_pmode_data[speed_mode] >> PCR_SPEED_SHIFT)) & 0x3)
-		    == 0)
-			break;
-		if (psr & PSR_CMD_COMPLETED)
-			break;
-		udelay(100);
-	}
-
-	local_irq_restore(flags);
-
-	/* If frequency is going down, last ramp the voltage */
-	if (speed_mode > g5_pmode_cur)
-		g5_switch_volt(speed_mode);
-
-	g5_pmode_cur = speed_mode;
-	ppc_proc_freq = g5_cpu_freqs[speed_mode].frequency * 1000ul;
-
-	return 0;
-}
-
-static int g5_scom_query_freq(void)
-{
-	unsigned long psr = scom970_read(SCOM_PSR);
-	int i;
-
-	for (i = 0; i <= g5_pmode_max; i++)
-		if ((((psr >> PSR_CUR_SPEED_SHIFT) ^
-		      (g5_pmode_data[i] >> PCR_SPEED_SHIFT)) & 0x3) == 0)
-			break;
-	return i;
-}
-
-/*
- * Fake voltage switching for platforms with missing support
- */
-
-static void g5_dummy_switch_volt(int speed_mode)
-{
-}
-
-#endif /* CONFIG_PMAC_SMU */
-
-/*
- * Platform function based voltage switching for PowerMac7,2 & 7,3
- */
-
-static struct pmf_function *pfunc_cpu0_volt_high;
-static struct pmf_function *pfunc_cpu0_volt_low;
-static struct pmf_function *pfunc_cpu1_volt_high;
-static struct pmf_function *pfunc_cpu1_volt_low;
-
-static void g5_pfunc_switch_volt(int speed_mode)
-{
-	if (speed_mode == CPUFREQ_HIGH) {
-		if (pfunc_cpu0_volt_high)
-			pmf_call_one(pfunc_cpu0_volt_high, NULL);
-		if (pfunc_cpu1_volt_high)
-			pmf_call_one(pfunc_cpu1_volt_high, NULL);
-	} else {
-		if (pfunc_cpu0_volt_low)
-			pmf_call_one(pfunc_cpu0_volt_low, NULL);
-		if (pfunc_cpu1_volt_low)
-			pmf_call_one(pfunc_cpu1_volt_low, NULL);
-	}
-	msleep(10); /* should be faster , to fix */
-}
-
-/*
- * Platform function based frequency switching for PowerMac7,2 & 7,3
- */
-
-static struct pmf_function *pfunc_cpu_setfreq_high;
-static struct pmf_function *pfunc_cpu_setfreq_low;
-static struct pmf_function *pfunc_cpu_getfreq;
-static struct pmf_function *pfunc_slewing_done;
-
-static int g5_pfunc_switch_freq(int speed_mode)
-{
-	struct pmf_args args;
-	u32 done = 0;
-	unsigned long timeout;
-	int rc;
-
-	DBG("g5_pfunc_switch_freq(%d)\n", speed_mode);
-
-	/* If frequency is going up, first ramp up the voltage */
-	if (speed_mode < g5_pmode_cur)
-		g5_switch_volt(speed_mode);
-
-	/* Do it */
-	if (speed_mode == CPUFREQ_HIGH)
-		rc = pmf_call_one(pfunc_cpu_setfreq_high, NULL);
-	else
-		rc = pmf_call_one(pfunc_cpu_setfreq_low, NULL);
-
-	if (rc)
-		printk(KERN_WARNING "cpufreq: pfunc switch error %d\n", rc);
-
-	/* It's an irq GPIO so we should be able to just block here,
-	 * I'll do that later after I've properly tested the IRQ code for
-	 * platform functions
-	 */
-	timeout = jiffies + HZ/10;
-	while(!time_after(jiffies, timeout)) {
-		args.count = 1;
-		args.u[0].p = &done;
-		pmf_call_one(pfunc_slewing_done, &args);
-		if (done)
-			break;
-		msleep(1);
-	}
-	if (done == 0)
-		printk(KERN_WARNING "cpufreq: Timeout in clock slewing !\n");
-
-	/* If frequency is going down, last ramp the voltage */
-	if (speed_mode > g5_pmode_cur)
-		g5_switch_volt(speed_mode);
-
-	g5_pmode_cur = speed_mode;
-	ppc_proc_freq = g5_cpu_freqs[speed_mode].frequency * 1000ul;
-
-	return 0;
-}
-
-static int g5_pfunc_query_freq(void)
-{
-	struct pmf_args args;
-	u32 val = 0;
-
-	args.count = 1;
-	args.u[0].p = &val;
-	pmf_call_one(pfunc_cpu_getfreq, &args);
-	return val ? CPUFREQ_HIGH : CPUFREQ_LOW;
-}
-
-
-/*
- * Common interface to the cpufreq core
- */
-
-static int g5_cpufreq_verify(struct cpufreq_policy *policy)
-{
-	return cpufreq_frequency_table_verify(policy, g5_cpu_freqs);
-}
-
-static int g5_cpufreq_target(struct cpufreq_policy *policy,
-	unsigned int target_freq, unsigned int relation)
-{
-	unsigned int newstate = 0;
-	struct cpufreq_freqs freqs;
-	int rc;
-
-	if (cpufreq_frequency_table_target(policy, g5_cpu_freqs,
-			target_freq, relation, &newstate))
-		return -EINVAL;
-
-	if (g5_pmode_cur == newstate)
-		return 0;
-
-	mutex_lock(&g5_switch_mutex);
-
-	freqs.old = g5_cpu_freqs[g5_pmode_cur].frequency;
-	freqs.new = g5_cpu_freqs[newstate].frequency;
-
-	cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE);
-	rc = g5_switch_freq(newstate);
-	cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE);
-
-	mutex_unlock(&g5_switch_mutex);
-
-	return rc;
-}
-
-static unsigned int g5_cpufreq_get_speed(unsigned int cpu)
-{
-	return g5_cpu_freqs[g5_pmode_cur].frequency;
-}
-
-static int g5_cpufreq_cpu_init(struct cpufreq_policy *policy)
-{
-	policy->cpuinfo.transition_latency = transition_latency;
-	policy->cur = g5_cpu_freqs[g5_query_freq()].frequency;
-	/* secondary CPUs are tied to the primary one by the
-	 * cpufreq core if in the secondary policy we tell it that
-	 * it actually must be one policy together with all others. */
-	cpumask_copy(policy->cpus, cpu_online_mask);
-	cpufreq_frequency_table_get_attr(g5_cpu_freqs, policy->cpu);
-
-	return cpufreq_frequency_table_cpuinfo(policy,
-		g5_cpu_freqs);
-}
-
-
-static struct cpufreq_driver g5_cpufreq_driver = {
-	.name		= "powermac",
-	.owner		= THIS_MODULE,
-	.flags		= CPUFREQ_CONST_LOOPS,
-	.init		= g5_cpufreq_cpu_init,
-	.verify		= g5_cpufreq_verify,
-	.target		= g5_cpufreq_target,
-	.get		= g5_cpufreq_get_speed,
-	.attr 		= g5_cpu_freqs_attr,
-};
-
-
-#ifdef CONFIG_PMAC_SMU
-
-static int __init g5_neo2_cpufreq_init(struct device_node *cpus)
-{
-	struct device_node *cpunode;
-	unsigned int psize, ssize;
-	unsigned long max_freq;
-	char *freq_method, *volt_method;
-	const u32 *valp;
-	u32 pvr_hi;
-	int use_volts_vdnap = 0;
-	int use_volts_smu = 0;
-	int rc = -ENODEV;
-
-	/* Check supported platforms */
-	if (of_machine_is_compatible("PowerMac8,1") ||
-	    of_machine_is_compatible("PowerMac8,2") ||
-	    of_machine_is_compatible("PowerMac9,1"))
-		use_volts_smu = 1;
-	else if (of_machine_is_compatible("PowerMac11,2"))
-		use_volts_vdnap = 1;
-	else
-		return -ENODEV;
-
-	/* Get first CPU node */
-	for (cpunode = NULL;
-	     (cpunode = of_get_next_child(cpus, cpunode)) != NULL;) {
-		const u32 *reg = of_get_property(cpunode, "reg", NULL);
-		if (reg == NULL || (*reg) != 0)
-			continue;
-		if (!strcmp(cpunode->type, "cpu"))
-			break;
-	}
-	if (cpunode == NULL) {
-		printk(KERN_ERR "cpufreq: Can't find any CPU 0 node\n");
-		return -ENODEV;
-	}
-
-	/* Check 970FX for now */
-	valp = of_get_property(cpunode, "cpu-version", NULL);
-	if (!valp) {
-		DBG("No cpu-version property !\n");
-		goto bail_noprops;
-	}
-	pvr_hi = (*valp) >> 16;
-	if (pvr_hi != 0x3c && pvr_hi != 0x44) {
-		printk(KERN_ERR "cpufreq: Unsupported CPU version\n");
-		goto bail_noprops;
-	}
-
-	/* Look for the powertune data in the device-tree */
-	g5_pmode_data = of_get_property(cpunode, "power-mode-data",&psize);
-	if (!g5_pmode_data) {
-		DBG("No power-mode-data !\n");
-		goto bail_noprops;
-	}
-	g5_pmode_max = psize / sizeof(u32) - 1;
-
-	if (use_volts_smu) {
-		const struct smu_sdbp_header *shdr;
-
-		/* Look for the FVT table */
-		shdr = smu_get_sdb_partition(SMU_SDB_FVT_ID, NULL);
-		if (!shdr)
-			goto bail_noprops;
-		g5_fvt_table = (struct smu_sdbp_fvt *)&shdr[1];
-		ssize = (shdr->len * sizeof(u32)) -
-			sizeof(struct smu_sdbp_header);
-		g5_fvt_count = ssize / sizeof(struct smu_sdbp_fvt);
-		g5_fvt_cur = 0;
-
-		/* Sanity checking */
-		if (g5_fvt_count < 1 || g5_pmode_max < 1)
-			goto bail_noprops;
-
-		g5_switch_volt = g5_smu_switch_volt;
-		volt_method = "SMU";
-	} else if (use_volts_vdnap) {
-		struct device_node *root;
-
-		root = of_find_node_by_path("/");
-		if (root == NULL) {
-			printk(KERN_ERR "cpufreq: Can't find root of "
-			       "device tree\n");
-			goto bail_noprops;
-		}
-		pfunc_set_vdnap0 = pmf_find_function(root, "set-vdnap0");
-		pfunc_vdnap0_complete =
-			pmf_find_function(root, "slewing-done");
-		if (pfunc_set_vdnap0 == NULL ||
-		    pfunc_vdnap0_complete == NULL) {
-			printk(KERN_ERR "cpufreq: Can't find required "
-			       "platform function\n");
-			goto bail_noprops;
-		}
-
-		g5_switch_volt = g5_vdnap_switch_volt;
-		volt_method = "GPIO";
-	} else {
-		g5_switch_volt = g5_dummy_switch_volt;
-		volt_method = "none";
-	}
-
-	/*
-	 * From what I see, clock-frequency is always the maximal frequency.
-	 * The current driver can not slew sysclk yet, so we really only deal
-	 * with powertune steps for now. We also only implement full freq and
-	 * half freq in this version. So far, I haven't yet seen a machine
-	 * supporting anything else.
-	 */
-	valp = of_get_property(cpunode, "clock-frequency", NULL);
-	if (!valp)
-		return -ENODEV;
-	max_freq = (*valp)/1000;
-	g5_cpu_freqs[0].frequency = max_freq;
-	g5_cpu_freqs[1].frequency = max_freq/2;
-
-	/* Set callbacks */
-	transition_latency = 12000;
-	g5_switch_freq = g5_scom_switch_freq;
-	g5_query_freq = g5_scom_query_freq;
-	freq_method = "SCOM";
-
-	/* Force apply current frequency to make sure everything is in
-	 * sync (voltage is right for example). Firmware may leave us with
-	 * a strange setting ...
-	 */
-	g5_switch_volt(CPUFREQ_HIGH);
-	msleep(10);
-	g5_pmode_cur = -1;
-	g5_switch_freq(g5_query_freq());
-
-	printk(KERN_INFO "Registering G5 CPU frequency driver\n");
-	printk(KERN_INFO "Frequency method: %s, Voltage method: %s\n",
-	       freq_method, volt_method);
-	printk(KERN_INFO "Low: %d Mhz, High: %d Mhz, Cur: %d MHz\n",
-		g5_cpu_freqs[1].frequency/1000,
-		g5_cpu_freqs[0].frequency/1000,
-		g5_cpu_freqs[g5_pmode_cur].frequency/1000);
-
-	rc = cpufreq_register_driver(&g5_cpufreq_driver);
-
-	/* We keep the CPU node on hold... hopefully, Apple G5 don't have
-	 * hotplug CPU with a dynamic device-tree ...
-	 */
-	return rc;
-
- bail_noprops:
-	of_node_put(cpunode);
-
-	return rc;
-}
-
-#endif /* CONFIG_PMAC_SMU */
-
-
-static int __init g5_pm72_cpufreq_init(struct device_node *cpus)
-{
-	struct device_node *cpuid = NULL, *hwclock = NULL, *cpunode = NULL;
-	const u8 *eeprom = NULL;
-	const u32 *valp;
-	u64 max_freq, min_freq, ih, il;
-	int has_volt = 1, rc = 0;
-
-	DBG("cpufreq: Initializing for PowerMac7,2, PowerMac7,3 and"
-	    " RackMac3,1...\n");
-
-	/* Get first CPU node */
-	for (cpunode = NULL;
-	     (cpunode = of_get_next_child(cpus, cpunode)) != NULL;) {
-		if (!strcmp(cpunode->type, "cpu"))
-			break;
-	}
-	if (cpunode == NULL) {
-		printk(KERN_ERR "cpufreq: Can't find any CPU node\n");
-		return -ENODEV;
-	}
-
-	/* Lookup the cpuid eeprom node */
-        cpuid = of_find_node_by_path("/u3@0,f8000000/i2c@f8001000/cpuid@a0");
-	if (cpuid != NULL)
-		eeprom = of_get_property(cpuid, "cpuid", NULL);
-	if (eeprom == NULL) {
-		printk(KERN_ERR "cpufreq: Can't find cpuid EEPROM !\n");
-		rc = -ENODEV;
-		goto bail;
-	}
-
-	/* Lookup the i2c hwclock */
-	for (hwclock = NULL;
-	     (hwclock = of_find_node_by_name(hwclock, "i2c-hwclock")) != NULL;){
-		const char *loc = of_get_property(hwclock,
-				"hwctrl-location", NULL);
-		if (loc == NULL)
-			continue;
-		if (strcmp(loc, "CPU CLOCK"))
-			continue;
-		if (!of_get_property(hwclock, "platform-get-frequency", NULL))
-			continue;
-		break;
-	}
-	if (hwclock == NULL) {
-		printk(KERN_ERR "cpufreq: Can't find i2c clock chip !\n");
-		rc = -ENODEV;
-		goto bail;
-	}
-
-	DBG("cpufreq: i2c clock chip found: %s\n", hwclock->full_name);
-
-	/* Now get all the platform functions */
-	pfunc_cpu_getfreq =
-		pmf_find_function(hwclock, "get-frequency");
-	pfunc_cpu_setfreq_high =
-		pmf_find_function(hwclock, "set-frequency-high");
-	pfunc_cpu_setfreq_low =
-		pmf_find_function(hwclock, "set-frequency-low");
-	pfunc_slewing_done =
-		pmf_find_function(hwclock, "slewing-done");
-	pfunc_cpu0_volt_high =
-		pmf_find_function(hwclock, "set-voltage-high-0");
-	pfunc_cpu0_volt_low =
-		pmf_find_function(hwclock, "set-voltage-low-0");
-	pfunc_cpu1_volt_high =
-		pmf_find_function(hwclock, "set-voltage-high-1");
-	pfunc_cpu1_volt_low =
-		pmf_find_function(hwclock, "set-voltage-low-1");
-
-	/* Check we have minimum requirements */
-	if (pfunc_cpu_getfreq == NULL || pfunc_cpu_setfreq_high == NULL ||
-	    pfunc_cpu_setfreq_low == NULL || pfunc_slewing_done == NULL) {
-		printk(KERN_ERR "cpufreq: Can't find platform functions !\n");
-		rc = -ENODEV;
-		goto bail;
-	}
-
-	/* Check that we have complete sets */
-	if (pfunc_cpu0_volt_high == NULL || pfunc_cpu0_volt_low == NULL) {
-		pmf_put_function(pfunc_cpu0_volt_high);
-		pmf_put_function(pfunc_cpu0_volt_low);
-		pfunc_cpu0_volt_high = pfunc_cpu0_volt_low = NULL;
-		has_volt = 0;
-	}
-	if (!has_volt ||
-	    pfunc_cpu1_volt_high == NULL || pfunc_cpu1_volt_low == NULL) {
-		pmf_put_function(pfunc_cpu1_volt_high);
-		pmf_put_function(pfunc_cpu1_volt_low);
-		pfunc_cpu1_volt_high = pfunc_cpu1_volt_low = NULL;
-	}
-
-	/* Note: The device tree also contains a "platform-set-values"
-	 * function for which I haven't quite figured out the usage. It
-	 * might have to be called on init and/or wakeup, I'm not too sure
-	 * but things seem to work fine without it so far ...
-	 */
-
-	/* Get max frequency from device-tree */
-	valp = of_get_property(cpunode, "clock-frequency", NULL);
-	if (!valp) {
-		printk(KERN_ERR "cpufreq: Can't find CPU frequency !\n");
-		rc = -ENODEV;
-		goto bail;
-	}
-
-	max_freq = (*valp)/1000;
-
-	/* Now calculate reduced frequency by using the cpuid input freq
-	 * ratio. This requires 64 bits math unless we are willing to lose
-	 * some precision
-	 */
-	ih = *((u32 *)(eeprom + 0x10));
-	il = *((u32 *)(eeprom + 0x20));
-
-	/* Check for machines with no useful settings */
-	if (il == ih) {
-		printk(KERN_WARNING "cpufreq: No low frequency mode available"
-		       " on this model !\n");
-		rc = -ENODEV;
-		goto bail;
-	}
-
-	min_freq = 0;
-	if (ih != 0 && il != 0)
-		min_freq = (max_freq * il) / ih;
-
-	/* Sanity check */
-	if (min_freq >= max_freq || min_freq < 1000) {
-		printk(KERN_ERR "cpufreq: Can't calculate low frequency !\n");
-		rc = -ENXIO;
-		goto bail;
-	}
-	g5_cpu_freqs[0].frequency = max_freq;
-	g5_cpu_freqs[1].frequency = min_freq;
-
-	/* Set callbacks */
-	transition_latency = CPUFREQ_ETERNAL;
-	g5_switch_volt = g5_pfunc_switch_volt;
-	g5_switch_freq = g5_pfunc_switch_freq;
-	g5_query_freq = g5_pfunc_query_freq;
-
-	/* Force apply current frequency to make sure everything is in
-	 * sync (voltage is right for example). Firmware may leave us with
-	 * a strange setting ...
-	 */
-	g5_switch_volt(CPUFREQ_HIGH);
-	msleep(10);
-	g5_pmode_cur = -1;
-	g5_switch_freq(g5_query_freq());
-
-	printk(KERN_INFO "Registering G5 CPU frequency driver\n");
-	printk(KERN_INFO "Frequency method: i2c/pfunc, "
-	       "Voltage method: %s\n", has_volt ? "i2c/pfunc" : "none");
-	printk(KERN_INFO "Low: %d Mhz, High: %d Mhz, Cur: %d MHz\n",
-		g5_cpu_freqs[1].frequency/1000,
-		g5_cpu_freqs[0].frequency/1000,
-		g5_cpu_freqs[g5_pmode_cur].frequency/1000);
-
-	rc = cpufreq_register_driver(&g5_cpufreq_driver);
- bail:
-	if (rc != 0) {
-		pmf_put_function(pfunc_cpu_getfreq);
-		pmf_put_function(pfunc_cpu_setfreq_high);
-		pmf_put_function(pfunc_cpu_setfreq_low);
-		pmf_put_function(pfunc_slewing_done);
-		pmf_put_function(pfunc_cpu0_volt_high);
-		pmf_put_function(pfunc_cpu0_volt_low);
-		pmf_put_function(pfunc_cpu1_volt_high);
-		pmf_put_function(pfunc_cpu1_volt_low);
-	}
-	of_node_put(hwclock);
-	of_node_put(cpuid);
-	of_node_put(cpunode);
-
-	return rc;
-}
-
-static int __init g5_cpufreq_init(void)
-{
-	struct device_node *cpus;
-	int rc = 0;
-
-	cpus = of_find_node_by_path("/cpus");
-	if (cpus == NULL) {
-		DBG("No /cpus node !\n");
-		return -ENODEV;
-	}
-
-	if (of_machine_is_compatible("PowerMac7,2") ||
-	    of_machine_is_compatible("PowerMac7,3") ||
-	    of_machine_is_compatible("RackMac3,1"))
-		rc = g5_pm72_cpufreq_init(cpus);
-#ifdef CONFIG_PMAC_SMU
-	else
-		rc = g5_neo2_cpufreq_init(cpus);
-#endif /* CONFIG_PMAC_SMU */
-
-	of_node_put(cpus);
-	return rc;
-}
-
-module_init(g5_cpufreq_init);
-
-
-MODULE_LICENSE("GPL");
diff --git a/arch/powerpc/platforms/powermac/low_i2c.c b/arch/powerpc/platforms/powermac/low_i2c.c
index fc536f2971c..7553b6a77c6 100644
--- a/arch/powerpc/platforms/powermac/low_i2c.c
+++ b/arch/powerpc/platforms/powermac/low_i2c.c
@@ -452,7 +452,7 @@ static int kw_i2c_xfer(struct pmac_i2c_bus *bus, u8 addrdir, int subsize,
 	 */
 	if (use_irq) {
 		/* Clear completion */
-		INIT_COMPLETION(host->complete);
+		reinit_completion(&host->complete);
 		/* Ack stale interrupts */
 		kw_write_reg(reg_isr, kw_read_reg(reg_isr));
 		/* Arm timeout */
@@ -717,7 +717,7 @@ static int pmu_i2c_xfer(struct pmac_i2c_bus *bus, u8 addrdir, int subsize,
 			return -EINVAL;
 		}
 
-		INIT_COMPLETION(comp);
+		reinit_completion(&comp);
 		req->data[0] = PMU_I2C_CMD;
 		req->reply[0] = 0xff;
 		req->nbytes = sizeof(struct pmu_i2c_hdr) + 1;
@@ -748,7 +748,7 @@ static int pmu_i2c_xfer(struct pmac_i2c_bus *bus, u8 addrdir, int subsize,
 
 		hdr->bus = PMU_I2C_BUS_STATUS;
 
-		INIT_COMPLETION(comp);
+		reinit_completion(&comp);
 		req->data[0] = PMU_I2C_CMD;
 		req->reply[0] = 0xff;
 		req->nbytes = 2;
diff --git a/arch/powerpc/platforms/powermac/pfunc_base.c b/arch/powerpc/platforms/powermac/pfunc_base.c
index f5e3cda6660..e49d07f3d54 100644
--- a/arch/powerpc/platforms/powermac/pfunc_base.c
+++ b/arch/powerpc/platforms/powermac/pfunc_base.c
@@ -4,6 +4,7 @@
 #include <linux/kernel.h>
 #include <linux/interrupt.h>
 #include <linux/spinlock.h>
+#include <linux/of_irq.h>
 
 #include <asm/pmac_feature.h>
 #include <asm/pmac_pfunc.h>
diff --git a/arch/powerpc/platforms/powermac/pfunc_core.c b/arch/powerpc/platforms/powermac/pfunc_core.c
index d588e48dff7..43075081721 100644
--- a/arch/powerpc/platforms/powermac/pfunc_core.c
+++ b/arch/powerpc/platforms/powermac/pfunc_core.c
@@ -5,7 +5,6 @@
  * FIXME: LOCKING !!!
  */
 
-#include <linux/init.h>
 #include <linux/delay.h>
 #include <linux/kernel.h>
 #include <linux/spinlock.h>
diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c
index 31036b56670..4c24bf60d39 100644
--- a/arch/powerpc/platforms/powermac/pic.c
+++ b/arch/powerpc/platforms/powermac/pic.c
@@ -393,8 +393,8 @@ static void __init pmac_pic_probe_oldstyle(void)
 #endif
 }
 
-int of_irq_map_oldworld(struct device_node *device, int index,
-			struct of_irq *out_irq)
+int of_irq_parse_oldworld(struct device_node *device, int index,
+			struct of_phandle_args *out_irq)
 {
 	const u32 *ints = NULL;
 	int intlen;
@@ -422,9 +422,9 @@ int of_irq_map_oldworld(struct device_node *device, int index,
 	if (index >= intlen)
 		return -EINVAL;
 
-	out_irq->controller = NULL;
-	out_irq->specifier[0] = ints[index];
-	out_irq->size = 1;
+	out_irq->np = NULL;
+	out_irq->args[0] = ints[index];
+	out_irq->args_count = 1;
 
 	return 0;
 }
diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c
index bdb738a69e4..5cbd4d67d5c 100644
--- a/arch/powerpc/platforms/powermac/smp.c
+++ b/arch/powerpc/platforms/powermac/smp.c
@@ -192,7 +192,7 @@ static int psurge_secondary_ipi_init(void)
 {
 	int rc = -ENOMEM;
 
-	psurge_host = irq_domain_add_nomap(NULL, 0, &psurge_host_ops, NULL);
+	psurge_host = irq_domain_add_nomap(NULL, ~0, &psurge_host_ops, NULL);
 
 	if (psurge_host)
 		psurge_secondary_virq = irq_create_direct_mapping(psurge_host);
@@ -885,7 +885,7 @@ static int smp_core99_cpu_notify(struct notifier_block *self,
 	return NOTIFY_OK;
 }
 
-static struct notifier_block __cpuinitdata smp_core99_cpu_nb = {
+static struct notifier_block smp_core99_cpu_nb = {
 	.notifier_call	= smp_core99_cpu_notify,
 };
 #endif /* CONFIG_HOTPLUG_CPU */
diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig
index c24684c818a..45a8ed0585c 100644
--- a/arch/powerpc/platforms/powernv/Kconfig
+++ b/arch/powerpc/platforms/powernv/Kconfig
@@ -7,11 +7,17 @@ config PPC_POWERNV
 	select PPC_P7_NAP
 	select PPC_PCI_CHOICE if EMBEDDED
 	select EPAPR_BOOT
-	default y
-
-config POWERNV_MSI
-	bool "Support PCI MSI on PowerNV platform"
-	depends on PCI_MSI
+	select PPC_INDIRECT_PIO
+	select PPC_UDBG_16550
+	select PPC_SCOM
+	select ARCH_RANDOM
+	select CPU_FREQ
+	select CPU_FREQ_GOV_PERFORMANCE
+	select CPU_FREQ_GOV_POWERSAVE
+	select CPU_FREQ_GOV_USERSPACE
+	select CPU_FREQ_GOV_ONDEMAND
+	select CPU_FREQ_GOV_CONSERVATIVE
+	select PPC_DOORBELL
 	default y
 
 config PPC_POWERNV_RTAS
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index bcc3cb48a44..4ad227d04c1 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -1,5 +1,10 @@
-obj-y			+= setup.o opal-takeover.o opal-wrappers.o opal.o
-obj-y			+= opal-rtc.o opal-nvram.o
+obj-y			+= setup.o opal-wrappers.o opal.o opal-async.o
+obj-y			+= opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o
+obj-y			+= rng.o opal-elog.o opal-dump.o opal-sysparam.o opal-sensor.o
+obj-y			+= opal-msglog.o
 
-obj-$(CONFIG_SMP)	+= smp.o
+obj-$(CONFIG_SMP)	+= smp.o subcore.o subcore-asm.o
 obj-$(CONFIG_PCI)	+= pci.o pci-p5ioc2.o pci-ioda.o
+obj-$(CONFIG_EEH)	+= eeh-ioda.o eeh-powernv.o
+obj-$(CONFIG_PPC_SCOM)	+= opal-xscom.o
+obj-$(CONFIG_MEMORY_FAILURE)	+= opal-memory-errors.o
diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c
new file mode 100644
index 00000000000..8ad0c5b891f
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/eeh-ioda.c
@@ -0,0 +1,890 @@
+/*
+ * This file implements the functions needed by EEH on IODA compliant
+ * chips. Most of the EEH related functions here are built on top of
+ * the OPAL APIs.
+ *
+ * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2013.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/bootmem.h>
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/msi.h>
+#include <linux/notifier.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+
+#include <asm/eeh.h>
+#include <asm/eeh_event.h>
+#include <asm/io.h>
+#include <asm/iommu.h>
+#include <asm/msi_bitmap.h>
+#include <asm/opal.h>
+#include <asm/pci-bridge.h>
+#include <asm/ppc-pci.h>
+#include <asm/tce.h>
+
+#include "powernv.h"
+#include "pci.h"
+
+static int ioda_eeh_nb_init = 0;	/* set to 1 once ioda_eeh_nb is registered */
+
+static int ioda_eeh_event(struct notifier_block *nb,
+			  unsigned long events, void *change)
+{
+	uint64_t changed = (uint64_t)change;
+
+	/*
+	 * Act only when the PCI error bit is set in both the
+	 * changed mask and the event mask: send a special EEH
+	 * event if EEH has been enabled, otherwise clear the
+	 * pending event in case that we enable EEH soon.
+	 */
+	if ((changed & OPAL_EVENT_PCI_ERROR) &&
+	    (events & OPAL_EVENT_PCI_ERROR)) {
+		if (eeh_enabled())
+			eeh_send_failure_event(NULL);
+		else
+			opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul);
+	}
+
+	return 0;
+}
+
+static struct notifier_block ioda_eeh_nb = {
+	.priority	= 0,
+	.next		= NULL,
+	.notifier_call	= ioda_eeh_event,
+};
+
+#ifdef CONFIG_DEBUG_FS
+static int ioda_eeh_dbgfs_set(void *data, int offset, u64 val)
+{
+	struct pnv_phb *pnv = ((struct pci_controller *)data)->private_data;
+
+	/* Store @val at @offset from the PHB register base */
+	out_be64(pnv->regs + offset, val);
+	return 0;
+}
+
+static int ioda_eeh_dbgfs_get(void *data, int offset, u64 *val)
+{
+	struct pnv_phb *pnv = ((struct pci_controller *)data)->private_data;
+
+	/* Load *@val from @offset relative to the PHB register base */
+	*val = in_be64(pnv->regs + offset);
+	return 0;
+}
+
+static int ioda_eeh_outb_dbgfs_set(void *data, u64 val)
+{
+	return ioda_eeh_dbgfs_set(data, 0xD10, val);	/* "err_injct_outbound" */
+}
+
+static int ioda_eeh_outb_dbgfs_get(void *data, u64 *val)
+{
+	return ioda_eeh_dbgfs_get(data, 0xD10, val);	/* "err_injct_outbound" */
+}
+
+static int ioda_eeh_inbA_dbgfs_set(void *data, u64 val)
+{
+	return ioda_eeh_dbgfs_set(data, 0xD90, val);	/* "err_injct_inboundA" */
+}
+
+static int ioda_eeh_inbA_dbgfs_get(void *data, u64 *val)
+{
+	return ioda_eeh_dbgfs_get(data, 0xD90, val);	/* "err_injct_inboundA" */
+}
+
+static int ioda_eeh_inbB_dbgfs_set(void *data, u64 val)
+{
+	return ioda_eeh_dbgfs_set(data, 0xE10, val);	/* "err_injct_inboundB" */
+}
+
+static int ioda_eeh_inbB_dbgfs_get(void *data, u64 *val)
+{
+	return ioda_eeh_dbgfs_get(data, 0xE10, val);	/* "err_injct_inboundB" */
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(ioda_eeh_outb_dbgfs_ops, ioda_eeh_outb_dbgfs_get,
+			ioda_eeh_outb_dbgfs_set, "0x%llx\n");
+DEFINE_SIMPLE_ATTRIBUTE(ioda_eeh_inbA_dbgfs_ops, ioda_eeh_inbA_dbgfs_get,
+			ioda_eeh_inbA_dbgfs_set, "0x%llx\n");
+DEFINE_SIMPLE_ATTRIBUTE(ioda_eeh_inbB_dbgfs_ops, ioda_eeh_inbB_dbgfs_get,
+			ioda_eeh_inbB_dbgfs_set, "0x%llx\n");
+#endif /* CONFIG_DEBUG_FS */
+
+
+/**
+ * ioda_eeh_post_init - Chip dependent post initialization
+ * @hose: PCI controller
+ *
+ * Called after the EEH PEs and devices have been built. Registers
+ * the OPAL event notifier once, creates the error injection debugfs
+ * files and sets or clears PNV_PHB_FLAG_EEH. Returns 0 or an error.
+ */
+static int ioda_eeh_post_init(struct pci_controller *hose)
+{
+	struct pnv_phb *phb = hose->private_data;
+	int ret;
+
+	/* Register OPAL event notifier (only on the first call) */
+	if (!ioda_eeh_nb_init) {
+		ret = opal_notifier_register(&ioda_eeh_nb);
+		if (ret) {
+			pr_err("%s: Can't register OPAL event notifier (%d)\n",
+			       __func__, ret);
+			return ret;
+		}
+
+		ioda_eeh_nb_init = 1;
+	}
+
+#ifdef CONFIG_DEBUG_FS
+	if (!phb->has_dbgfs && phb->dbgfs) {
+		phb->has_dbgfs = 1;
+
+		debugfs_create_file("err_injct_outbound", 0600,
+				    phb->dbgfs, hose,
+				    &ioda_eeh_outb_dbgfs_ops);
+		debugfs_create_file("err_injct_inboundA", 0600,
+				    phb->dbgfs, hose,
+				    &ioda_eeh_inbA_dbgfs_ops);
+		debugfs_create_file("err_injct_inboundB", 0600,
+				    phb->dbgfs, hose,
+				    &ioda_eeh_inbB_dbgfs_ops);
+	}
+#endif
+
+	/* If EEH is enabled, we're going to rely on that.
+	 * Otherwise, we fall back to the conventional mechanism
+	 * of clearing frozen PEs during PCI config access.
+	 */
+	if (eeh_enabled())
+		phb->flags |= PNV_PHB_FLAG_EEH;
+	else
+		phb->flags &= ~PNV_PHB_FLAG_EEH;
+
+	return 0;
+}
+
+/**
+ * ioda_eeh_set_option - Set EEH operation or I/O setting
+ * @pe: EEH PE
+ * @option: options
+ *
+ * EEH_OPT_ENABLE is accepted as a no-op and EEH_OPT_DISABLE is
+ * refused with -EEXIST. EEH_OPT_THAW_MMIO and EEH_OPT_THAW_DMA
+ * clear the MMIO or DMA freeze of the PE through OPAL.
+ */
+static int ioda_eeh_set_option(struct eeh_pe *pe, int option)
+{
+	s64 ret;
+	u32 pe_no;
+	struct pci_controller *hose = pe->phb;
+	struct pnv_phb *phb = hose->private_data;
+
+	/* Reject PE numbers that are negative or not below total_pe */
+	if (pe->addr < 0 || pe->addr >= phb->ioda.total_pe) {
+		pr_err("%s: PE address %x out of range [0, %x] "
+		       "on PHB#%x\n",
+			__func__, pe->addr, phb->ioda.total_pe,
+			hose->global_number);
+		return -EINVAL;
+	}
+
+	pe_no = pe->addr;
+	switch (option) {
+	case EEH_OPT_DISABLE:
+		ret = -EEXIST;
+		break;
+	case EEH_OPT_ENABLE:
+		ret = 0;
+		break;
+	case EEH_OPT_THAW_MMIO:
+		ret = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no,
+				OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO);
+		if (ret) {
+			pr_warning("%s: Failed to enable MMIO for "
+				   "PHB#%x-PE#%x, err=%lld\n",
+				__func__, hose->global_number, pe_no, ret);
+			return -EIO;
+		}
+
+		break;
+	case EEH_OPT_THAW_DMA:
+		ret = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no,
+				OPAL_EEH_ACTION_CLEAR_FREEZE_DMA);
+		if (ret) {
+			pr_warning("%s: Failed to enable DMA for "
+				   "PHB#%x-PE#%x, err=%lld\n",
+				__func__, hose->global_number, pe_no, ret);
+			return -EIO;
+		}
+
+		break;
+	default:
+		pr_warning("%s: Invalid option %d\n", __func__, option);
+		return -EINVAL;
+	}
+
+	return ret;
+}
+
+static void ioda_eeh_phb_diag(struct pci_controller *hose)
+{
+	struct pnv_phb *pnv = hose->private_data;
+	long err = opal_pci_get_phb_diag_data2(pnv->opal_id, pnv->diag.blob,
+					       PNV_PCI_DIAG_BUF_SIZE);
+
+	/* Dump the blob only when OPAL reported success fetching it */
+	if (err == OPAL_SUCCESS) {
+		pnv_pci_dump_phb_diag_data(hose, pnv->diag.blob);
+		return;
+	}
+
+	pr_warning("%s: Failed to get diag-data for PHB#%x (%ld)\n",
+		    __func__, hose->global_number, err);
+}
+
+/**
+ * ioda_eeh_get_state - Retrieve the state of PE
+ * @pe: EEH PE
+ *
+ * The PE's state lives in the PEEV and PEST IODA tables, but
+ * it is fetched through OPAL here. Returns a mask of EEH_STATE_*
+ * flags, or EEH_STATE_NOT_SUPPORT when it cannot be retrieved.
+ */
+static int ioda_eeh_get_state(struct eeh_pe *pe)
+{
+	s64 ret = 0;
+	u8 fstate;
+	__be16 pcierr;
+	u32 pe_no;
+	int result;
+	struct pci_controller *hose = pe->phb;
+	struct pnv_phb *phb = hose->private_data;
+
+	/*
+	 * Sanity check on PE address. The PHB PE address should
+	 * be zero.
+	 */
+	if (pe->addr < 0 || pe->addr >= phb->ioda.total_pe) {
+		pr_err("%s: PE address %x out of range [0, %x] "
+		       "on PHB#%x\n",
+		       __func__, pe->addr, phb->ioda.total_pe,
+		       hose->global_number);
+		return EEH_STATE_NOT_SUPPORT;
+	}
+
+	/*
+	 * If we're in middle of PE reset, return normal
+	 * state to keep EEH core going. For PHB reset, we
+	 * still expect to have fenced PHB cleared with
+	 * PHB reset.
+	 */
+	if (!(pe->type & EEH_PE_PHB) &&
+	    (pe->state & EEH_PE_RESET)) {
+		result = (EEH_STATE_MMIO_ACTIVE |
+			  EEH_STATE_DMA_ACTIVE |
+			  EEH_STATE_MMIO_ENABLED |
+			  EEH_STATE_DMA_ENABLED);
+		return result;
+	}
+
+	/* Retrieve PE status through OPAL */
+	pe_no = pe->addr;
+	ret = opal_pci_eeh_freeze_status(phb->opal_id, pe_no,
+			&fstate, &pcierr, NULL);
+	if (ret) {
+		pr_err("%s: Failed to get EEH status on "
+		       "PHB#%x-PE#%x, err=%lld\n",
+		       __func__, hose->global_number, pe_no, ret);
+		return EEH_STATE_NOT_SUPPORT;
+	}
+
+	/* Check PHB status: only the PCI error type matters here */
+	if (pe->type & EEH_PE_PHB) {
+		result = 0;
+		result &= ~EEH_STATE_RESET_ACTIVE;
+
+		if (be16_to_cpu(pcierr) != OPAL_EEH_PHB_ERROR) {
+			result |= EEH_STATE_MMIO_ACTIVE;
+			result |= EEH_STATE_DMA_ACTIVE;
+			result |= EEH_STATE_MMIO_ENABLED;
+			result |= EEH_STATE_DMA_ENABLED;
+		} else if (!(pe->state & EEH_PE_ISOLATED)) {
+			eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
+			ioda_eeh_phb_diag(hose);
+		}
+
+		return result;
+	}
+
+	/* Translate the OPAL freeze state into EEH_STATE_* flags */
+	result = 0;
+	switch (fstate) {
+	case OPAL_EEH_STOPPED_NOT_FROZEN:
+		result &= ~EEH_STATE_RESET_ACTIVE;
+		result |= EEH_STATE_MMIO_ACTIVE;
+		result |= EEH_STATE_DMA_ACTIVE;
+		result |= EEH_STATE_MMIO_ENABLED;
+		result |= EEH_STATE_DMA_ENABLED;
+		break;
+	case OPAL_EEH_STOPPED_MMIO_FREEZE:
+		result &= ~EEH_STATE_RESET_ACTIVE;
+		result |= EEH_STATE_DMA_ACTIVE;
+		result |= EEH_STATE_DMA_ENABLED;
+		break;
+	case OPAL_EEH_STOPPED_DMA_FREEZE:
+		result &= ~EEH_STATE_RESET_ACTIVE;
+		result |= EEH_STATE_MMIO_ACTIVE;
+		result |= EEH_STATE_MMIO_ENABLED;
+		break;
+	case OPAL_EEH_STOPPED_MMIO_DMA_FREEZE:
+		result &= ~EEH_STATE_RESET_ACTIVE;
+		break;
+	case OPAL_EEH_STOPPED_RESET:
+		result |= EEH_STATE_RESET_ACTIVE;
+		break;
+	case OPAL_EEH_STOPPED_TEMP_UNAVAIL:
+		result |= EEH_STATE_UNAVAILABLE;
+		break;
+	case OPAL_EEH_STOPPED_PERM_UNAVAIL:
+		result |= EEH_STATE_NOT_SUPPORT;
+		break;
+	default:
+		pr_warning("%s: Unexpected EEH status 0x%x "
+			   "on PHB#%x-PE#%x\n",
+			   __func__, fstate, hose->global_number, pe_no);
+	}
+
+	/* Dump PHB diag-data once for a PE with MMIO or DMA not active */
+	if (result != EEH_STATE_NOT_SUPPORT &&
+	    (result & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) !=
+	    (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE) &&
+	    !(pe->state & EEH_PE_ISOLATED)) {
+		eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
+		ioda_eeh_phb_diag(hose);
+	}
+
+	return result;
+}
+
+static s64 ioda_eeh_phb_poll(struct pnv_phb *phb)
+{
+	s64 rc;
+
+	/* A positive return is used as the delay (ms) before re-polling */
+	for (;;) {
+		rc = opal_pci_poll(phb->opal_id);
+		if (rc <= 0)
+			break;
+		if (system_state >= SYSTEM_RUNNING)
+			msleep(rc);
+		else
+			udelay(1000 * rc);
+	}
+
+	return rc;
+}
+
+int ioda_eeh_phb_reset(struct pci_controller *hose, int option)
+{
+	struct pnv_phb *phb = hose->private_data;
+	s64 rc = OPAL_HARDWARE;
+
+	pr_debug("%s: Reset PHB#%x, option=%d\n",
+		 __func__, hose->global_number, option);
+
+	/* Assert or deassert the PHB complete reset */
+	switch (option) {
+	case EEH_RESET_FUNDAMENTAL:
+	case EEH_RESET_HOT:
+		rc = opal_pci_reset(phb->opal_id, OPAL_PHB_COMPLETE,
+				    OPAL_ASSERT_RESET);
+		break;
+	case EEH_RESET_DEACTIVATE:
+		rc = opal_pci_reset(phb->opal_id, OPAL_PHB_COMPLETE,
+				    OPAL_DEASSERT_RESET);
+		break;
+	default:
+		break;
+	}
+	if (rc < 0)
+		return -EIO;
+
+	/*
+	 * Poll the PHB until the request has completed. A PHB
+	 * reset is usually a PHB complete reset followed by a
+	 * hot reset on the root bus, so the PCI bus settlement
+	 * delay is needed as well once the reset is deasserted.
+	 */
+	rc = ioda_eeh_phb_poll(phb);
+	if (option == EEH_RESET_DEACTIVATE) {
+		if (system_state < SYSTEM_RUNNING)
+			udelay(1000 * EEH_PE_RST_SETTLE_TIME);
+		else
+			msleep(EEH_PE_RST_SETTLE_TIME);
+	}
+	return rc == OPAL_SUCCESS ? 0 : -EIO;
+}
+
+static int ioda_eeh_root_reset(struct pci_controller *hose, int option)
+{
+	struct pnv_phb *phb = hose->private_data;
+	s64 rc = OPAL_SUCCESS;
+
+	pr_debug("%s: Reset PHB#%x, option=%d\n",
+		 __func__, hose->global_number, option);
+
+	/*
+	 * The reset scope doesn't matter at deassert time
+	 * because the firmware does nothing for fundamental
+	 * or hot reset during the deassert phase.
+	 */
+	switch (option) {
+	case EEH_RESET_FUNDAMENTAL:
+		rc = opal_pci_reset(phb->opal_id, OPAL_PCI_FUNDAMENTAL_RESET,
+				    OPAL_ASSERT_RESET);
+		break;
+	case EEH_RESET_HOT:
+		rc = opal_pci_reset(phb->opal_id, OPAL_PCI_HOT_RESET,
+				    OPAL_ASSERT_RESET);
+		break;
+	case EEH_RESET_DEACTIVATE:
+		rc = opal_pci_reset(phb->opal_id, OPAL_PCI_HOT_RESET,
+				    OPAL_DEASSERT_RESET);
+		break;
+	default:
+		break;
+	}
+	if (rc < 0)
+		return -EIO;
+
+	/* Poll state of the PHB until the request is done */
+	rc = ioda_eeh_phb_poll(phb);
+	if (option == EEH_RESET_DEACTIVATE)
+		msleep(EEH_PE_RST_SETTLE_TIME);
+	return rc == OPAL_SUCCESS ? 0 : -EIO;
+}
+
+/* Assert or deassert the secondary bus reset of bridge @dev per @option */
+static int ioda_eeh_bridge_reset(struct pci_dev *dev, int option)
+{
+	struct device_node *dn = pci_device_to_OF_node(dev);
+	struct eeh_dev *edev = of_node_to_eeh_dev(dn);
+	int aer = edev ? edev->aer_cap : 0;
+	u32 ctrl;
+
+	pr_debug("%s: Reset PCI bus %04x:%02x with option %d\n",
+		 __func__, pci_domain_nr(dev->bus),
+		 dev->bus->number, option);
+
+	switch (option) {
+	case EEH_RESET_FUNDAMENTAL:
+	case EEH_RESET_HOT:
+		/* Don't report linkDown event: mask it in the AER cap */
+		if (aer) {
+			eeh_ops->read_config(dn, aer + PCI_ERR_UNCOR_MASK,
+					     4, &ctrl);
+			ctrl |= PCI_ERR_UNC_SURPDN;
+			eeh_ops->write_config(dn, aer + PCI_ERR_UNCOR_MASK,
+					      4, ctrl);
+		}
+
+		eeh_ops->read_config(dn, PCI_BRIDGE_CONTROL, 2, &ctrl);
+		ctrl |= PCI_BRIDGE_CTL_BUS_RESET;
+		eeh_ops->write_config(dn, PCI_BRIDGE_CONTROL, 2, ctrl);
+		msleep(EEH_PE_RST_HOLD_TIME);
+
+		break;
+	case EEH_RESET_DEACTIVATE:
+		eeh_ops->read_config(dn, PCI_BRIDGE_CONTROL, 2, &ctrl);
+		ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET;
+		eeh_ops->write_config(dn, PCI_BRIDGE_CONTROL, 2, ctrl);
+		msleep(EEH_PE_RST_SETTLE_TIME);
+
+		/* Continue reporting linkDown event: unmask it again */
+		if (aer) {
+			eeh_ops->read_config(dn, aer + PCI_ERR_UNCOR_MASK,
+					     4, &ctrl);
+			ctrl &= ~PCI_ERR_UNC_SURPDN;
+			eeh_ops->write_config(dn, aer + PCI_ERR_UNCOR_MASK,
+					      4, ctrl);
+		}
+
+		break;
+	}
+
+	/* Any other @option leaves the bridge alone; always succeeds */
+	return 0;
+}
+
+void pnv_pci_reset_secondary_bus(struct pci_dev *dev)
+{
+	struct pci_controller *hose;
+
+	if (!pci_is_root_bus(dev->bus)) {
+		ioda_eeh_bridge_reset(dev, EEH_RESET_HOT);
+		ioda_eeh_bridge_reset(dev, EEH_RESET_DEACTIVATE);
+		return;
+	}
+	hose = pci_bus_to_host(dev->bus);
+	ioda_eeh_root_reset(hose, EEH_RESET_HOT);
+	ioda_eeh_root_reset(hose, EEH_RESET_DEACTIVATE);
+}
+
+/**
+ * ioda_eeh_reset - Reset the indicated PE
+ * @pe: EEH PE
+ * @option: reset option
+ *
+ * Do reset on the indicated PE. A PHB PE gets a PHB complete
+ * reset. A PE whose primary bus is the root bus, or sits right
+ * below it, is reset through ioda_eeh_root_reset(). Any other
+ * PE is reset through the bridge control register of the p2p
+ * bridge leading to its primary bus. Returns the result of the
+ * selected reset routine.
+ */
+static int ioda_eeh_reset(struct eeh_pe *pe, int option)
+{
+	struct pci_controller *hose = pe->phb;
+	struct pci_bus *bus;
+	int ret;
+
+	/*
+	 * For PHB reset, we always have complete reset. For those PEs whose
+	 * primary bus derived from root complex (root bus) or root port
+	 * (usually bus#1), we apply hot or fundamental reset on the root port.
+	 * For other PEs, we always have hot reset on the PE primary bus.
+	 *
+	 * Here, we have a different design to pHyp, which always clears the
+	 * frozen state during PE reset. However, the good idea here from
+	 * benh is to keep frozen state before we get PE reset done completely
+	 * (until BAR restore). With the frozen state, HW drops illegal IO
+	 * or MMIO access, which can incur recursive frozen PE during PE
+	 * reset. The side effect is that EEH core has to clear the frozen
+	 * state explicitly after BAR restore.
+	 */
+	if (pe->type & EEH_PE_PHB) {
+		ret = ioda_eeh_phb_reset(hose, option);
+	} else {
+		bus = eeh_pe_bus_get(pe);
+		if (pci_is_root_bus(bus) ||
+		    pci_is_root_bus(bus->parent))
+			ret = ioda_eeh_root_reset(hose, option);
+		else
+			ret = ioda_eeh_bridge_reset(bus->self, option);
+	}
+
+	return ret;
+}
+
+/**
+ * ioda_eeh_configure_bridge - Configure the PCI bridges for the indicated PE
+ * @pe: EEH PE
+ *
+ * A PE might include PCI bridges, which have to be configured
+ * correctly for the PE to work properly. However, nothing needs to
+ * be done on P7IOC since the reset function already covers everything
+ * this function would otherwise do, so it always returns 0.
+ */
+static int ioda_eeh_configure_bridge(struct eeh_pe *pe)
+{
+	return 0;
+}
+
+static void ioda_eeh_hub_diag_common(struct OpalIoP7IOCErrorData *data)
+{
+	/* GEM fields, dumped for every P7IOC diag-data type */
+	pr_info("  GEM XFIR:        %016llx\n", data->gemXfir);
+	pr_info("  GEM RFIR:        %016llx\n", data->gemRfir);
+	pr_info("  GEM RIRQFIR:     %016llx\n", data->gemRirqfir);
+	pr_info("  GEM Mask:        %016llx\n", data->gemMask);
+	pr_info("  GEM RWOF:        %016llx\n", data->gemRwof);
+
+	/* LEM fields, dumped for every P7IOC diag-data type */
+	pr_info("  LEM FIR:         %016llx\n", data->lemFir);
+	pr_info("  LEM Error Mask:  %016llx\n", data->lemErrMask);
+	pr_info("  LEM Action 0:    %016llx\n", data->lemAction0);
+	pr_info("  LEM Action 1:    %016llx\n", data->lemAction1);
+	pr_info("  LEM WOF:         %016llx\n", data->lemWof);
+}
+
+static void ioda_eeh_hub_diag(struct pci_controller *hose)
+{
+	struct pnv_phb *phb = hose->private_data;
+	struct OpalIoP7IOCErrorData *data = &phb->diag.hub_diag;
+	long rc;
+
+	rc = opal_pci_get_hub_diag_data(phb->hub_id, data, sizeof(*data));
+	if (rc != OPAL_SUCCESS) {
+		pr_warning("%s: Failed to get HUB#%llx diag-data (%ld)\n",
+			   __func__, phb->hub_id, rc);
+		return;
+	}
+
+	switch (data->type) {	/* common fields first, then per-type ones */
+	case OPAL_P7IOC_DIAG_TYPE_RGC:
+		pr_info("P7IOC diag-data for RGC\n\n");
+		ioda_eeh_hub_diag_common(data);
+		pr_info("  RGC Status:      %016llx\n", data->rgc.rgcStatus);
+		pr_info("  RGC LDCP:        %016llx\n", data->rgc.rgcLdcp);
+		break;
+	case OPAL_P7IOC_DIAG_TYPE_BI:
+		pr_info("P7IOC diag-data for BI %s\n\n",
+			data->bi.biDownbound ? "Downbound" : "Upbound");
+		ioda_eeh_hub_diag_common(data);
+		pr_info("  BI LDCP 0:       %016llx\n", data->bi.biLdcp0);
+		pr_info("  BI LDCP 1:       %016llx\n", data->bi.biLdcp1);
+		pr_info("  BI LDCP 2:       %016llx\n", data->bi.biLdcp2);
+		pr_info("  BI Fence Status: %016llx\n", data->bi.biFenceStatus);
+		break;
+	case OPAL_P7IOC_DIAG_TYPE_CI:
+		pr_info("P7IOC diag-data for CI Port %d\n\n",
+			data->ci.ciPort);
+		ioda_eeh_hub_diag_common(data);
+		pr_info("  CI Port Status:  %016llx\n", data->ci.ciPortStatus);
+		pr_info("  CI Port LDCP:    %016llx\n", data->ci.ciPortLdcp);
+		break;
+	case OPAL_P7IOC_DIAG_TYPE_MISC:
+		pr_info("P7IOC diag-data for MISC\n\n");
+		ioda_eeh_hub_diag_common(data);
+		break;
+	case OPAL_P7IOC_DIAG_TYPE_I2C:
+		pr_info("P7IOC diag-data for I2C\n\n");
+		ioda_eeh_hub_diag_common(data);
+		break;
+	default:
+		pr_warning("%s: Invalid type of HUB#%llx diag-data (%d)\n",
+			   __func__, phb->hub_id, data->type);
+	}
+}
+
+static int ioda_eeh_get_pe(struct pci_controller *hose,
+			   u16 pe_no, struct eeh_pe **pe)
+{
+	struct eeh_dev edev;
+	struct eeh_pe *found;
+
+	/* Nothing to look up when the PHB PE is missing */
+	if (!eeh_phb_pe_get(hose))
+		return -EEXIST;
+
+	/* Find the PE through a scratch eeh_dev carrying PE# */
+	memset(&edev, 0, sizeof(edev));
+	edev.phb = hose;
+	edev.pe_config_addr = pe_no;
+	found = eeh_pe_get(&edev);
+	if (!found)
+		return -EEXIST;
+
+	*pe = found;
+	return 0;
+}
+
+/**
+ * ioda_eeh_next_error - Retrieve next error for EEH core to handle
+ * @pe: The affected PE
+ *
+ * Called by the EEH core when it gets a special EEH event (one not
+ * bound to a PE). Each PHB is asked through OPAL for its next error.
+ * Informational errors are handled here; for a dead IOC, dead PHB,
+ * fenced PHB or frozen PE the matching EEH_NEXT_ERR_* code is returned
+ * (and @pe is set where one applies) so the EEH core can handle it.
+ */
+static int ioda_eeh_next_error(struct eeh_pe **pe)
+{
+	struct pci_controller *hose;
+	struct pnv_phb *phb;
+	struct eeh_pe *phb_pe, *parent_pe;
+	__be64 frozen_pe_no;
+	__be16 err_type, severity;
+	int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
+	long rc;
+	int state, ret = EEH_NEXT_ERR_NONE;
+
+	/*
+	 * While running here, it's safe to purge the event queue.
+	 * And we should keep the cached OPAL notifier event synchronized
+	 * between the kernel and firmware.
+	 */
+	eeh_remove_event(NULL, false);
+	opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul);
+
+	list_for_each_entry(hose, &hose_list, list_node) {
+		/*
+		 * If the subordinate PCI buses of the PHB has been
+		 * removed or is exactly under error recovery, we
+		 * needn't take care of it any more.
+		 */
+		phb = hose->private_data;
+		phb_pe = eeh_phb_pe_get(hose);
+		if (!phb_pe || (phb_pe->state & EEH_PE_ISOLATED))
+			continue;
+
+		rc = opal_pci_next_error(phb->opal_id,
+				&frozen_pe_no, &err_type, &severity);
+
+		/* If OPAL API returns error, we needn't proceed */
+		if (rc != OPAL_SUCCESS) {
+			pr_devel("%s: Invalid return value on "
+				 "PHB#%x (0x%lx) from opal_pci_next_error\n",
+				 __func__, hose->global_number, rc);
+			continue;
+		}
+
+		/* If the PHB doesn't have error, stop processing */
+		if (be16_to_cpu(err_type) == OPAL_EEH_NO_ERROR ||
+		    be16_to_cpu(severity) == OPAL_EEH_SEV_NO_ERROR) {
+			pr_devel("%s: No error found on PHB#%x\n",
+				 __func__, hose->global_number);
+			continue;
+		}
+
+		/*
+		 * Processing the error. We're expecting the error with
+		 * highest priority reported upon multiple errors on the
+		 * specific PHB.
+		 */
+		pr_devel("%s: Error (%d, %d, %llu) on PHB#%x\n",
+			 __func__, be16_to_cpu(err_type), be16_to_cpu(severity),
+			 be64_to_cpu(frozen_pe_no), hose->global_number);
+		switch (be16_to_cpu(err_type)) {
+		case OPAL_EEH_IOC_ERROR:
+			if (be16_to_cpu(severity) == OPAL_EEH_SEV_IOC_DEAD) {
+				pr_err("EEH: dead IOC detected\n");
+				ret = EEH_NEXT_ERR_DEAD_IOC;
+			} else if (be16_to_cpu(severity) == OPAL_EEH_SEV_INF) {
+				pr_info("EEH: IOC informative error "
+					"detected\n");
+				ioda_eeh_hub_diag(hose);
+				ret = EEH_NEXT_ERR_NONE;
+			}
+
+			break;
+		case OPAL_EEH_PHB_ERROR:
+			if (be16_to_cpu(severity) == OPAL_EEH_SEV_PHB_DEAD) {
+				*pe = phb_pe;
+				pr_err("EEH: dead PHB#%x detected, "
+				       "location: %s\n",
+				       hose->global_number,
+				       eeh_pe_loc_get(phb_pe));
+				ret = EEH_NEXT_ERR_DEAD_PHB;
+			} else if (be16_to_cpu(severity) ==
+						OPAL_EEH_SEV_PHB_FENCED) {
+				*pe = phb_pe;
+				pr_err("EEH: Fenced PHB#%x detected, "
+				       "location: %s\n",
+				       hose->global_number,
+				       eeh_pe_loc_get(phb_pe));
+				ret = EEH_NEXT_ERR_FENCED_PHB;
+			} else if (be16_to_cpu(severity) == OPAL_EEH_SEV_INF) {
+				pr_info("EEH: PHB#%x informative error "
+					"detected, location: %s\n",
+					hose->global_number,
+					eeh_pe_loc_get(phb_pe));
+				ioda_eeh_phb_diag(hose);
+				ret = EEH_NEXT_ERR_NONE;
+			}
+
+			break;
+		case OPAL_EEH_PE_ERROR:
+			/*
+			 * No matching PE: try our best to unfreeze it.
+			 */
+			if (ioda_eeh_get_pe(hose,
+					    be64_to_cpu(frozen_pe_no), pe)) {
+				pr_info("EEH: Clear non-existing PHB#%x-PE#%llx\n",
+					hose->global_number,
+					be64_to_cpu(frozen_pe_no));
+				pr_info("EEH: PHB location: %s\n",
+					eeh_pe_loc_get(phb_pe));
+				opal_pci_eeh_freeze_clear(phb->opal_id,
+					be64_to_cpu(frozen_pe_no),
+					OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
+				ret = EEH_NEXT_ERR_NONE;
+			} else if ((*pe)->state & EEH_PE_ISOLATED) {
+				ret = EEH_NEXT_ERR_NONE;
+			} else {
+				pr_err("EEH: Frozen PE#%x on PHB#%x detected\n",
+					(*pe)->addr, (*pe)->phb->global_number);
+				pr_err("EEH: PE location: %s, PHB location: %s\n",
+					eeh_pe_loc_get(*pe), eeh_pe_loc_get(phb_pe));
+				ret = EEH_NEXT_ERR_FROZEN_PE;
+			}
+
+			break;
+		default:
+			pr_warn("%s: Unexpected error type %d\n",
+				__func__, be16_to_cpu(err_type));
+		}
+
+		/*
+		 * EEH core will try recover from fenced PHB or
+		 * frozen PE. In the time for frozen PE, EEH core
+		 * enable IO path for that before collecting logs,
+		 * but it ruins the site. So we have to dump the
+		 * log in advance here.
+		 */
+		if ((ret == EEH_NEXT_ERR_FROZEN_PE  ||
+		    ret == EEH_NEXT_ERR_FENCED_PHB) &&
+		    !((*pe)->state & EEH_PE_ISOLATED)) {
+			eeh_pe_state_mark(*pe, EEH_PE_ISOLATED);
+			ioda_eeh_phb_diag(hose);
+		}
+
+		/*
+		 * We probably have the frozen parent PE out there and
+		 * we need have to handle frozen parent PE firstly.
+		 */
+		if (ret == EEH_NEXT_ERR_FROZEN_PE) {
+			parent_pe = (*pe)->parent;
+			while (parent_pe) {
+				/* Hit the ceiling ? */
+				if (parent_pe->type & EEH_PE_PHB)
+					break;
+
+				/* Frozen parent PE ? */
+				state = ioda_eeh_get_state(parent_pe);
+				if (state > 0 &&
+				    (state & active_flags) != active_flags)
+					*pe = parent_pe;
+
+				/* Next parent level */
+				parent_pe = parent_pe->parent;
+			}
+
+			/* We possibly migrate to another PE */
+			eeh_pe_state_mark(*pe, EEH_PE_ISOLATED);
+		}
+
+		/*
+		 * If we have no errors on the specific PHB or only
+		 * informative error there, we continue poking it.
+		 * Otherwise, we need actions to be taken by upper
+		 * layer.
+		 */
+		if (ret > EEH_NEXT_ERR_INF)
+			break;
+	}
+
+	return ret;
+}
+
+struct pnv_eeh_ops ioda_eeh_ops = {
+	.post_init = ioda_eeh_post_init,
+	.set_option = ioda_eeh_set_option,
+	.get_state = ioda_eeh_get_state,
+	.reset = ioda_eeh_reset,
+	.configure_bridge = ioda_eeh_configure_bridge,
+	.next_error = ioda_eeh_next_error,
+};
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
new file mode 100644
index 00000000000..56a206f32f7
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -0,0 +1,413 @@
+/*
+ * This file implements the platform dependent EEH operations on the
+ * powernv platform. The powernv platform was created in order to
+ * provide full hypervisor support.
+ *
+ * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2013.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/atomic.h>
+#include <linux/delay.h>
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/msi.h>
+#include <linux/of.h>
+#include <linux/pci.h>
+#include <linux/proc_fs.h>
+#include <linux/rbtree.h>
+#include <linux/sched.h>
+#include <linux/seq_file.h>
+#include <linux/spinlock.h>
+
+#include <asm/eeh.h>
+#include <asm/eeh_event.h>
+#include <asm/firmware.h>
+#include <asm/io.h>
+#include <asm/iommu.h>
+#include <asm/machdep.h>
+#include <asm/msi_bitmap.h>
+#include <asm/opal.h>
+#include <asm/ppc-pci.h>
+
+#include "powernv.h"
+#include "pci.h"
+
+/**
+ * powernv_eeh_init - EEH platform dependent initialization
+ *
+ * Selects device based EEH probing; -EINVAL without OPALv3 firmware
+ */
+static int powernv_eeh_init(void)
+{
+	int has_opalv3 = firmware_has_feature(FW_FEATURE_OPALv3);
+
+	/* With OPALv3 present, set the EEH probe mode and succeed */
+	if (has_opalv3) {
+		eeh_probe_mode_set(EEH_PROBE_MODE_DEV);
+		return 0;
+	}
+
+	pr_warning("%s: OPALv3 is required !\n", __func__);
+	return -EINVAL;
+}
+
+/**
+ * powernv_eeh_post_init - EEH platform dependent post initialization
+ *
+ * EEH platform dependent post initialization on powernv, run once
+ * the EEH PEs and devices have been built. Calls the post_init
+ * hook of every PHB that provides one and stops at the first
+ * failure, whose error code is returned.
+ */
+static int powernv_eeh_post_init(void)
+{
+	struct pci_controller *hose;
+	int rc;
+
+	list_for_each_entry(hose, &hose_list, list_node) {
+		struct pnv_phb *pnv = hose->private_data;
+
+		if (!pnv->eeh_ops || !pnv->eeh_ops->post_init)
+			continue;
+
+		rc = pnv->eeh_ops->post_init(hose);
+		if (rc)
+			return rc;
+	}
+
+	return 0;
+}
+
+/**
+ * powernv_eeh_dev_probe - Do probe on PCI device
+ * @dev: PCI device
+ * @flag: unused
+ *
+ * When the EEH module is installed during system boot, all PCI
+ * devices are checked one by one to see whether they support EEH.
+ * This function exists for that purpose. By default, EEH has been
+ * enabled on all PCI devices, so we only need to do the necessary
+ * initialization on the corresponding EEH device and create the PE
+ * accordingly.
+ *
+ * Note that it is unsafe to retrieve the EEH device through the
+ * corresponding PCI device. During PCI device hotplug, which was
+ * possibly triggered by the EEH core, the binding between the EEH
+ * device and the PCI device isn't built yet.
+ */
+static int powernv_eeh_dev_probe(struct pci_dev *dev, void *flag)
+{
+	struct pci_controller *hose = pci_bus_to_host(dev->bus);
+	struct pnv_phb *phb = hose->private_data;
+	struct device_node *dn = pci_device_to_OF_node(dev);
+	struct eeh_dev *edev = of_node_to_eeh_dev(dn);
+
+	/*
+	 * The root bridge, which doesn't have any subordinate
+	 * PCI devices, has no OF node, so there is nothing to
+	 * probe for it. Devices without an EEH device or already
+	 * attached to a PE are skipped as well.
+	 */
+	if (!dn || !edev || edev->pe)
+		return 0;
+
+	/* Skip for PCI-ISA bridge */
+	if ((dev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
+		return 0;
+
+	/* Initialize eeh device */
+	edev->class_code = dev->class;
+	edev->mode	&= 0xFFFFFF00;
+	if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE)
+		edev->mode |= EEH_DEV_BRIDGE;
+	edev->pcix_cap = pci_find_capability(dev, PCI_CAP_ID_PCIX);
+	if (pci_is_pcie(dev)) {
+		edev->pcie_cap = pci_pcie_cap(dev);
+
+		if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT)
+			edev->mode |= EEH_DEV_ROOT_PORT;
+		else if (pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM)
+			edev->mode |= EEH_DEV_DS_PORT;
+
+		edev->aer_cap = pci_find_ext_capability(dev,
+							PCI_EXT_CAP_ID_ERR);
+	}
+
+	edev->config_addr	= ((dev->bus->number << 8) | dev->devfn);
+	edev->pe_config_addr	= phb->bdfn_to_pe(phb, dev->bus, dev->devfn & 0xff);
+
+	/* Create PE */
+	eeh_add_to_parent_pe(edev);
+
+	/*
+	 * Enable EEH explicitly so that we will do EEH check
+	 * while accessing I/O stuff
+	 */
+	eeh_set_enable(true);
+
+	/* Save memory bars */
+	eeh_save_bars(edev);
+
+	return 0;
+}
+
+/**
+ * powernv_eeh_set_option - Initialize EEH or MMIO/DMA reenable
+ * @pe: EEH PE
+ * @option: operation to be issued
+ *
+ * The function is used to control the EEH functionality globally.
+ * Currently, the following options are supported according to PAPR:
+ * Enable EEH, Disable EEH, Enable MMIO and Enable DMA
+ */
+static int powernv_eeh_set_option(struct eeh_pe *pe, int option)
+{
+	struct pnv_phb *pnv = pe->phb->private_data;
+
+	/* Without a hardware specific handler, report -EEXIST */
+	if (!pnv->eeh_ops || !pnv->eeh_ops->set_option)
+		return -EEXIST;
+
+	/*
+	 * Otherwise, what we need do is pass it down for
+	 * the hardware implementation to handle it and
+	 * hand its result back to the caller.
+	 */
+	return pnv->eeh_ops->set_option(pe, option);
+}
+
+/**
+ * powernv_eeh_get_pe_addr - Retrieve PE address
+ * @pe: EEH PE
+ *
+ * Return the PE address stored in @pe. No lookup by traditional
+ * PCI BDF (Bus/Device/Function) address is done here.
+ */
+static int powernv_eeh_get_pe_addr(struct eeh_pe *pe)
+{
+	return pe->addr;
+}
+
+/**
+ * powernv_eeh_get_state - Retrieve PE state
+ * @pe: EEH PE
+ * @delay: delay while PE state is temporarily unavailable
+ *
+ * Retrieve the state of the specified PE. For IODA-compatible
+ * platforms, it should be retrieved from the IODA table, so the
+ * request is passed down to the hardware implementation. Without
+ * one, EEH_STATE_NOT_SUPPORT is returned.
+ */
+static int powernv_eeh_get_state(struct eeh_pe *pe, int *delay)
+{
+	struct pci_controller *hose = pe->phb;
+	struct pnv_phb *pnv = hose->private_data;
+	int state;
+
+	/* No hardware specific handler: @delay is left untouched */
+	if (!pnv->eeh_ops || !pnv->eeh_ops->get_state)
+		return EEH_STATE_NOT_SUPPORT;
+
+	state = pnv->eeh_ops->get_state(pe);
+
+	/*
+	 * If the PE state is temporarily unavailable,
+	 * inform the EEH core to delay for the default
+	 * period (1 second). Otherwise no delay is
+	 * requested from it.
+	 */
+	if (delay)
+		*delay = (state & EEH_STATE_UNAVAILABLE) ? 1000 : 0;
+
+	return state;
+}
+
+/**
+ * powernv_eeh_reset - Reset the specified PE
+ * @pe: EEH PE
+ * @option: reset option
+ *
+ * Reset the specified PE
+ */
+static int powernv_eeh_reset(struct eeh_pe *pe, int option)
+{
+	struct pnv_phb *phb = pe->phb->private_data;
+
+	/* -EEXIST when the PHB has no backend or no reset hook */
+	if (!phb->eeh_ops || !phb->eeh_ops->reset)
+		return -EEXIST;
+
+	/* Otherwise the hook's own result is handed back */
+	return phb->eeh_ops->reset(pe, option);
+}
+
+/**
+ * powernv_eeh_wait_state - Wait for PE state
+ * @pe: EEH PE
+ * @max_wait: maximal period in milliseconds
+ *
+ * Wait for the state of associated PE. It might take some time
+ * to retrieve the PE's state.
+ */
+static int powernv_eeh_wait_state(struct eeh_pe *pe, int max_wait)
+{
+	int ret;
+	int mwait;
+
+	while (1) {
+		ret = powernv_eeh_get_state(pe, &mwait);
+
+		/*
+		 * Only a state that equals EEH_STATE_UNAVAILABLE
+		 * makes us wait and poll again; any other state is
+		 * returned to the caller immediately.
+		 */
+		if (ret != EEH_STATE_UNAVAILABLE)
+			return ret;
+
+		max_wait -= mwait;
+		if (max_wait <= 0) {
+			pr_warning("%s: Timeout getting PE#%x's state (%d)\n",
+				   __func__, pe->addr, max_wait);
+			return EEH_STATE_NOT_SUPPORT;
+		}
+
+		msleep(mwait);
+	}
+
+	return EEH_STATE_NOT_SUPPORT;
+}
+
+/**
+ * powernv_eeh_get_log - Retrieve error log
+ * @pe: EEH PE
+ * @severity: temporary or permanent error log
+ * @drv_log: driver log to be combined with retrieved error log
+ * @len: length of driver log
+ *
+ * Retrieve the temporary or permanent error from the PE.
+ */
+static int powernv_eeh_get_log(struct eeh_pe *pe, int severity,
+			char *drv_log, unsigned long len)
+{
+	struct pnv_phb *phb = pe->phb->private_data;
+
+	/* -EEXIST when the PHB has no backend or no get_log hook */
+	if (!phb->eeh_ops || !phb->eeh_ops->get_log)
+		return -EEXIST;
+
+	/* All arguments are forwarded to the hook unchanged */
+	return phb->eeh_ops->get_log(pe, severity, drv_log, len);
+}
+
+/**
+ * powernv_eeh_configure_bridge - Configure PCI bridges in the indicated PE
+ * @pe: EEH PE
+ *
+ * The function will be called to reconfigure the bridges included
+ * in the specified PE so that the malfunctioning PE would be recovered
+ * again.
+ */
+static int powernv_eeh_configure_bridge(struct eeh_pe *pe)
+{
+	struct pnv_phb *phb = pe->phb->private_data;
+
+	/* A missing hook is not an error here: 0 is returned */
+	if (!phb->eeh_ops || !phb->eeh_ops->configure_bridge)
+		return 0;
+
+	/* Otherwise the hook's own result is handed back */
+	return phb->eeh_ops->configure_bridge(pe);
+}
+
+/**
+ * powernv_eeh_next_error - Retrieve next EEH error to handle
+ * @pe: handed to the PHB backend's next_error() hook unchanged
+ *
+ * Asks the first PHB's next_error() hook; -EEXIST if none is available
+ */
+static int powernv_eeh_next_error(struct eeh_pe **pe)
+{
+	struct pci_controller *hose;
+	struct pnv_phb *phb = NULL;
+
+	/* Only the first PHB on the list is asked; it may lack eeh_ops */
+	list_for_each_entry(hose, &hose_list, list_node) {
+		phb = hose->private_data;
+		break;
+	}
+	if (phb && phb->eeh_ops && phb->eeh_ops->next_error)
+		return phb->eeh_ops->next_error(pe);
+
+	return -EEXIST;
+}
+
+static int powernv_eeh_restore_config(struct device_node *dn)
+{
+	struct eeh_dev *edev = of_node_to_eeh_dev(dn);
+	struct pnv_phb *phb;
+	s64 rc;
+
+	if (!edev)
+		return -EEXIST;
+
+	/* Have OPAL reinitialize the device at its config address */
+	phb = edev->phb->private_data;
+	rc = opal_pci_reinit(phb->opal_id,
+			     OPAL_REINIT_PCI_DEV, edev->config_addr);
+	if (!rc)
+		return 0;
+
+	pr_warn("%s: Can't reinit PCI dev 0x%x (%lld)\n",
+		__func__, edev->config_addr, rc);
+	return -EIO;
+}
+
+static struct eeh_ops powernv_eeh_ops = {
+	.name                   = "powernv",
+	.init                   = powernv_eeh_init,
+	.post_init              = powernv_eeh_post_init,
+	.of_probe               = NULL,
+	.dev_probe              = powernv_eeh_dev_probe,
+	.set_option             = powernv_eeh_set_option,
+	.get_pe_addr            = powernv_eeh_get_pe_addr,
+	.get_state              = powernv_eeh_get_state,
+	.reset                  = powernv_eeh_reset,
+	.wait_state             = powernv_eeh_wait_state,
+	.get_log                = powernv_eeh_get_log,
+	.configure_bridge       = powernv_eeh_configure_bridge,
+	.read_config            = pnv_pci_cfg_read,
+	.write_config           = pnv_pci_cfg_write,
+	.next_error		= powernv_eeh_next_error,
+	.restore_config		= powernv_eeh_restore_config
+};
+
+/**
+ * eeh_powernv_init - Register platform dependent EEH operations
+ *
+ * EEH initialization on powernv platform. This function should be
+ * called before any EEH related functions.
+ */
+static int __init eeh_powernv_init(void)
+{
+	int rc;
+
+	if (!machine_is(powernv))
+		return -EINVAL;
+
+	rc = eeh_ops_register(&powernv_eeh_ops);
+	if (rc)
+		pr_info("EEH: Failed to initialize PowerNV platform (%d)\n", rc);
+	else
+		pr_info("EEH: PowerNV platform initialized\n");
+
+	return rc;
+}
+
+early_initcall(eeh_powernv_init);
diff --git a/arch/powerpc/platforms/powernv/opal-async.c b/arch/powerpc/platforms/powernv/opal-async.c
new file mode 100644
index 00000000000..32e2adfa532
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-async.c
@@ -0,0 +1,204 @@
+/*
+ * PowerNV OPAL asynchronous completion interfaces
+ *
+ * Copyright 2013 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/semaphore.h>
+#include <linux/spinlock.h>
+#include <linux/wait.h>
+#include <linux/gfp.h>
+#include <linux/of.h>
+#include <asm/opal.h>
+
+#define N_ASYNC_COMPLETIONS	64
+
+static DECLARE_BITMAP(opal_async_complete_map, N_ASYNC_COMPLETIONS) = {~0UL};
+static DECLARE_BITMAP(opal_async_token_map, N_ASYNC_COMPLETIONS);
+static DECLARE_WAIT_QUEUE_HEAD(opal_async_wait);
+static DEFINE_SPINLOCK(opal_async_comp_lock);
+static struct semaphore opal_async_sem;
+static struct opal_msg *opal_async_responses;
+static unsigned int opal_max_async_tokens;
+
+int __opal_async_get_token(void)
+{
+	unsigned long flags;
+	int token;
+
+	spin_lock_irqsave(&opal_async_comp_lock, flags);
+	/* Candidate: lowest bit set in the completion map */
+	token = find_first_bit(opal_async_complete_map, opal_max_async_tokens);
+
+	/*
+	 * -EBUSY if no such bit exists, or if the candidate is
+	 * already marked in the token map. Only a successful
+	 * claim clears the candidate's completion bit.
+	 */
+	if (token >= opal_max_async_tokens ||
+	    __test_and_set_bit(token, opal_async_token_map))
+		token = -EBUSY;
+	else
+		__clear_bit(token, opal_async_complete_map);
+
+	spin_unlock_irqrestore(&opal_async_comp_lock, flags);
+	return token;
+}
+
+int opal_async_get_token_interruptible(void)
+{
+	int token;
+
+	/* Wait until a token is available */
+	if (down_interruptible(&opal_async_sem))
+		return -ERESTARTSYS;
+
+	token = __opal_async_get_token();
+	if (token < 0)
+		up(&opal_async_sem);
+
+	return token;
+}
+
+int __opal_async_release_token(int token)
+{
+	unsigned long flags;
+
+	if (token < 0 || token >= opal_max_async_tokens) {
+		pr_err("%s: Passed token is out of range, token %d\n",
+				__func__, token);
+		return -EINVAL;
+	}
+
+	spin_lock_irqsave(&opal_async_comp_lock, flags);
+	__set_bit(token, opal_async_complete_map);
+	__clear_bit(token, opal_async_token_map);
+	spin_unlock_irqrestore(&opal_async_comp_lock, flags);
+
+	return 0;
+}
+
+int opal_async_release_token(int token)
+{
+	int ret;
+
+	ret = __opal_async_release_token(token);
+	if (ret)
+		return ret;
+
+	up(&opal_async_sem);
+
+	return 0;
+}
+
+int opal_async_wait_response(uint64_t token, struct opal_msg *msg)
+{
+	if (token >= opal_max_async_tokens) {
+		pr_err("%s: Invalid token passed\n", __func__);
+		return -EINVAL;
+	}
+
+	if (!msg) {
+		pr_err("%s: Invalid message pointer passed\n", __func__);
+		return -EINVAL;
+	}
+
+	wait_event(opal_async_wait, test_bit(token, opal_async_complete_map));
+	memcpy(msg, &opal_async_responses[token], sizeof(*msg));
+
+	return 0;
+}
+
+static int opal_async_comp_event(struct notifier_block *nb,
+		unsigned long msg_type, void *msg)
+{
+	struct opal_msg *comp_msg = msg;
+	unsigned long flags;
+	uint64_t token = be64_to_cpu(comp_msg->params[0]);
+
+	/* Drop other message types and tokens beyond the response array */
+	if (msg_type != OPAL_MSG_ASYNC_COMP || token >= opal_max_async_tokens)
+		return 0;
+
+	/* Store the response first, then flag completion under the lock */
+	memcpy(&opal_async_responses[token], comp_msg, sizeof(*comp_msg));
+	spin_lock_irqsave(&opal_async_comp_lock, flags);
+	__set_bit(token, opal_async_complete_map);
+	spin_unlock_irqrestore(&opal_async_comp_lock, flags);
+	wake_up(&opal_async_wait);
+
+	return 0;
+}
+
+static struct notifier_block opal_async_comp_nb = {
+		.notifier_call	= opal_async_comp_event,
+		.next		= NULL,
+		.priority	= 0,
+};
+
+static int __init opal_async_comp_init(void)
+{
+	struct device_node *opal_node;
+	const __be32 *async;
+	int err;
+
+	opal_node = of_find_node_by_path("/ibm,opal");
+	if (!opal_node) {
+		pr_err("%s: Opal node not found\n", __func__);
+		err = -ENOENT;
+		goto out;
+	}
+
+	async = of_get_property(opal_node, "opal-msg-async-num", NULL);
+	if (!async) {
+		pr_err("%s: %s has no opal-msg-async-num\n",
+				__func__, opal_node->full_name);
+		err = -ENOENT;
+		goto out_opal_node;
+	}
+
+	opal_max_async_tokens = be32_to_cpup(async);
+	if (opal_max_async_tokens > N_ASYNC_COMPLETIONS)
+		opal_max_async_tokens = N_ASYNC_COMPLETIONS;
+
+	/* Allocate before registering: the notifier writes into this array */
+	opal_async_responses = kcalloc(opal_max_async_tokens,
+			sizeof(*opal_async_responses), GFP_KERNEL);
+	if (!opal_async_responses) {
+		pr_err("%s: Out of memory, failed to do asynchronous "
+				"completion init\n", __func__);
+		err = -ENOMEM;
+		goto out_opal_node;
+	}
+
+	err = opal_message_notifier_register(OPAL_MSG_ASYNC_COMP,
+			&opal_async_comp_nb);
+	if (err) {
+		pr_err("%s: Can't register OPAL event notifier (%d)\n",
+				__func__, err);
+		kfree(opal_async_responses);
+		opal_async_responses = NULL;
+		goto out_opal_node;
+	}
+
+	/* One token is held back from the semaphore, as we may need to pop
+	 * it in an emergency via a synchronous __opal_async_get_token() */
+	sema_init(&opal_async_sem, opal_max_async_tokens - 1);
+
+out_opal_node:
+	of_node_put(opal_node);
+out:
+	return err;
+}
+subsys_initcall(opal_async_comp_init);
diff --git a/arch/powerpc/platforms/powernv/opal-dump.c b/arch/powerpc/platforms/powernv/opal-dump.c
new file mode 100644
index 00000000000..788a1977b9a
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-dump.c
@@ -0,0 +1,448 @@
+/*
+ * PowerNV OPAL Dump Interface
+ *
+ * Copyright 2013,2014 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kobject.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/pagemap.h>
+#include <linux/delay.h>
+
+#include <asm/opal.h>
+
+#define DUMP_TYPE_FSP	0x01
+
+struct dump_obj {
+	struct kobject  kobj;
+	struct bin_attribute dump_attr;
+	uint32_t	id;  /* becomes object name */
+	uint32_t	type;
+	uint32_t	size;
+	char		*buffer;
+};
+#define to_dump_obj(x) container_of(x, struct dump_obj, kobj)
+
+struct dump_attribute {
+	struct attribute attr;
+	ssize_t (*show)(struct dump_obj *dump, struct dump_attribute *attr,
+			char *buf);
+	ssize_t (*store)(struct dump_obj *dump, struct dump_attribute *attr,
+			 const char *buf, size_t count);
+};
+#define to_dump_attr(x) container_of(x, struct dump_attribute, attr)
+
+static ssize_t dump_id_show(struct dump_obj *dump_obj,
+			    struct dump_attribute *attr,
+			    char *buf)
+{
+	return sprintf(buf, "0x%x\n", dump_obj->id);
+}
+
+static const char* dump_type_to_string(uint32_t type)
+{
+	switch (type) {
+	case 0x01: return "SP Dump";
+	case 0x02: return "System/Platform Dump";
+	case 0x03: return "SMA Dump";
+	default: return "unknown";
+	}
+}
+
+static ssize_t dump_type_show(struct dump_obj *dump_obj,
+			      struct dump_attribute *attr,
+			      char *buf)
+{
+	uint32_t type = dump_obj->type;
+
+	return sprintf(buf, "0x%x %s\n", type, dump_type_to_string(type));
+}
+
+static ssize_t dump_ack_show(struct dump_obj *dump_obj,
+			     struct dump_attribute *attr,
+			     char *buf)
+{
+	return sprintf(buf, "ack - acknowledge dump\n");
+}
+
+/*
+ * Send acknowledgement to OPAL
+ */
+static int64_t dump_send_ack(uint32_t dump_id)
+{
+	int rc;
+
+	rc = opal_dump_ack(dump_id);
+	if (rc)
+		pr_warn("%s: Failed to send ack to Dump ID 0x%x (%d)\n",
+			__func__, dump_id, rc);
+	return rc;
+}
+
+static ssize_t dump_ack_store(struct dump_obj *dump_obj,
+			      struct dump_attribute *attr,
+			      const char *buf,
+			      size_t count)
+{
+	dump_send_ack(dump_obj->id);
+	sysfs_remove_file_self(&dump_obj->kobj, &attr->attr);
+	kobject_put(&dump_obj->kobj);
+	return count;
+}
+
+/* Attributes of a dump. The binary attribute of the dump itself is
+ * dynamic due to the dynamic size of the dump. id and type have no
+ * store method, so they are exposed read-only.
+ */
+static struct dump_attribute id_attribute =
+	__ATTR(id, 0444, dump_id_show, NULL);
+static struct dump_attribute type_attribute =
+	__ATTR(type, 0444, dump_type_show, NULL);
+static struct dump_attribute ack_attribute =
+	__ATTR(acknowledge, 0660, dump_ack_show, dump_ack_store);
+
+static ssize_t init_dump_show(struct dump_obj *dump_obj,
+			      struct dump_attribute *attr,
+			      char *buf)
+{
+	return sprintf(buf, "1 - initiate dump\n");
+}
+
+static int64_t dump_fips_init(uint8_t type)
+{
+	int rc;
+
+	rc = opal_dump_init(type);
+	if (rc)
+		pr_warn("%s: Failed to initiate FipS dump (%d)\n",
+			__func__, rc);
+	return rc;
+}
+
+static ssize_t init_dump_store(struct dump_obj *dump_obj,
+			       struct dump_attribute *attr,
+			       const char *buf,
+			       size_t count)
+{
+	dump_fips_init(DUMP_TYPE_FSP);
+	pr_info("%s: Initiated FSP dump\n", __func__);
+	return count;
+}
+
+static struct dump_attribute initiate_attribute =
+	__ATTR(initiate_dump, 0600, init_dump_show, init_dump_store);
+
+static struct attribute *initiate_attrs[] = {
+	&initiate_attribute.attr,
+	NULL,
+};
+
+static struct attribute_group initiate_attr_group = {
+	.attrs = initiate_attrs,
+};
+
+static struct kset *dump_kset;
+
+static ssize_t dump_attr_show(struct kobject *kobj,
+			      struct attribute *attr,
+			      char *buf)
+{
+	struct dump_attribute *attribute;
+	struct dump_obj *dump;
+
+	attribute = to_dump_attr(attr);
+	dump = to_dump_obj(kobj);
+
+	if (!attribute->show)
+		return -EIO;
+
+	return attribute->show(dump, attribute, buf);
+}
+
+static ssize_t dump_attr_store(struct kobject *kobj,
+			       struct attribute *attr,
+			       const char *buf, size_t len)
+{
+	struct dump_attribute *attribute;
+	struct dump_obj *dump;
+
+	attribute = to_dump_attr(attr);
+	dump = to_dump_obj(kobj);
+
+	if (!attribute->store)
+		return -EIO;
+
+	return attribute->store(dump, attribute, buf, len);
+}
+
+static const struct sysfs_ops dump_sysfs_ops = {
+	.show = dump_attr_show,
+	.store = dump_attr_store,
+};
+
+static void dump_release(struct kobject *kobj)
+{
+	struct dump_obj *dump;
+
+	dump = to_dump_obj(kobj);
+	vfree(dump->buffer);
+	kfree(dump);
+}
+
+static struct attribute *dump_default_attrs[] = {
+	&id_attribute.attr,
+	&type_attribute.attr,
+	&ack_attribute.attr,
+	NULL,
+};
+
+static struct kobj_type dump_ktype = {
+	.sysfs_ops = &dump_sysfs_ops,
+	.release = &dump_release,
+	.default_attrs = dump_default_attrs,
+};
+
+static int64_t dump_read_info(uint32_t *dump_id, uint32_t *dump_size, uint32_t *dump_type)
+{
+	/* Zeroed so a failed call cannot hand stack garbage to the caller */
+	__be32 id = 0, size = 0, type = cpu_to_be32(0xffffffff);
+	int rc;
+
+	/* On OPAL_PARAMETER retry with opal_dump_info(), which has no type */
+	rc = opal_dump_info2(&id, &size, &type);
+	if (rc == OPAL_PARAMETER)
+		rc = opal_dump_info(&id, &size);
+
+	*dump_id = be32_to_cpu(id);
+	*dump_size = be32_to_cpu(size);
+	*dump_type = be32_to_cpu(type);
+
+	if (rc)
+		pr_warn("%s: Failed to get dump info (%d)\n",
+			__func__, rc);
+	return rc;
+}
+
+static int64_t dump_read_data(struct dump_obj *dump)
+{
+	struct opal_sg_list *list;
+	uint64_t addr;
+	int64_t rc;
+
+	/* Zeroed buffer, rounded up to whole pages, for the dump contents */
+	dump->buffer = vzalloc(PAGE_ALIGN(dump->size));
+	if (!dump->buffer) {
+		pr_err("%s : Failed to allocate memory\n", __func__);
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	/* Build the SG list; on failure dump->buffer is left allocated */
+	list = opal_vmalloc_to_sg_list(dump->buffer, dump->size);
+	if (!list) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	/* Physical address of the list, handed to opal_dump_read() below */
+	addr = __pa(list);
+
+	/* Retry while busy; BUSY_EVENT also polls events and sleeps 20ms */
+	rc = OPAL_BUSY_EVENT;
+	while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
+		rc = opal_dump_read(dump->id, addr);
+		if (rc == OPAL_BUSY_EVENT) {
+			opal_poll_events(NULL);
+			msleep(20);
+		}
+	}
+
+	if (rc != OPAL_SUCCESS && rc != OPAL_PARTIAL)
+		pr_warn("%s: Extract dump failed for ID 0x%x\n",
+			__func__, dump->id);
+
+	/* Free SG list */
+	opal_free_sg_list(list);
+
+out:
+	return rc;
+}
+
+static ssize_t dump_attr_read(struct file *filep, struct kobject *kobj,
+			      struct bin_attribute *bin_attr,
+			      char *buffer, loff_t pos, size_t count)
+{
+	ssize_t rc;
+
+	struct dump_obj *dump = to_dump_obj(kobj);
+
+	if (!dump->buffer) {
+		rc = dump_read_data(dump);
+
+		if (rc != OPAL_SUCCESS && rc != OPAL_PARTIAL) {
+			vfree(dump->buffer);
+			dump->buffer = NULL;
+
+			return -EIO;
+		}
+		if (rc == OPAL_PARTIAL) {
+			/* On a partial read we return EIO. NOTE(review):
+			 * dump->buffer stays set, so a later read skips
+			 * dump_read_data() and copies this buffer - confirm.
+			 */
+			pr_info("%s: Platform dump partially read.ID = 0x%x\n",
+				__func__, dump->id);
+			return -EIO;
+		}
+	}
+
+	memcpy(buffer, dump->buffer + pos, count);
+
+	/* You may think we could free the dump buffer now and retrieve
+	 * it again later if needed, but due to current firmware limitation,
+	 * that's not the case. So, once read into userspace once,
+	 * we keep the dump around until it's acknowledged by userspace.
+	 */
+
+	return count;
+}
+
+static struct dump_obj *create_dump_obj(uint32_t id, size_t size,
+					uint32_t type)
+{
+	struct dump_obj *dump;
+	int rc;
+
+	dump = kzalloc(sizeof(*dump), GFP_KERNEL);
+	if (!dump)
+		return NULL;
+
+	dump->kobj.kset = dump_kset;
+
+	kobject_init(&dump->kobj, &dump_ktype);
+
+	sysfs_bin_attr_init(&dump->dump_attr);
+
+	dump->dump_attr.attr.name = "dump";
+	dump->dump_attr.attr.mode = 0400;
+	dump->dump_attr.size = size;
+	dump->dump_attr.read = dump_attr_read;
+
+	dump->id = id;
+	dump->size = size;
+	dump->type = type;
+
+	rc = kobject_add(&dump->kobj, NULL, "0x%x-0x%x", type, id);
+	if (rc) {
+		kobject_put(&dump->kobj);
+		return NULL;
+	}
+
+	rc = sysfs_create_bin_file(&dump->kobj, &dump->dump_attr);
+	if (rc) {
+		kobject_put(&dump->kobj);
+		return NULL;
+	}
+
+	pr_info("%s: New platform dump. ID = 0x%x Size %u\n",
+		__func__, dump->id, dump->size);
+
+	kobject_uevent(&dump->kobj, KOBJ_ADD);
+
+	return dump;
+}
+
+static int process_dump(void)
+{
+	int rc;
+	uint32_t dump_id, dump_size, dump_type;
+	struct kobject *kobj;
+	char name[22];
+
+	rc = dump_read_info(&dump_id, &dump_size, &dump_type);
+	if (rc != OPAL_SUCCESS)
+		return rc;
+
+	sprintf(name, "0x%x-0x%x", dump_type, dump_id);
+
+	/* We may get notified twice: do not create a conflicting entry,
+	 * but drop the reference that kset_find_obj() took on the match.
+	 */
+	kobj = kset_find_obj(dump_kset, name);
+	if (kobj) {
+		kobject_put(kobj);
+		return 0;
+	}
+
+	if (!create_dump_obj(dump_id, dump_size, dump_type))
+		return -1;
+	return 0;
+}
+
+static void dump_work_fn(struct work_struct *work)
+{
+	process_dump();
+}
+
+static DECLARE_WORK(dump_work, dump_work_fn);
+
+static void schedule_process_dump(void)
+{
+	schedule_work(&dump_work);
+}
+
+/*
+ * New dump available notification
+ *
+ * Once we get notification, we add sysfs entries for it.
+ * We only fetch the dump on demand, and create sysfs asynchronously.
+ */
+static int dump_event(struct notifier_block *nb,
+		      unsigned long events, void *change)
+{
+	if (events & OPAL_EVENT_DUMP_AVAIL)
+		schedule_process_dump();
+
+	return 0;
+}
+
+static struct notifier_block dump_nb = {
+	.notifier_call  = dump_event,
+	.next           = NULL,
+	.priority       = 0
+};
+
+void __init opal_platform_dump_init(void)
+{
+	int rc;
+
+	dump_kset = kset_create_and_add("dump", NULL, opal_kobj);
+	if (!dump_kset) {
+		pr_warn("%s: Failed to create dump kset\n", __func__);
+		return;
+	}
+
+	rc = sysfs_create_group(&dump_kset->kobj, &initiate_attr_group);
+	if (rc) {
+		pr_warn("%s: Failed to create initiate dump attr group\n",
+			__func__);
+		kobject_put(&dump_kset->kobj);
+		return;
+	}
+
+	rc = opal_notifier_register(&dump_nb);
+	if (rc) {
+		pr_warn("%s: Can't register OPAL event notifier (%d)\n",
+			__func__, rc);
+		return;
+	}
+
+	opal_dump_resend_notification();
+}
diff --git a/arch/powerpc/platforms/powernv/opal-elog.c b/arch/powerpc/platforms/powernv/opal-elog.c
new file mode 100644
index 00000000000..0ad533b617f
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-elog.c
@@ -0,0 +1,315 @@
+/*
+ * Error log support on PowerNV.
+ *
+ * Copyright 2013,2014 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+#include <linux/sysfs.h>
+#include <linux/fs.h>
+#include <linux/vmalloc.h>
+#include <linux/fcntl.h>
+#include <linux/kobject.h>
+#include <asm/uaccess.h>
+#include <asm/opal.h>
+
+struct elog_obj {
+	struct kobject kobj;
+	struct bin_attribute raw_attr;
+	uint64_t id;
+	uint64_t type;
+	size_t size;
+	char *buffer;
+};
+#define to_elog_obj(x) container_of(x, struct elog_obj, kobj)
+
+struct elog_attribute {
+	struct attribute attr;
+	ssize_t (*show)(struct elog_obj *elog, struct elog_attribute *attr,
+			char *buf);
+	ssize_t (*store)(struct elog_obj *elog, struct elog_attribute *attr,
+			 const char *buf, size_t count);
+};
+#define to_elog_attr(x) container_of(x, struct elog_attribute, attr)
+
+static ssize_t elog_id_show(struct elog_obj *elog_obj,
+			    struct elog_attribute *attr,
+			    char *buf)
+{
+	return sprintf(buf, "0x%llx\n", elog_obj->id);
+}
+
+static const char *elog_type_to_string(uint64_t type)
+{
+	switch (type) {
+	case 0: return "PEL";
+	default: return "unknown";
+	}
+}
+
+static ssize_t elog_type_show(struct elog_obj *elog_obj,
+			      struct elog_attribute *attr,
+			      char *buf)
+{
+	return sprintf(buf, "0x%llx %s\n",
+		       elog_obj->type,
+		       elog_type_to_string(elog_obj->type));
+}
+
+static ssize_t elog_ack_show(struct elog_obj *elog_obj,
+			     struct elog_attribute *attr,
+			     char *buf)
+{
+	return sprintf(buf, "ack - acknowledge log message\n");
+}
+
+static ssize_t elog_ack_store(struct elog_obj *elog_obj,
+			      struct elog_attribute *attr,
+			      const char *buf,
+			      size_t count)
+{
+	opal_send_ack_elog(elog_obj->id);
+	sysfs_remove_file_self(&elog_obj->kobj, &attr->attr);
+	kobject_put(&elog_obj->kobj);
+	return count;
+}
+
+static struct elog_attribute id_attribute =
+	__ATTR(id, 0444, elog_id_show, NULL);
+static struct elog_attribute type_attribute =
+	__ATTR(type, 0444, elog_type_show, NULL);
+static struct elog_attribute ack_attribute =
+	__ATTR(acknowledge, 0660, elog_ack_show, elog_ack_store);
+
+static struct kset *elog_kset;
+
+static ssize_t elog_attr_show(struct kobject *kobj,
+			      struct attribute *attr,
+			      char *buf)
+{
+	struct elog_attribute *attribute;
+	struct elog_obj *elog;
+
+	attribute = to_elog_attr(attr);
+	elog = to_elog_obj(kobj);
+
+	if (!attribute->show)
+		return -EIO;
+
+	return attribute->show(elog, attribute, buf);
+}
+
+static ssize_t elog_attr_store(struct kobject *kobj,
+			       struct attribute *attr,
+			       const char *buf, size_t len)
+{
+	struct elog_attribute *attribute;
+	struct elog_obj *elog;
+
+	attribute = to_elog_attr(attr);
+	elog = to_elog_obj(kobj);
+
+	if (!attribute->store)
+		return -EIO;
+
+	return attribute->store(elog, attribute, buf, len);
+}
+
+static const struct sysfs_ops elog_sysfs_ops = {
+	.show = elog_attr_show,
+	.store = elog_attr_store,
+};
+
+static void elog_release(struct kobject *kobj)
+{
+	struct elog_obj *elog;
+
+	elog = to_elog_obj(kobj);
+	kfree(elog->buffer);
+	kfree(elog);
+}
+
+static struct attribute *elog_default_attrs[] = {
+	&id_attribute.attr,
+	&type_attribute.attr,
+	&ack_attribute.attr,
+	NULL,
+};
+
+static struct kobj_type elog_ktype = {
+	.sysfs_ops = &elog_sysfs_ops,
+	.release = &elog_release,
+	.default_attrs = elog_default_attrs,
+};
+
+/* Maximum size of a single log on FSP is 16KB */
+#define OPAL_MAX_ERRLOG_SIZE	16384
+
+static ssize_t raw_attr_read(struct file *filep, struct kobject *kobj,
+			     struct bin_attribute *bin_attr,
+			     char *buffer, loff_t pos, size_t count)
+{
+	int opal_rc;
+
+	struct elog_obj *elog = to_elog_obj(kobj);
+
+	/* We may have had an error reading before, so let's retry */
+	if (!elog->buffer) {
+		elog->buffer = kzalloc(elog->size, GFP_KERNEL);
+		if (!elog->buffer)
+			return -EIO;
+
+		opal_rc = opal_read_elog(__pa(elog->buffer),
+					 elog->size, elog->id);
+		if (opal_rc != OPAL_SUCCESS) {
+			pr_err("ELOG: log read failed for log-id=%llx\n",
+			       elog->id);
+			kfree(elog->buffer);
+			elog->buffer = NULL;
+			return -EIO;
+		}
+	}
+
+	memcpy(buffer, elog->buffer + pos, count);
+
+	return count;
+}
+
+static struct elog_obj *create_elog_obj(uint64_t id, size_t size, uint64_t type)
+{
+	struct elog_obj *elog;
+	int rc;
+
+	elog = kzalloc(sizeof(*elog), GFP_KERNEL);
+	if (!elog)
+		return NULL;
+
+	elog->kobj.kset = elog_kset;
+
+	kobject_init(&elog->kobj, &elog_ktype);
+
+	sysfs_bin_attr_init(&elog->raw_attr);
+
+	elog->raw_attr.attr.name = "raw";
+	elog->raw_attr.attr.mode = 0400;
+	elog->raw_attr.size = size;
+	elog->raw_attr.read = raw_attr_read;
+
+	elog->id = id;
+	elog->size = size;
+	elog->type = type;
+
+	elog->buffer = kzalloc(elog->size, GFP_KERNEL);
+
+	if (elog->buffer) {
+		rc = opal_read_elog(__pa(elog->buffer),
+					 elog->size, elog->id);
+		if (rc != OPAL_SUCCESS) {
+			pr_err("ELOG: log read failed for log-id=%llx\n",
+			       elog->id);
+			kfree(elog->buffer);
+			elog->buffer = NULL;
+		}
+	}
+
+	rc = kobject_add(&elog->kobj, NULL, "0x%llx", id);
+	if (rc) {
+		kobject_put(&elog->kobj);
+		return NULL;
+	}
+
+	rc = sysfs_create_bin_file(&elog->kobj, &elog->raw_attr);
+	if (rc) {
+		kobject_put(&elog->kobj);
+		return NULL;
+	}
+
+	kobject_uevent(&elog->kobj, KOBJ_ADD);
+
+	return elog;
+}
+
+static void elog_work_fn(struct work_struct *work)
+{
+	__be64 size, id, type;
+	uint64_t elog_size, log_id, elog_type;
+	struct kobject *kobj;
+	int rc;
+	char name[2+16+1];
+
+	rc = opal_get_elog_size(&id, &size, &type);
+	if (rc != OPAL_SUCCESS) {
+		pr_err("ELOG: OPAL log info read failed\n");
+		return;
+	}
+
+	elog_size = be64_to_cpu(size);
+	log_id = be64_to_cpu(id);
+	elog_type = be64_to_cpu(type);
+
+	/* Warn about, then clamp, sizes above OPAL_MAX_ERRLOG_SIZE */
+	WARN_ON(elog_size > OPAL_MAX_ERRLOG_SIZE);
+
+	if (elog_size >= OPAL_MAX_ERRLOG_SIZE)
+		elog_size = OPAL_MAX_ERRLOG_SIZE;
+
+	sprintf(name, "0x%llx", log_id);
+
+	/* We may get notified twice: do not create a conflicting entry,
+	 * but drop the reference that kset_find_obj() took on the match.
+	 */
+	kobj = kset_find_obj(elog_kset, name);
+	if (kobj) {
+		kobject_put(kobj);
+		return;
+	}
+
+	create_elog_obj(log_id, elog_size, elog_type);
+}
+
+static DECLARE_WORK(elog_work, elog_work_fn);
+
+static int elog_event(struct notifier_block *nb,
+				unsigned long events, void *change)
+{
+	/* check for error log event */
+	if (events & OPAL_EVENT_ERROR_LOG_AVAIL)
+		schedule_work(&elog_work);
+	return 0;
+}
+
+static struct notifier_block elog_nb = {
+	.notifier_call  = elog_event,
+	.next           = NULL,
+	.priority       = 0
+};
+
+int __init opal_elog_init(void)
+{
+	int rc = 0;
+
+	elog_kset = kset_create_and_add("elog", NULL, opal_kobj);
+	if (!elog_kset) {
+		pr_warn("%s: failed to create elog kset\n", __func__);
+		return -1;
+	}
+
+	rc = opal_notifier_register(&elog_nb);
+	if (rc) {
+		pr_err("%s: Can't register OPAL event notifier (%d)\n",
+		__func__, rc);
+		return rc;
+	}
+
+	/* We are now ready to pull error logs from opal. */
+	opal_resend_pending_logs();
+
+	return 0;
+}
diff --git a/arch/powerpc/platforms/powernv/opal-flash.c b/arch/powerpc/platforms/powernv/opal-flash.c
new file mode 100644
index 00000000000..5c21d9c07f4
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-flash.c
@@ -0,0 +1,588 @@
+/*
+ * PowerNV OPAL Firmware Update Interface
+ *
+ * Copyright 2013 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define DEBUG
+
+#include <linux/kernel.h>
+#include <linux/reboot.h>
+#include <linux/init.h>
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+#include <linux/pagemap.h>
+#include <linux/delay.h>
+
+#include <asm/opal.h>
+
+/* FLASH status codes */
+#define FLASH_NO_OP		-1099	/* No operation initiated by user */
+#define FLASH_NO_AUTH		-9002	/* Not a service authority partition */
+
+/* Validate image status values */
+#define VALIDATE_IMG_READY	-1001	/* Image ready for validation */
+#define VALIDATE_IMG_INCOMPLETE	-1002	/* User copied < VALIDATE_BUF_SIZE */
+
+/* Manage image status values */
+#define MANAGE_ACTIVE_ERR	-9001	/* Cannot overwrite active img */
+
+/* Flash image status values */
+#define FLASH_IMG_READY		0	/* Img ready for flash on reboot */
+#define FLASH_INVALID_IMG	-1003	/* Flash image shorter than expected */
+#define FLASH_IMG_NULL_DATA	-1004	/* Bad data in sg list entry */
+#define FLASH_IMG_BAD_LEN	-1005	/* Bad length in sg list entry */
+
+/* Manage operation tokens */
+#define FLASH_REJECT_TMP_SIDE	0	/* Reject temporary fw image */
+#define FLASH_COMMIT_TMP_SIDE	1	/* Commit temporary fw image */
+
+/* Update tokens */
+#define FLASH_UPDATE_CANCEL	0	/* Cancel update request */
+#define FLASH_UPDATE_INIT	1	/* Initiate update */
+
+/* Validate image update result tokens */
+#define VALIDATE_TMP_UPDATE	0     /* T side will be updated */
+#define VALIDATE_FLASH_AUTH	1     /* Partition does not have authority */
+#define VALIDATE_INVALID_IMG	2     /* Candidate image is not valid */
+#define VALIDATE_CUR_UNKNOWN	3     /* Current fixpack level is unknown */
+/*
+ * Current T side will be committed to P side before being replaced with new
+ * image, and the new image is downlevel from current image
+ */
+#define VALIDATE_TMP_COMMIT_DL	4
+/*
+ * Current T side will be committed to P side before being replaced with new
+ * image
+ */
+#define VALIDATE_TMP_COMMIT	5
+/*
+ * T side will be updated with a downlevel image
+ */
+#define VALIDATE_TMP_UPDATE_DL	6
+/*
+ * The candidate image's release date is later than the system's firmware
+ * service entitlement date - service warranty period has expired
+ */
+#define VALIDATE_OUT_OF_WRNTY	7
+
+/* Validate buffer size */
+#define VALIDATE_BUF_SIZE	4096
+
+/* XXX: Assume candidate image size is <= 1GB */
+#define MAX_IMAGE_SIZE	0x40000000
+
+/* Image status */
+enum {
+	IMAGE_INVALID,
+	IMAGE_LOADING,
+	IMAGE_READY,
+};
+
+/* Candidate image data */
+struct image_data_t {
+	int		status;
+	void		*data;
+	uint32_t	size;
+};
+
+/* Candidate image header */
+struct image_header_t {
+	uint16_t	magic;
+	uint16_t	version;
+	uint32_t	size;
+};
+
+struct validate_flash_t {
+	int		status;		/* Return status */
+	void		*buf;		/* Candidate image buffer */
+	uint32_t	buf_size;	/* Image size */
+	uint32_t	result;		/* Update results token */
+};
+
+struct manage_flash_t {
+	int status;		/* Return status */
+};
+
+struct update_flash_t {
+	int status;		/* Return status */
+};
+
+static struct image_header_t	image_header;
+static struct image_data_t	image_data;
+static struct validate_flash_t	validate_flash_data;
+static struct manage_flash_t	manage_flash_data;
+static struct update_flash_t	update_flash_data;
+
+static DEFINE_MUTEX(image_data_mutex);
+
+/*
+ * Validate candidate image
+ */
+static inline void opal_flash_validate(void)
+{
+	long ret;
+	void *buf = validate_flash_data.buf;
+	__be32 size = cpu_to_be32(validate_flash_data.buf_size);
+	__be32 result;
+
+	ret = opal_validate_flash(__pa(buf), &size, &result);
+
+	validate_flash_data.status = ret;
+	validate_flash_data.buf_size = be32_to_cpu(size);
+	validate_flash_data.result = be32_to_cpu(result);
+}
+
+/*
+ * Validate output format:
+ *     validate result token
+ *     current image version details
+ *     new image version details
+ */
+static ssize_t validate_show(struct kobject *kobj,
+			     struct kobj_attribute *attr, char *buf)
+{
+	struct validate_flash_t *args_buf = &validate_flash_data;
+	int len;
+
+	/* Candidate image is not validated */
+	if (args_buf->status < VALIDATE_TMP_UPDATE) {
+		len = sprintf(buf, "%d\n", args_buf->status);
+		goto out;
+	}
+
+	/* Result token */
+	len = sprintf(buf, "%d\n", args_buf->result);
+
+	/* Current and candidate image version details */
+	if ((args_buf->result != VALIDATE_TMP_UPDATE) &&
+	    (args_buf->result < VALIDATE_CUR_UNKNOWN))
+		goto out;
+
+	if (args_buf->buf_size > (VALIDATE_BUF_SIZE - len)) {
+		memcpy(buf + len, args_buf->buf, VALIDATE_BUF_SIZE - len);
+		len = VALIDATE_BUF_SIZE;
+	} else {
+		memcpy(buf + len, args_buf->buf, args_buf->buf_size);
+		len += args_buf->buf_size;
+	}
+out:
+	/* Set status to default */
+	args_buf->status = FLASH_NO_OP;
+	return len;
+}
+
+/*
+ * Validate candidate firmware image
+ *
+ * Note:
+ *   We are only interested in first 4K bytes of the
+ *   candidate image.
+ */
+static ssize_t validate_store(struct kobject *kobj,
+			      struct kobj_attribute *attr,
+			      const char *buf, size_t count)
+{
+	struct validate_flash_t *args_buf = &validate_flash_data;
+
+	if (buf[0] != '1')
+		return -EINVAL;
+
+	mutex_lock(&image_data_mutex);
+
+	if (image_data.status != IMAGE_READY ||
+	    image_data.size < VALIDATE_BUF_SIZE) {
+		args_buf->result = VALIDATE_INVALID_IMG;
+		args_buf->status = VALIDATE_IMG_INCOMPLETE;
+		goto out;
+	}
+
+	/* Copy first 4k bytes of candidate image */
+	memcpy(args_buf->buf, image_data.data, VALIDATE_BUF_SIZE);
+
+	args_buf->status = VALIDATE_IMG_READY;
+	args_buf->buf_size = VALIDATE_BUF_SIZE;
+
+	/* Validate candidate image */
+	opal_flash_validate();
+
+out:
+	mutex_unlock(&image_data_mutex);
+	return count;
+}
+
+/*
+ * Manage flash routine
+ */
+static inline void opal_flash_manage(uint8_t op)
+{
+	struct manage_flash_t *const args_buf = &manage_flash_data;
+
+	args_buf->status = opal_manage_flash(op);
+}
+
+/*
+ * Show manage flash status
+ */
+static ssize_t manage_show(struct kobject *kobj,
+			   struct kobj_attribute *attr, char *buf)
+{
+	struct manage_flash_t *const args_buf = &manage_flash_data;
+	int rc;
+
+	rc = sprintf(buf, "%d\n", args_buf->status);
+	/* Set status to default*/
+	args_buf->status = FLASH_NO_OP;
+	return rc;
+}
+
+/*
+ * Manage operations:
+ *   0 - Reject
+ *   1 - Commit
+ */
+static ssize_t manage_store(struct kobject *kobj,
+			    struct kobj_attribute *attr,
+			    const char *buf, size_t count)
+{
+	uint8_t op;
+	switch (buf[0]) {
+	case '0':
+		op = FLASH_REJECT_TMP_SIDE;
+		break;
+	case '1':
+		op = FLASH_COMMIT_TMP_SIDE;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* commit/reject temporary image */
+	opal_flash_manage(op);
+	return count;
+}
+
+/*
+ * OPAL update flash
+ */
+static int opal_flash_update(int op)
+{
+	struct opal_sg_list *list;
+	unsigned long addr;
+	int64_t rc = OPAL_PARAMETER;
+
+	if (op == FLASH_UPDATE_CANCEL) {
+		pr_alert("FLASH: Image update cancelled\n");
+		addr = 0;	/* a zero address is passed to cancel the request */
+		goto flash;
+	}
+
+	list = opal_vmalloc_to_sg_list(image_data.data, image_data.size);
+	if (!list)
+		goto invalid_img;	/* rc is still OPAL_PARAMETER */
+
+	/* First entry address */
+	addr = __pa(list);
+
+flash:
+	rc = opal_update_flash(addr);
+
+invalid_img:
+	return rc;
+}
+
+/* Return CPUs to OPAL before starting FW update */
+static void flash_return_cpu(void *info)
+{
+	int cpu = smp_processor_id();
+
+	if (!cpu_online(cpu))
+		return;
+
+	/* Disable IRQ */
+	hard_irq_disable();
+
+	/* Return the CPU to OPAL */
+	opal_return_cpu();
+}
+
+/* This gets called just before system reboots */
+void opal_flash_term_callback(void)
+{
+	struct cpumask mask;
+
+	if (update_flash_data.status != FLASH_IMG_READY)
+		return;
+
+	pr_alert("FLASH: Flashing new firmware\n");
+	pr_alert("FLASH: Image is %u bytes\n", image_data.size);
+	pr_alert("FLASH: Performing flash and reboot/shutdown\n");
+	pr_alert("FLASH: This will take several minutes. Do not power off!\n");
+
+	/* Small delay to help getting the above message out */
+	msleep(500);
+
+	/* Return secondary CPUs to firmware */
+	cpumask_copy(&mask, cpu_online_mask);
+	cpumask_clear_cpu(smp_processor_id(), &mask);
+	if (!cpumask_empty(&mask))
+		smp_call_function_many(&mask,
+				       flash_return_cpu, NULL, false);
+	/* Hard disable interrupts */
+	hard_irq_disable();
+}
+
+/*
+ * Show candidate image status
+ */
+static ssize_t update_show(struct kobject *kobj,
+			   struct kobj_attribute *attr, char *buf)
+{
+	struct update_flash_t *const args_buf = &update_flash_data;
+	return sprintf(buf, "%d\n", args_buf->status);
+}
+
+/*
+ * Set update image flag
+ *  1 - Flash new image
+ *  0 - Cancel flash request
+ */
+static ssize_t update_store(struct kobject *kobj,
+			    struct kobj_attribute *attr,
+			    const char *buf, size_t count)
+{
+	struct update_flash_t *const args_buf = &update_flash_data;
+	int rc = count;
+
+	mutex_lock(&image_data_mutex);
+
+	switch (buf[0]) {
+	case '0':
+		if (args_buf->status == FLASH_IMG_READY)
+			opal_flash_update(FLASH_UPDATE_CANCEL);
+		args_buf->status = FLASH_NO_OP;
+		break;
+	case '1':
+		/* Image is loaded? */
+		if (image_data.status == IMAGE_READY)
+			args_buf->status =
+				opal_flash_update(FLASH_UPDATE_INIT);
+		else
+			args_buf->status = FLASH_INVALID_IMG;
+		break;
+	default:
+		rc = -EINVAL;
+	}
+
+	mutex_unlock(&image_data_mutex);
+	return rc;
+}
+
+/*
+ * Free image buffer
+ */
+static void free_image_buf(void)
+{
+	void *addr;
+	int size;
+
+	addr = image_data.data;
+	size = PAGE_ALIGN(image_data.size);
+	while (size > 0) {
+		ClearPageReserved(vmalloc_to_page(addr));
+		addr += PAGE_SIZE;
+		size -= PAGE_SIZE;
+	}
+	vfree(image_data.data);
+	image_data.data = NULL;
+	image_data.status = IMAGE_INVALID;
+}
+
+/*
+ * Allocate image buffer.
+ */
+static int alloc_image_buf(char *buffer, size_t count)
+{
+	void *addr;
+	int size;
+
+	if (count < sizeof(struct image_header_t)) {
+		pr_warn("FLASH: Invalid candidate image\n");
+		return -EINVAL;
+	}
+
+	memcpy(&image_header, (void *)buffer, sizeof(struct image_header_t));
+	image_data.size = be32_to_cpu(image_header.size);
+	pr_debug("FLASH: Candidate image size = %u\n", image_data.size);
+
+	if (image_data.size > MAX_IMAGE_SIZE) {
+		pr_warn("FLASH: Too large image\n");
+		return -EINVAL;
+	}
+	if (image_data.size < VALIDATE_BUF_SIZE) {
+		pr_warn("FLASH: Image is shorter than expected\n");
+		return -EINVAL;
+	}
+
+	image_data.data = vzalloc(PAGE_ALIGN(image_data.size));
+	if (!image_data.data) {
+		pr_err("%s : Failed to allocate memory\n", __func__);
+		return -ENOMEM;
+	}
+
+	/* Pin memory */
+	addr = image_data.data;
+	size = PAGE_ALIGN(image_data.size);
+	while (size > 0) {
+		SetPageReserved(vmalloc_to_page(addr));
+		addr += PAGE_SIZE;
+		size -= PAGE_SIZE;
+	}
+
+	image_data.status = IMAGE_LOADING;
+	return 0;
+}
+
+/*
+ * Copy candidate image
+ *
+ * Parse candidate image header to get total image size
+ * and pre-allocate required memory.
+ */
+static ssize_t image_data_write(struct file *filp, struct kobject *kobj,
+				struct bin_attribute *bin_attr,
+				char *buffer, loff_t pos, size_t count)
+{
+	int rc;
+
+	mutex_lock(&image_data_mutex);
+
+	/* New image ? */
+	if (pos == 0) {
+		/* Free memory, if already allocated */
+		if (image_data.data)
+			free_image_buf();
+
+		/* Cancel outstanding image update request */
+		if (update_flash_data.status == FLASH_IMG_READY)
+			opal_flash_update(FLASH_UPDATE_CANCEL);
+
+		/* Allocate memory */
+		rc = alloc_image_buf(buffer, count);
+		if (rc)
+			goto out;
+	}
+
+	if (image_data.status != IMAGE_LOADING) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	if ((pos + count) > image_data.size) {
+		rc = -EINVAL;
+		goto out;
+	}
+
+	memcpy(image_data.data + pos, (void *)buffer, count);
+	rc = count;
+
+	/* Set image status */
+	if ((pos + count) == image_data.size) {
+		pr_debug("FLASH: Candidate image loaded....\n");
+		image_data.status = IMAGE_READY;
+	}
+
+out:
+	mutex_unlock(&image_data_mutex);
+	return rc;
+}
+
+/*
+ * sysfs interface :
+ *  OPAL uses below sysfs files for code update.
+ *  We create these files under /sys/firmware/opal.
+ *
+ *   image		: Interface to load candidate firmware image
+ *   validate_flash	: Validate firmware image
+ *   manage_flash	: Commit/Reject firmware image
+ *   update_flash	: Flash new firmware image
+ *
+ */
+static struct bin_attribute image_data_attr = {
+	.attr = {.name = "image", .mode = 0200},
+	.size = MAX_IMAGE_SIZE,	/* Limit image size */
+	.write = image_data_write,
+};
+
+static struct kobj_attribute validate_attribute =
+	__ATTR(validate_flash, 0600, validate_show, validate_store);
+
+static struct kobj_attribute manage_attribute =
+	__ATTR(manage_flash, 0600, manage_show, manage_store);
+
+static struct kobj_attribute update_attribute =
+	__ATTR(update_flash, 0600, update_show, update_store);
+
+static struct attribute *image_op_attrs[] = {
+	&validate_attribute.attr,
+	&manage_attribute.attr,
+	&update_attribute.attr,
+	NULL	/* need to NULL terminate the list of attributes */
+};
+
+static struct attribute_group image_op_attr_group = {
+	.attrs = image_op_attrs,
+};
+
+void __init opal_flash_init(void)
+{
+	int ret;
+
+	/* Set default status before anything can fail or read it */
+	validate_flash_data.status = FLASH_NO_OP;
+	manage_flash_data.status = FLASH_NO_OP;
+	update_flash_data.status = FLASH_NO_OP;
+	image_data.status = IMAGE_INVALID;
+
+	/* Allocate validate image buffer */
+	validate_flash_data.buf = kzalloc(VALIDATE_BUF_SIZE, GFP_KERNEL);
+	if (!validate_flash_data.buf) {
+		pr_err("%s : Failed to allocate memory\n", __func__);
+		return;
+	}
+
+	/* Make sure /sys/firmware/opal directory is created */
+	if (!opal_kobj) {
+		pr_warn("FLASH: opal kobject is not available\n");
+		goto nokobj;
+	}
+
+	/* Create the sysfs files */
+	ret = sysfs_create_group(opal_kobj, &image_op_attr_group);
+	if (ret) {
+		pr_warn("FLASH: Failed to create sysfs files\n");
+		goto nokobj;
+	}
+
+	ret = sysfs_create_bin_file(opal_kobj, &image_data_attr);
+	if (ret) {
+		pr_warn("FLASH: Failed to create sysfs files\n");
+		goto nosysfs_file;
+	}
+	return;
+
+nosysfs_file:
+	sysfs_remove_group(opal_kobj, &image_op_attr_group);
+
+nokobj:
+	kfree(validate_flash_data.buf);
+	validate_flash_data.buf = NULL;	/* do not leave a dangling pointer */
+}
diff --git a/arch/powerpc/platforms/powernv/opal-lpc.c b/arch/powerpc/platforms/powernv/opal-lpc.c
new file mode 100644
index 00000000000..f04b4d8aca5
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-lpc.c
@@ -0,0 +1,355 @@
+/*
+ * PowerNV LPC bus handling.
+ *
+ * Copyright 2013 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/bug.h>
+#include <linux/debugfs.h>
+#include <linux/io.h>
+#include <linux/slab.h>
+
+#include <asm/machdep.h>
+#include <asm/firmware.h>
+#include <asm/xics.h>
+#include <asm/opal.h>
+#include <asm/prom.h>
+#include <asm/uaccess.h>
+#include <asm/debug.h>
+
+static int opal_lpc_chip_id = -1;
+
+static u8 opal_lpc_inb(unsigned long port)
+{
+	int64_t rc;
+	__be32 data;
+
+	if (opal_lpc_chip_id < 0 || port > 0xffff)
+		return 0xff;
+	rc = opal_lpc_read(opal_lpc_chip_id, OPAL_LPC_IO, port, &data, 1);
+	return rc ? 0xff : be32_to_cpu(data);
+}
+
+static __le16 __opal_lpc_inw(unsigned long port)
+{
+	int64_t rc;
+	__be32 data;
+
+	if (opal_lpc_chip_id < 0 || port > 0xfffe)
+		return 0xffff;
+	if (port & 1)
+		return (__le16)opal_lpc_inb(port) << 8 | opal_lpc_inb(port + 1);
+	rc = opal_lpc_read(opal_lpc_chip_id, OPAL_LPC_IO, port, &data, 2);
+	return rc ? 0xffff : be32_to_cpu(data);
+}
+static u16 opal_lpc_inw(unsigned long port)
+{
+	return le16_to_cpu(__opal_lpc_inw(port));
+}
+
+static __le32 __opal_lpc_inl(unsigned long port)
+{
+	int64_t rc;
+	__be32 data;
+
+	if (opal_lpc_chip_id < 0 || port > 0xfffc)
+		return 0xffffffff;
+	if (port & 3)
+		return (__le32)opal_lpc_inb(port    ) << 24 |
+		       (__le32)opal_lpc_inb(port + 1) << 16 |
+		       (__le32)opal_lpc_inb(port + 2) <<  8 |
+			       opal_lpc_inb(port + 3);
+	rc = opal_lpc_read(opal_lpc_chip_id, OPAL_LPC_IO, port, &data, 4);
+	return rc ? 0xffffffff : be32_to_cpu(data);
+}
+
+static u32 opal_lpc_inl(unsigned long port)
+{
+	return le32_to_cpu(__opal_lpc_inl(port));
+}
+
+static void opal_lpc_outb(u8 val, unsigned long port)
+{
+	if (opal_lpc_chip_id < 0 || port > 0xffff)
+		return;
+	opal_lpc_write(opal_lpc_chip_id, OPAL_LPC_IO, port, val, 1);
+}
+
+static void __opal_lpc_outw(__le16 val, unsigned long port)
+{
+	if (opal_lpc_chip_id < 0 || port > 0xfffe)
+		return;
+	if (port & 1) {
+		opal_lpc_outb(val >> 8, port);
+		opal_lpc_outb(val     , port + 1);
+		return;
+	}
+	opal_lpc_write(opal_lpc_chip_id, OPAL_LPC_IO, port, val, 2);
+}
+
+static void opal_lpc_outw(u16 val, unsigned long port)
+{
+	__opal_lpc_outw(cpu_to_le16(val), port);
+}
+
+static void __opal_lpc_outl(__le32 val, unsigned long port)
+{
+	if (opal_lpc_chip_id < 0 || port > 0xfffc)
+		return;
+	if (port & 3) {
+		opal_lpc_outb(val >> 24, port);
+		opal_lpc_outb(val >> 16, port + 1);
+		opal_lpc_outb(val >>  8, port + 2);
+		opal_lpc_outb(val      , port + 3);
+		return;
+	}
+	opal_lpc_write(opal_lpc_chip_id, OPAL_LPC_IO, port, val, 4);
+}
+
+static void opal_lpc_outl(u32 val, unsigned long port)
+{
+	__opal_lpc_outl(cpu_to_le32(val), port);
+}
+
+static void opal_lpc_insb(unsigned long p, void *b, unsigned long c)
+{
+	u8 *ptr = b;
+
+	while(c--)
+		*(ptr++) = opal_lpc_inb(p);
+}
+
+static void opal_lpc_insw(unsigned long p, void *b, unsigned long c)
+{
+	__le16 *ptr = b;
+
+	while(c--)
+		*(ptr++) = __opal_lpc_inw(p);
+}
+
+static void opal_lpc_insl(unsigned long p, void *b, unsigned long c)
+{
+	__le32 *ptr = b;
+
+	while(c--)
+		*(ptr++) = __opal_lpc_inl(p);
+}
+
+static void opal_lpc_outsb(unsigned long p, const void *b, unsigned long c)
+{
+	const u8 *ptr = b;
+
+	while(c--)
+		opal_lpc_outb(*(ptr++), p);
+}
+
+static void opal_lpc_outsw(unsigned long p, const void *b, unsigned long c)
+{
+	const __le16 *ptr = b;
+
+	while(c--)
+		__opal_lpc_outw(*(ptr++), p);
+}
+
+static void opal_lpc_outsl(unsigned long p, const void *b, unsigned long c)
+{
+	const __le32 *ptr = b;
+
+	while(c--)
+		__opal_lpc_outl(*(ptr++), p);
+}
+
+static const struct ppc_pci_io opal_lpc_io = {
+	.inb	= opal_lpc_inb,
+	.inw	= opal_lpc_inw,
+	.inl	= opal_lpc_inl,
+	.outb	= opal_lpc_outb,
+	.outw	= opal_lpc_outw,
+	.outl	= opal_lpc_outl,
+	.insb	= opal_lpc_insb,
+	.insw	= opal_lpc_insw,
+	.insl	= opal_lpc_insl,
+	.outsb	= opal_lpc_outsb,
+	.outsw	= opal_lpc_outsw,
+	.outsl	= opal_lpc_outsl,
+};
+
+#ifdef CONFIG_DEBUG_FS
+struct lpc_debugfs_entry {
+	enum OpalLPCAddressType lpc_type;
+};
+
+static ssize_t lpc_debug_read(struct file *filp, char __user *ubuf,
+			      size_t count, loff_t *ppos)
+{
+	struct lpc_debugfs_entry *lpc = filp->private_data;
+	u32 data, pos, len, todo;
+	int rc;
+
+	if (!access_ok(VERIFY_WRITE, ubuf, count))
+		return -EFAULT;
+
+	todo = count;
+	while (todo) {
+		pos = *ppos;
+
+		/*
+		 * Select access size based on count and alignment and
+		 * access type. IO and MEM only support byte accesses,
+		 * FW supports all 3.
+		 */
+		len = 1;
+		if (lpc->lpc_type == OPAL_LPC_FW) {
+			if (todo > 3 && (pos & 3) == 0)
+				len = 4;
+			else if (todo > 1 && (pos & 1) == 0)
+				len = 2;
+		}
+		rc = opal_lpc_read(opal_lpc_chip_id, lpc->lpc_type, pos,
+				   &data, len);
+		if (rc)
+			return -ENXIO;
+		switch(len) {
+		case 4:
+			rc = __put_user((u32)data, (u32 __user *)ubuf);
+			break;
+		case 2:
+			rc = __put_user((u16)data, (u16 __user *)ubuf);
+			break;
+		default:
+			rc = __put_user((u8)data, (u8 __user *)ubuf);
+			break;
+		}
+		if (rc)
+			return -EFAULT;
+		*ppos += len;
+		ubuf += len;
+		todo -= len;
+	}
+
+	return count;
+}
+
+static ssize_t lpc_debug_write(struct file *filp, const char __user *ubuf,
+			       size_t count, loff_t *ppos)
+{
+	struct lpc_debugfs_entry *lpc = filp->private_data;
+	u32 data, pos, len, todo;
+	int rc;
+
+	if (!access_ok(VERIFY_READ, ubuf, count))
+		return -EFAULT;
+
+	todo = count;
+	while (todo) {
+		pos = *ppos;
+
+		/*
+		 * Select access size based on count and alignment and
+		 * access type. IO and MEM only support byte accesses,
+		 * FW supports all 3.
+		 */
+		len = 1;
+		if (lpc->lpc_type == OPAL_LPC_FW) {
+			if (todo > 3 && (pos & 3) == 0)
+				len = 4;
+			else if (todo > 1 && (pos & 1) == 0)
+				len = 2;
+		}
+		switch(len) {
+		case 4:
+			rc = __get_user(data, (u32 __user *)ubuf);
+			break;
+		case 2:
+			rc = __get_user(data, (u16 __user *)ubuf);
+			break;
+		default:
+			rc = __get_user(data, (u8 __user *)ubuf);
+			break;
+		}
+		if (rc)
+			return -EFAULT;
+
+		rc = opal_lpc_write(opal_lpc_chip_id, lpc->lpc_type, pos,
+				    data, len);
+		if (rc)
+			return -ENXIO;
+		*ppos += len;
+		ubuf += len;
+		todo -= len;
+	}
+
+	return count;
+}
+
+static const struct file_operations lpc_fops = {
+	.read =		lpc_debug_read,
+	.write =	lpc_debug_write,
+	.open =		simple_open,
+	.llseek =	default_llseek,
+};
+
+static int opal_lpc_debugfs_create_type(struct dentry *folder,
+					const char *fname,
+					enum OpalLPCAddressType type)
+{
+	struct lpc_debugfs_entry *entry;
+	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry)
+		return -ENOMEM;
+	entry->lpc_type = type;
+	debugfs_create_file(fname, 0600, folder, entry, &lpc_fops);
+	return 0;
+}
+
+static int opal_lpc_init_debugfs(void)
+{
+	struct dentry *root;
+	int rc = 0;
+
+	if (opal_lpc_chip_id < 0)
+		return -ENODEV;
+
+	root = debugfs_create_dir("lpc", powerpc_debugfs_root);
+
+	rc |= opal_lpc_debugfs_create_type(root, "io", OPAL_LPC_IO);
+	rc |= opal_lpc_debugfs_create_type(root, "mem", OPAL_LPC_MEM);
+	rc |= opal_lpc_debugfs_create_type(root, "fw", OPAL_LPC_FW);
+	return rc;
+}
+device_initcall(opal_lpc_init_debugfs);
+#endif  /* CONFIG_DEBUG_FS */
+
+void opal_lpc_init(void)
+{
+	struct device_node *np;
+
+	/*
+	 * Look for a Power8 LPC bus tagged as "primary". Only one is
+	 * supported here, though the OPAL APIs support any number.
+	 */
+	for_each_compatible_node(np, NULL, "ibm,power8-lpc") {
+		if (!of_device_is_available(np))
+			continue;
+		if (!of_get_property(np, "primary", NULL))
+			continue;
+		opal_lpc_chip_id = of_get_ibm_chip_id(np);
+		of_node_put(np);	/* leaving the loop early: drop our ref */
+		break;
+	}
+	if (opal_lpc_chip_id < 0)
+		return;
+
+	/* Setup special IO ops */
+	ppc_pci_io = opal_lpc_io;
+	isa_io_special = true;
+
+	pr_info("OPAL: Power8 LPC bus found, chip ID %d\n", opal_lpc_chip_id);
+}
diff --git a/arch/powerpc/platforms/powernv/opal-memory-errors.c b/arch/powerpc/platforms/powernv/opal-memory-errors.c
new file mode 100644
index 00000000000..b17a34b695e
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-memory-errors.c
@@ -0,0 +1,146 @@
+/*
+ * OPAL asynchronous memory error handling support in PowerNV.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright 2013 IBM Corporation
+ * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/of.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+
+#include <asm/opal.h>
+#include <asm/cputable.h>
+
+static int opal_mem_err_nb_init;
+static LIST_HEAD(opal_memory_err_list);
+static DEFINE_SPINLOCK(opal_mem_err_lock);
+
+struct OpalMsgNode {
+	struct list_head list;
+	struct opal_msg msg;
+};
+
+static void handle_memory_error_event(struct OpalMemoryErrorData *merr_evt)
+{
+	uint64_t paddr_start, paddr_end;
+
+	pr_debug("%s: Retrived memory error event, type: 0x%x\n",
+		  __func__, merr_evt->type);
+	switch (merr_evt->type) {
+	case OPAL_MEM_ERR_TYPE_RESILIENCE:
+		paddr_start = be64_to_cpu(merr_evt->u.resilience.physical_address_start);
+		paddr_end = be64_to_cpu(merr_evt->u.resilience.physical_address_end);
+		break;
+	case OPAL_MEM_ERR_TYPE_DYN_DALLOC:
+		paddr_start = be64_to_cpu(merr_evt->u.dyn_dealloc.physical_address_start);
+		paddr_end = be64_to_cpu(merr_evt->u.dyn_dealloc.physical_address_end);
+		break;
+	default:
+		return;
+	}
+
+	for (; paddr_start < paddr_end; paddr_start += PAGE_SIZE) {
+		memory_failure(paddr_start >> PAGE_SHIFT, 0, 0);
+	}
+}
+
+static void handle_memory_error(void)
+{
+	unsigned long flags;
+	struct OpalMemoryErrorData *merr_evt;
+	struct OpalMsgNode *msg_node;
+
+	spin_lock_irqsave(&opal_mem_err_lock, flags);
+	while (!list_empty(&opal_memory_err_list)) {
+		 msg_node = list_entry(opal_memory_err_list.next,
+					   struct OpalMsgNode, list);
+		list_del(&msg_node->list);
+		spin_unlock_irqrestore(&opal_mem_err_lock, flags);
+
+		merr_evt = (struct OpalMemoryErrorData *)
+					&msg_node->msg.params[0];
+		handle_memory_error_event(merr_evt);
+		kfree(msg_node);
+		spin_lock_irqsave(&opal_mem_err_lock, flags);
+	}
+	spin_unlock_irqrestore(&opal_mem_err_lock, flags);
+}
+
+static void mem_error_handler(struct work_struct *work)
+{
+	handle_memory_error();
+}
+
+static DECLARE_WORK(mem_error_work, mem_error_handler);
+
+/*
+ * opal_memory_err_event - notifier handler that queues up the opal message
+ * to be processed later.
+ */
+static int opal_memory_err_event(struct notifier_block *nb,
+			  unsigned long msg_type, void *msg)
+{
+	unsigned long flags;
+	struct OpalMsgNode *msg_node;
+
+	if (msg_type != OPAL_MSG_MEM_ERR)
+		return 0;
+
+	msg_node = kzalloc(sizeof(*msg_node), GFP_ATOMIC);
+	if (!msg_node) {
+		pr_err("MEMORY_ERROR: out of memory, Opal message event not "
+		       "handled\n");
+		return -ENOMEM;
+	}
+	memcpy(&msg_node->msg, msg, sizeof(struct opal_msg));
+	/* Queue the private copy; mem_error_work drains the list later. */
+	spin_lock_irqsave(&opal_mem_err_lock, flags);
+	list_add(&msg_node->list, &opal_memory_err_list);
+	spin_unlock_irqrestore(&opal_mem_err_lock, flags);
+
+	schedule_work(&mem_error_work);
+	return 0;
+}
+
+static struct notifier_block opal_mem_err_nb = {
+	.notifier_call	= opal_memory_err_event,
+	.next		= NULL,
+	.priority	= 0,
+};
+
+static int __init opal_mem_err_init(void)
+{
+	int ret;
+
+	if (!opal_mem_err_nb_init) {
+		ret = opal_message_notifier_register(
+					OPAL_MSG_MEM_ERR, &opal_mem_err_nb);
+		if (ret) {
+			pr_err("%s: Can't register OPAL event notifier (%d)\n",
+			       __func__, ret);
+			return ret;
+		}
+		opal_mem_err_nb_init = 1;
+	}
+	return 0;
+}
+subsys_initcall(opal_mem_err_init);
diff --git a/arch/powerpc/platforms/powernv/opal-msglog.c b/arch/powerpc/platforms/powernv/opal-msglog.c
new file mode 100644
index 00000000000..44ed78af1a0
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-msglog.c
@@ -0,0 +1,124 @@
+/*
+ * PowerNV OPAL in-memory console interface
+ *
+ * Copyright 2014 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <asm/io.h>
+#include <asm/opal.h>
+#include <linux/debugfs.h>
+#include <linux/of.h>
+#include <linux/types.h>
+#include <asm/barrier.h>
+
+/* OPAL in-memory console. Defined in OPAL source at core/console.c */
+struct memcons {
+	__be64 magic;
+#define MEMCONS_MAGIC	0x6630696567726173L
+	__be64 obuf_phys;
+	__be64 ibuf_phys;
+	__be32 obuf_size;
+	__be32 ibuf_size;
+	__be32 out_pos;
+#define MEMCONS_OUT_POS_WRAP	0x80000000u
+#define MEMCONS_OUT_POS_MASK	0x00ffffffu
+	__be32 in_prod;
+	__be32 in_cons;
+};
+
+static ssize_t opal_msglog_read(struct file *file, struct kobject *kobj,
+				struct bin_attribute *bin_attr, char *to,
+				loff_t pos, size_t count)
+{
+	struct memcons *mc = bin_attr->private;
+	const char *conbuf;
+	ssize_t ret;
+	size_t first_read = 0;
+	uint32_t out_pos, obuf_size, wrapped, avail;
+
+	if (!mc)
+		return -ENODEV;
+
+	out_pos = be32_to_cpu(ACCESS_ONCE(mc->out_pos));
+
+	/* Now we've read out_pos, put a barrier in before reading the new
+	 * data it points to in conbuf. */
+	smp_rmb();
+
+	conbuf = phys_to_virt(be64_to_cpu(mc->obuf_phys));
+	obuf_size = be32_to_cpu(mc->obuf_size);
+
+	/* Split out_pos into its wrap flag and the write offset. */
+	wrapped = out_pos & MEMCONS_OUT_POS_WRAP;
+	if (wrapped)
+		out_pos &= MEMCONS_OUT_POS_MASK;
+
+	/* Sanity check before touching conbuf: an offset beyond the buffer
+	 * would make "avail" underflow. The firmware should not do this. */
+	if (out_pos > obuf_size) {
+		pr_err("OPAL: memory console corruption. Aborting read.\n");
+		return -EINVAL;
+	}
+
+	/* When the buffer has wrapped, read from the out_pos marker to the end
+	 * of the buffer, and then read the remaining data as in the un-wrapped
+	 * case. */
+	if (wrapped) {
+		avail = obuf_size - out_pos;
+		ret = memory_read_from_buffer(to, count, &pos,
+				conbuf + out_pos, avail);
+		if (ret < 0)
+			goto out;
+
+		first_read = ret;
+		to += first_read;
+		count -= first_read;
+		pos -= avail;
+		if (count <= 0)
+			goto out;
+	}
+
+	ret = memory_read_from_buffer(to, count, &pos, conbuf, out_pos);
+	if (ret < 0)
+		goto out;
+	ret += first_read;
+out:
+	return ret;
+}
+
+static struct bin_attribute opal_msglog_attr = {
+	.attr = {.name = "msglog", .mode = 0444},
+	.read = opal_msglog_read
+};
+
+void __init opal_msglog_init(void)
+{
+	u64 mcaddr;
+	struct memcons *mc;
+
+	if (of_property_read_u64(opal_node, "ibm,opal-memcons", &mcaddr)) {
+		pr_warn("OPAL: Property ibm,opal-memcons not found, no message log\n");
+		return;
+	}
+
+	mc = phys_to_virt(mcaddr);
+	if (!mc) {
+		pr_warn("OPAL: memory console address is invalid\n");
+		return;
+	}
+
+	if (be64_to_cpu(mc->magic) != MEMCONS_MAGIC) {
+		pr_warn("OPAL: memory console version is invalid\n");
+		return;
+	}
+
+	opal_msglog_attr.private = mc;
+
+	if (sysfs_create_bin_file(opal_kobj, &opal_msglog_attr) != 0)
+		pr_warn("OPAL: sysfs file creation failed\n");
+}
diff --git a/arch/powerpc/platforms/powernv/opal-nvram.c b/arch/powerpc/platforms/powernv/opal-nvram.c
index 3f83e1ae26a..acd9f7e9667 100644
--- a/arch/powerpc/platforms/powernv/opal-nvram.c
+++ b/arch/powerpc/platforms/powernv/opal-nvram.c
@@ -65,7 +65,7 @@ static ssize_t opal_nvram_write(char *buf, size_t count, loff_t *index)
 void __init opal_nvram_init(void)
 {
 	struct device_node *np;
-	const u32 *nbytes_p;
+	const __be32 *nbytes_p;
 
 	np = of_find_compatible_node(NULL, NULL, "ibm,opal-nvram");
 	if (np == NULL)
@@ -76,7 +76,7 @@ void __init opal_nvram_init(void)
 		of_node_put(np);
 		return;
 	}
-	nvram_size = *nbytes_p;
+	nvram_size = be32_to_cpup(nbytes_p);
 
 	printk(KERN_INFO "OPAL nvram setup, %u bytes\n", nvram_size);
 	of_node_put(np);
diff --git a/arch/powerpc/platforms/powernv/opal-rtc.c b/arch/powerpc/platforms/powernv/opal-rtc.c
index 2aa7641aac9..b1885db8fdf 100644
--- a/arch/powerpc/platforms/powernv/opal-rtc.c
+++ b/arch/powerpc/platforms/powernv/opal-rtc.c
@@ -18,6 +18,7 @@
 
 #include <asm/opal.h>
 #include <asm/firmware.h>
+#include <asm/machdep.h>
 
 static void opal_to_tm(u32 y_m_d, u64 h_m_s_ms, struct rtc_time *tm)
 {
@@ -37,17 +38,24 @@ unsigned long __init opal_get_boot_time(void)
 	struct rtc_time tm;
 	u32 y_m_d;
 	u64 h_m_s_ms;
+	__be32 __y_m_d;
+	__be64 __h_m_s_ms;
 	long rc = OPAL_BUSY;
 
 	while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
-		rc = opal_rtc_read(&y_m_d, &h_m_s_ms);
+		rc = opal_rtc_read(&__y_m_d, &__h_m_s_ms);
 		if (rc == OPAL_BUSY_EVENT)
 			opal_poll_events(NULL);
 		else
 			mdelay(10);
 	}
-	if (rc != OPAL_SUCCESS)
+	if (rc != OPAL_SUCCESS) {
+		ppc_md.get_rtc_time = NULL;
+		ppc_md.set_rtc_time = NULL;
 		return 0;
+	}
+	y_m_d = be32_to_cpu(__y_m_d);
+	h_m_s_ms = be64_to_cpu(__h_m_s_ms);
 	opal_to_tm(y_m_d, h_m_s_ms, &tm);
 	return mktime(tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
 		      tm.tm_hour, tm.tm_min, tm.tm_sec);
@@ -58,9 +66,11 @@ void opal_get_rtc_time(struct rtc_time *tm)
 	long rc = OPAL_BUSY;
 	u32 y_m_d;
 	u64 h_m_s_ms;
+	__be32 __y_m_d;
+	__be64 __h_m_s_ms;
 
 	while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
-		rc = opal_rtc_read(&y_m_d, &h_m_s_ms);
+		rc = opal_rtc_read(&__y_m_d, &__h_m_s_ms);
 		if (rc == OPAL_BUSY_EVENT)
 			opal_poll_events(NULL);
 		else
@@ -68,6 +78,8 @@ void opal_get_rtc_time(struct rtc_time *tm)
 	}
 	if (rc != OPAL_SUCCESS)
 		return;
+	y_m_d = be32_to_cpu(__y_m_d);
+	h_m_s_ms = be64_to_cpu(__h_m_s_ms);
 	opal_to_tm(y_m_d, h_m_s_ms, tm);
 }
 
diff --git a/arch/powerpc/platforms/powernv/opal-sensor.c b/arch/powerpc/platforms/powernv/opal-sensor.c
new file mode 100644
index 00000000000..10271ad1fac
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-sensor.c
@@ -0,0 +1,66 @@
+/*
+ * PowerNV sensor code
+ *
+ * Copyright (C) 2013 IBM
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/delay.h>
+#include <linux/mutex.h>
+#include <asm/opal.h>
+
+static DEFINE_MUTEX(opal_sensor_mutex);
+
+/*
+ * This will return sensor information to driver based on the requested sensor
+ * handle. A handle is an opaque id for the powernv, read by the driver from the
+ * device tree.
+ */
+int opal_get_sensor_data(u32 sensor_hndl, u32 *sensor_data)
+{
+	int ret, token;
+	struct opal_msg msg;
+	__be32 data;
+
+	token = opal_async_get_token_interruptible();
+	if (token < 0) {
+		pr_err("%s: Couldn't get the token, returning\n", __func__);
+		ret = token;
+		goto out;
+	}
+
+	mutex_lock(&opal_sensor_mutex);
+	ret = opal_sensor_read(sensor_hndl, token, &data);
+	if (ret != OPAL_ASYNC_COMPLETION)
+		goto out_token;
+
+	ret = opal_async_wait_response(token, &msg);
+	if (ret) {
+		pr_err("%s: Failed to wait for the async response, %d\n",
+				__func__, ret);
+		goto out_token;
+	}
+
+	*sensor_data = be32_to_cpu(data);
+	ret = be64_to_cpu(msg.params[1]);
+
+out_token:
+	mutex_unlock(&opal_sensor_mutex);
+	opal_async_release_token(token);
+out:
+	return ret;
+}
+EXPORT_SYMBOL_GPL(opal_get_sensor_data);
diff --git a/arch/powerpc/platforms/powernv/opal-sysparam.c b/arch/powerpc/platforms/powernv/opal-sysparam.c
new file mode 100644
index 00000000000..9d1acf22a09
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-sysparam.c
@@ -0,0 +1,304 @@
+/*
+ * PowerNV system parameter code
+ *
+ * Copyright (C) 2013 IBM
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/kobject.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/of.h>
+#include <linux/gfp.h>
+#include <linux/stat.h>
+#include <asm/opal.h>
+
+#define MAX_PARAM_DATA_LEN	64
+
+static DEFINE_MUTEX(opal_sysparam_mutex);
+static struct kobject *sysparam_kobj;
+static void *param_data_buf;
+
+struct param_attr {
+	struct list_head list;
+	u32 param_id;
+	u32 param_size;
+	struct kobj_attribute kobj_attr;
+};
+
+static ssize_t opal_get_sys_param(u32 param_id, u32 length, void *buffer)
+{
+	struct opal_msg msg;
+	ssize_t ret;
+	int token;
+
+	token = opal_async_get_token_interruptible();
+	if (token < 0) {
+		if (token != -ERESTARTSYS)
+			pr_err("%s: Couldn't get the token, returning\n",
+					__func__);
+		ret = token;
+		goto out;
+	}
+
+	ret = opal_get_param(token, param_id, (u64)buffer, length);
+	if (ret != OPAL_ASYNC_COMPLETION)
+		goto out_token;
+
+	ret = opal_async_wait_response(token, &msg);
+	if (ret) {
+		pr_err("%s: Failed to wait for the async response, %zd\n",
+				__func__, ret);
+		goto out_token;
+	}
+
+	ret = be64_to_cpu(msg.params[1]);
+
+out_token:
+	opal_async_release_token(token);
+out:
+	return ret;
+}
+
+static int opal_set_sys_param(u32 param_id, u32 length, void *buffer)
+{
+	struct opal_msg msg;
+	int ret, token;
+
+	token = opal_async_get_token_interruptible();
+	if (token < 0) {
+		if (token != -ERESTARTSYS)
+			pr_err("%s: Couldn't get the token, returning\n",
+					__func__);
+		ret = token;
+		goto out;
+	}
+
+	ret = opal_set_param(token, param_id, (u64)buffer, length);
+
+	if (ret != OPAL_ASYNC_COMPLETION)
+		goto out_token;
+
+	ret = opal_async_wait_response(token, &msg);
+	if (ret) {
+		pr_err("%s: Failed to wait for the async response, %d\n",
+				__func__, ret);
+		goto out_token;
+	}
+
+	ret = be64_to_cpu(msg.params[1]);
+
+out_token:
+	opal_async_release_token(token);
+out:
+	return ret;
+}
+
+static ssize_t sys_param_show(struct kobject *kobj,
+		struct kobj_attribute *kobj_attr, char *buf)
+{
+	struct param_attr *attr = container_of(kobj_attr, struct param_attr,
+			kobj_attr);
+	ssize_t ret;
+
+	mutex_lock(&opal_sysparam_mutex);
+	ret = opal_get_sys_param(attr->param_id, attr->param_size,
+			param_data_buf);
+	if (ret)
+		goto out;
+
+	memcpy(buf, param_data_buf, attr->param_size);
+
+	ret = attr->param_size;
+out:
+	mutex_unlock(&opal_sysparam_mutex);
+	return ret;
+}
+
+static ssize_t sys_param_store(struct kobject *kobj,
+		struct kobj_attribute *kobj_attr, const char *buf, size_t count)
+{
+	struct param_attr *attr = container_of(kobj_attr, struct param_attr,
+			kobj_attr);
+	ssize_t ret;
+
+        /* param_data_buf is MAX_PARAM_DATA_LEN bytes; clamp the copy to fit */
+        if (count > MAX_PARAM_DATA_LEN)
+                count = MAX_PARAM_DATA_LEN;
+
+	mutex_lock(&opal_sysparam_mutex);
+	memcpy(param_data_buf, buf, count);
+	ret = opal_set_sys_param(attr->param_id, attr->param_size,
+			param_data_buf);
+	mutex_unlock(&opal_sysparam_mutex);
+	if (!ret)
+		ret = count;
+	return ret;
+}
+
+void __init opal_sys_param_init(void)
+{
+	struct device_node *sysparam;
+	struct param_attr *attr;
+	u32 *id, *size;
+	int count, i;
+	u8 *perm;
+
+	if (!opal_kobj) {
+		pr_warn("SYSPARAM: opal kobject is not available\n");
+		goto out;
+	}
+
+	sysparam_kobj = kobject_create_and_add("sysparams", opal_kobj);
+	if (!sysparam_kobj) {
+		pr_err("SYSPARAM: Failed to create sysparam kobject\n");
+		goto out;
+	}
+
+	/* Allocate big enough buffer for any get/set transactions */
+	param_data_buf = kzalloc(MAX_PARAM_DATA_LEN, GFP_KERNEL);
+	if (!param_data_buf) {
+		pr_err("SYSPARAM: Failed to allocate memory for param data "
+				"buf\n");
+		goto out_kobj_put;
+	}
+
+	sysparam = of_find_node_by_path("/ibm,opal/sysparams");
+	if (!sysparam) {
+		pr_err("SYSPARAM: Opal sysparam node not found\n");
+		goto out_param_buf;
+	}
+
+	if (!of_device_is_compatible(sysparam, "ibm,opal-sysparams")) {
+		pr_err("SYSPARAM: Opal sysparam node not compatible\n");
+		goto out_node_put;
+	}
+
+	/* Number of parameters exposed through DT */
+	count = of_property_count_strings(sysparam, "param-name");
+	if (count < 0) {
+		pr_err("SYSPARAM: No string found of property param-name in "
+				"the node %s\n", sysparam->name);
+		goto out_node_put;
+	}
+
+	id = kzalloc(sizeof(*id) * count, GFP_KERNEL);
+	if (!id) {
+		pr_err("SYSPARAM: Failed to allocate memory to read parameter "
+				"id\n");
+		goto out_node_put;
+	}
+
+	size = kzalloc(sizeof(*size) * count, GFP_KERNEL);
+	if (!size) {
+		pr_err("SYSPARAM: Failed to allocate memory to read parameter "
+				"size\n");
+		goto out_free_id;
+	}
+
+	perm = kzalloc(sizeof(*perm) * count, GFP_KERNEL);
+	if (!perm) {
+		pr_err("SYSPARAM: Failed to allocate memory to read supported "
+				"action on the parameter\n");
+		goto out_free_size;
+	}
+
+	if (of_property_read_u32_array(sysparam, "param-id", id, count)) {
+		pr_err("SYSPARAM: Missing property param-id in the DT\n");
+		goto out_free_perm;
+	}
+
+	if (of_property_read_u32_array(sysparam, "param-len", size, count)) {
+		pr_err("SYSPARAM: Missing property param-len in the DT\n");
+		goto out_free_perm;
+	}
+
+
+	if (of_property_read_u8_array(sysparam, "param-perm", perm, count)) {
+		pr_err("SYSPARAM: Missing property param-perm in the DT\n");
+		goto out_free_perm;
+	}
+
+	attr = kzalloc(sizeof(*attr) * count, GFP_KERNEL);
+	if (!attr) {
+		pr_err("SYSPARAM: Failed to allocate memory for parameter "
+				"attributes\n");
+		goto out_free_perm;
+	}
+
+	/* For each of the parameters, populate the parameter attributes */
+	for (i = 0; i < count; i++) {
+		if (size[i] > MAX_PARAM_DATA_LEN) {
+			pr_warn("SYSPARAM: Not creating parameter %d as size "
+				"exceeds buffer length\n", i);
+			continue;
+		}
+
+		sysfs_attr_init(&attr[i].kobj_attr.attr);
+		attr[i].param_id = id[i];
+		attr[i].param_size = size[i];
+		if (of_property_read_string_index(sysparam, "param-name", i,
+				&attr[i].kobj_attr.attr.name))
+			continue;
+
+		/* If the parameter is read-only or read-write */
+		switch (perm[i] & 3) {
+		case OPAL_SYSPARAM_READ:
+			attr[i].kobj_attr.attr.mode = S_IRUGO;
+			break;
+		case OPAL_SYSPARAM_WRITE:
+			attr[i].kobj_attr.attr.mode = S_IWUSR;
+			break;
+		case OPAL_SYSPARAM_RW:
+			attr[i].kobj_attr.attr.mode = S_IRUGO | S_IWUSR;
+			break;
+		default:
+			break;
+		}
+
+		attr[i].kobj_attr.show = sys_param_show;
+		attr[i].kobj_attr.store = sys_param_store;
+
+		if (sysfs_create_file(sysparam_kobj, &attr[i].kobj_attr.attr)) {
+			pr_err("SYSPARAM: Failed to create sysfs file %s\n",
+					attr[i].kobj_attr.attr.name);
+			goto out_free_attr;
+		}
+	}
+
+	kfree(perm);
+	kfree(size);
+	kfree(id);
+	of_node_put(sysparam);
+	return;
+
+out_free_attr:
+	kfree(attr);
+out_free_perm:
+	kfree(perm);
+out_free_size:
+	kfree(size);
+out_free_id:
+	kfree(id);
+out_node_put:
+	of_node_put(sysparam);
+out_param_buf:
+	kfree(param_data_buf);
+out_kobj_put:
+	kobject_put(sysparam_kobj);
+out:
+	return;
+}
diff --git a/arch/powerpc/platforms/powernv/opal-takeover.S b/arch/powerpc/platforms/powernv/opal-takeover.S
deleted file mode 100644
index 3cd262897c2..00000000000
--- a/arch/powerpc/platforms/powernv/opal-takeover.S
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * PowerNV OPAL takeover assembly code, for use by prom_init.c
- *
- * Copyright 2011 IBM Corp.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <asm/ppc_asm.h>
-#include <asm/hvcall.h>
-#include <asm/asm-offsets.h>
-#include <asm/opal.h>
-
-#define H_HAL_TAKEOVER			0x5124
-#define H_HAL_TAKEOVER_QUERY_MAGIC	-1
-
-	.text
-_GLOBAL(opal_query_takeover)
-	mfcr	r0
-	stw	r0,8(r1)
-	std	r3,STK_PARAM(R3)(r1)
-	std	r4,STK_PARAM(R4)(r1)
-	li	r3,H_HAL_TAKEOVER
-	li	r4,H_HAL_TAKEOVER_QUERY_MAGIC
-	HVSC
-	ld	r10,STK_PARAM(R3)(r1)
-	std	r4,0(r10)
-	ld	r10,STK_PARAM(R4)(r1)
-	std	r5,0(r10)
-	lwz	r0,8(r1)
-	mtcrf	0xff,r0
-	blr
-
-_GLOBAL(opal_do_takeover)
-	mfcr	r0
-	stw	r0,8(r1)
-	mflr	r0
-	std	r0,16(r1)
-	bl	__opal_do_takeover
-	ld	r0,16(r1)
-	mtlr	r0
-	lwz	r0,8(r1)
-	mtcrf	0xff,r0
-	blr
-
-__opal_do_takeover:
-	ld	r4,0(r3)
-	ld	r5,0x8(r3)
-	ld	r6,0x10(r3)
-	ld	r7,0x18(r3)
-	ld	r8,0x20(r3)
-	ld	r9,0x28(r3)
-	ld	r10,0x30(r3)
-	ld	r11,0x38(r3)
-	li	r3,H_HAL_TAKEOVER
-	HVSC
-	blr
-
-	.globl opal_secondary_entry
-opal_secondary_entry:
-	mr	r31,r3
-	mfmsr	r11
-	li	r12,(MSR_SF | MSR_ISF)@highest
-	sldi	r12,r12,48
-	or	r11,r11,r12
-	mtmsrd	r11
-	isync
-	mfspr	r4,SPRN_PIR
-	std	r4,0(r3)
-1:	HMT_LOW
-	ld	r4,8(r3)
-	cmpli	cr0,r4,0
-	beq	1b
-	HMT_MEDIUM
-1:	addi	r3,r31,16
-	bl	__opal_do_takeover
-	b	1b
-
-_GLOBAL(opal_enter_rtas)
-	mflr	r0
-	std	r0,16(r1)
-        stdu	r1,-PROM_FRAME_SIZE(r1)	/* Save SP and create stack space */
-
-	/* Because PROM is running in 32b mode, it clobbers the high order half
-	 * of all registers that it saves.  We therefore save those registers
-	 * PROM might touch to the stack.  (r0, r3-r13 are caller saved)
-	*/
-	SAVE_GPR(2, r1)
-	SAVE_GPR(13, r1)
-	SAVE_8GPRS(14, r1)
-	SAVE_10GPRS(22, r1)
-	mfcr	r10
-	mfmsr	r11
-	std	r10,_CCR(r1)
-	std	r11,_MSR(r1)
-
-	/* Get the PROM entrypoint */
-	mtlr	r5
-
-	/* Switch MSR to 32 bits mode
-	 */
-        li      r12,1
-        rldicr  r12,r12,MSR_SF_LG,(63-MSR_SF_LG)
-        andc    r11,r11,r12
-        li      r12,1
-        rldicr  r12,r12,MSR_ISF_LG,(63-MSR_ISF_LG)
-        andc    r11,r11,r12
-        mtmsrd  r11
-        isync
-
-	/* Enter RTAS here... */
-	blrl
-
-	/* Just make sure that r1 top 32 bits didn't get
-	 * corrupt by OF
-	 */
-	rldicl	r1,r1,0,32
-
-	/* Restore the MSR (back to 64 bits) */
-	ld	r0,_MSR(r1)
-	MTMSRD(r0)
-        isync
-
-	/* Restore other registers */
-	REST_GPR(2, r1)
-	REST_GPR(13, r1)
-	REST_8GPRS(14, r1)
-	REST_10GPRS(22, r1)
-	ld	r4,_CCR(r1)
-	mtcr	r4
-
-        addi	r1,r1,PROM_FRAME_SIZE
-	ld	r0,16(r1)
-	mtlr    r0
-	blr
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 6fabe92eafb..4abbff22a61 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -24,7 +24,7 @@
 	mflr	r0;			\
 	mfcr	r12;			\
 	std	r0,16(r1);		\
-	std	r12,8(r1);		\
+	stw	r12,8(r1);		\
 	std	r1,PACAR1(r13);		\
 	li	r0,0;			\
 	mfmsr	r12;			\
@@ -32,9 +32,9 @@
 	std	r12,PACASAVEDMSR(r13);	\
 	andc	r12,r12,r0;		\
 	mtmsrd	r12,1;			\
-	LOAD_REG_ADDR(r0,.opal_return);	\
+	LOAD_REG_ADDR(r0,opal_return);	\
 	mtlr	r0;			\
-	li	r0,MSR_DR|MSR_IR;	\
+	li	r0,MSR_DR|MSR_IR|MSR_LE;\
 	andc	r12,r12,r0;		\
 	li	r0,token;		\
 	mtspr	SPRN_HSRR1,r12;		\
@@ -44,9 +44,16 @@
 	mtspr	SPRN_HSRR0,r12;		\
 	hrfid
 
-_STATIC(opal_return)
+opal_return:
+	/*
+	 * Fixup endian on OPAL return... we should be able to simplify
+	 * this by instead converting the below trampoline to a set of
+	 * bytes (always BE) since MSR:LE will end up fixed up as a side
+	 * effect of the rfid.
+	 */
+	FIXUP_ENDIAN
 	ld	r2,PACATOC(r13);
-	ld	r4,8(r1);
+	lwz	r4,8(r1);
 	ld	r5,16(r1);
 	ld	r6,PACASAVEDMSR(r13);
 	mtspr	SPRN_SRR0,r5;
@@ -54,6 +61,7 @@ _STATIC(opal_return)
 	mtcr	r4;
 	rfid
 
+OPAL_CALL(opal_invalid_call,			OPAL_INVALID_CALL);
 OPAL_CALL(opal_console_write,			OPAL_CONSOLE_WRITE);
 OPAL_CALL(opal_console_read,			OPAL_CONSOLE_READ);
 OPAL_CALL(opal_console_write_buffer_space,	OPAL_CONSOLE_WRITE_BUFFER_SPACE);
@@ -107,4 +115,34 @@ OPAL_CALL(opal_pci_mask_pe_error,		OPAL_PCI_MASK_PE_ERROR);
 OPAL_CALL(opal_set_slot_led_status,		OPAL_SET_SLOT_LED_STATUS);
 OPAL_CALL(opal_get_epow_status,			OPAL_GET_EPOW_STATUS);
 OPAL_CALL(opal_set_system_attention_led,	OPAL_SET_SYSTEM_ATTENTION_LED);
+OPAL_CALL(opal_pci_next_error,			OPAL_PCI_NEXT_ERROR);
+OPAL_CALL(opal_pci_poll,			OPAL_PCI_POLL);
 OPAL_CALL(opal_pci_msi_eoi,			OPAL_PCI_MSI_EOI);
+OPAL_CALL(opal_pci_get_phb_diag_data2,		OPAL_PCI_GET_PHB_DIAG_DATA2);
+OPAL_CALL(opal_xscom_read,			OPAL_XSCOM_READ);
+OPAL_CALL(opal_xscom_write,			OPAL_XSCOM_WRITE);
+OPAL_CALL(opal_lpc_read,			OPAL_LPC_READ);
+OPAL_CALL(opal_lpc_write,			OPAL_LPC_WRITE);
+OPAL_CALL(opal_return_cpu,			OPAL_RETURN_CPU);
+OPAL_CALL(opal_reinit_cpus,			OPAL_REINIT_CPUS);
+OPAL_CALL(opal_read_elog,			OPAL_ELOG_READ);
+OPAL_CALL(opal_send_ack_elog,			OPAL_ELOG_ACK);
+OPAL_CALL(opal_get_elog_size,			OPAL_ELOG_SIZE);
+OPAL_CALL(opal_resend_pending_logs,		OPAL_ELOG_RESEND);
+OPAL_CALL(opal_write_elog,			OPAL_ELOG_WRITE);
+OPAL_CALL(opal_validate_flash,			OPAL_FLASH_VALIDATE);
+OPAL_CALL(opal_manage_flash,			OPAL_FLASH_MANAGE);
+OPAL_CALL(opal_update_flash,			OPAL_FLASH_UPDATE);
+OPAL_CALL(opal_resync_timebase,			OPAL_RESYNC_TIMEBASE);
+OPAL_CALL(opal_dump_init,			OPAL_DUMP_INIT);
+OPAL_CALL(opal_dump_info,			OPAL_DUMP_INFO);
+OPAL_CALL(opal_dump_info2,			OPAL_DUMP_INFO2);
+OPAL_CALL(opal_dump_read,			OPAL_DUMP_READ);
+OPAL_CALL(opal_dump_ack,			OPAL_DUMP_ACK);
+OPAL_CALL(opal_get_msg,				OPAL_GET_MSG);
+OPAL_CALL(opal_check_completion,		OPAL_CHECK_ASYNC_COMPLETION);
+OPAL_CALL(opal_dump_resend_notification,	OPAL_DUMP_RESEND);
+OPAL_CALL(opal_sync_host_reboot,		OPAL_SYNC_HOST_REBOOT);
+OPAL_CALL(opal_sensor_read,			OPAL_SENSOR_READ);
+OPAL_CALL(opal_get_param,			OPAL_GET_PARAM);
+OPAL_CALL(opal_set_param,			OPAL_SET_PARAM);
diff --git a/arch/powerpc/platforms/powernv/opal-xscom.c b/arch/powerpc/platforms/powernv/opal-xscom.c
new file mode 100644
index 00000000000..4cd2ea6c0db
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-xscom.c
@@ -0,0 +1,133 @@
+/*
+ * PowerNV SCOM bus handling.
+ *
+ * Copyright 2013 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/bug.h>
+#include <linux/gfp.h>
+#include <linux/slab.h>
+
+#include <asm/machdep.h>
+#include <asm/firmware.h>
+#include <asm/opal.h>
+#include <asm/scom.h>
+
+/*
+ * We could probably fit that inside the scom_map_t
+ * which is a void* after all but it's really too ugly
+ * so let's kmalloc it for now
+ */
+struct opal_scom_map {
+	uint32_t chip;
+	uint64_t addr;
+};
+
+static scom_map_t opal_scom_map(struct device_node *dev, u64 reg, u64 count)
+{
+	struct opal_scom_map *m;
+	const __be32 *gcid;
+
+	if (!of_get_property(dev, "scom-controller", NULL)) {
+		pr_err("%s: device %s is not a SCOM controller\n",
+			__func__, dev->full_name);
+		return SCOM_MAP_INVALID;
+	}
+	gcid = of_get_property(dev, "ibm,chip-id", NULL);
+	if (!gcid) {
+		pr_err("%s: device %s has no ibm,chip-id\n",
+			__func__, dev->full_name);
+		return SCOM_MAP_INVALID;
+	}
+	m = kmalloc(sizeof(*m), GFP_KERNEL);
+	if (!m)
+		return SCOM_MAP_INVALID;	/* same failure value as above */
+	m->chip = be32_to_cpup(gcid);
+	m->addr = reg;	/* base; read/write add their own offset to it */
+
+	return (scom_map_t)m;
+}
+
+static void opal_scom_unmap(scom_map_t map)
+{
+	kfree(map);
+}
+
+static int opal_xscom_err_xlate(int64_t rc)
+{
+	switch(rc) {
+	case 0:
+		return 0;
+	/* Add more translations if necessary */
+	default:
+		return -EIO;
+	}
+}
+
+static u64 opal_scom_unmangle(u64 addr)
+{
+	/*
+	 * XSCOM indirect addresses have the top bit set. Additionally
+	 * the rest of the top 3 nibbles is always 0.
+	 *
+	 * Because the debugfs interface uses signed offsets and shifts
+	 * the address left by 3, we basically cannot use the top 4 bits
+	 * of the 64-bit address, and thus cannot use the indirect bit.
+	 *
+	 * To deal with that, we support the indirect bit being in bit
+	 * 4 (IBM notation) instead of bit 0 in this API, we do the
+	 * conversion here. To leave room for further xscom address
+	 * expansion, we only clear out the top byte
+	 *
+	 * For in-kernel use, we also support the real indirect bit, so
+	 * we test for any of the top 5 bits
+	 *
+	 */
+	if (addr & (0x1full << 59))
+		addr = (addr & ~(0xffull << 56)) | (1ull << 63);
+	return addr;
+}
+
+static int opal_scom_read(scom_map_t map, u64 reg, u64 *value)
+{
+	struct opal_scom_map *m = map;
+	int64_t rc;
+	__be64 v = 0;	/* keeps *value defined if the call below fails */
+
+	reg = opal_scom_unmangle(m->addr + reg);
+	rc = opal_xscom_read(m->chip, reg, (__be64 *)__pa(&v));
+	*value = be64_to_cpu(v);
+	return opal_xscom_err_xlate(rc);
+}
+
+static int opal_scom_write(scom_map_t map, u64 reg, u64 value)
+{
+	struct opal_scom_map *m = map;
+	int64_t rc;
+
+	reg = opal_scom_unmangle(m->addr + reg);
+	rc = opal_xscom_write(m->chip, reg, value);
+	return opal_xscom_err_xlate(rc);
+}
+
+static const struct scom_controller opal_scom_controller = {
+	.map	= opal_scom_map,
+	.unmap	= opal_scom_unmap,
+	.read	= opal_scom_read,
+	.write	= opal_scom_write
+};
+
+static int opal_xscom_init(void)
+{
+	if (firmware_has_feature(FW_FEATURE_OPALv3))
+		scom_init(&opal_scom_controller);
+	return 0;
+}
+arch_initcall(opal_xscom_init);
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 628c564cead..199975613fe 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -13,47 +13,91 @@
 
 #include <linux/types.h>
 #include <linux/of.h>
+#include <linux/of_fdt.h>
 #include <linux/of_platform.h>
 #include <linux/interrupt.h>
+#include <linux/notifier.h>
 #include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/kobject.h>
+#include <linux/delay.h>
+#include <linux/memblock.h>
 #include <asm/opal.h>
 #include <asm/firmware.h>
+#include <asm/mce.h>
 
 #include "powernv.h"
 
+/* /sys/firmware/opal */
+struct kobject *opal_kobj;
+
 struct opal {
 	u64 base;
 	u64 entry;
+	u64 size;
 } opal;
 
-static struct device_node *opal_node;
+struct mcheck_recoverable_range {
+	u64 start_addr;
+	u64 end_addr;
+	u64 recover_addr;
+};
+
+static struct mcheck_recoverable_range *mc_recoverable_range;
+static int mc_recoverable_range_len;
+
+struct device_node *opal_node;
 static DEFINE_SPINLOCK(opal_write_lock);
 extern u64 opal_mc_secondary_handler[];
 static unsigned int *opal_irqs;
 static unsigned int opal_irq_count;
+static ATOMIC_NOTIFIER_HEAD(opal_notifier_head);
+static struct atomic_notifier_head opal_msg_notifier_head[OPAL_MSG_TYPE_MAX];
+static DEFINE_SPINLOCK(opal_notifier_lock);
+static uint64_t last_notified_mask = 0x0ul;
+static atomic_t opal_notifier_hold = ATOMIC_INIT(0);
+
+static void opal_reinit_cores(void)
+{
+	/* Do the actual re-init. This will clobber all FPRs, VRs, etc...
+	 *
+	 * It will preserve non volatile GPRs and HSPRG0/1. It will
+	 * also restore HIDs and other SPRs to their original value
+	 * but it might clobber a bunch.
+	 */
+#ifdef __BIG_ENDIAN__
+	opal_reinit_cpus(OPAL_REINIT_CPUS_HILE_BE);
+#else
+	opal_reinit_cpus(OPAL_REINIT_CPUS_HILE_LE);
+#endif
+}
 
 int __init early_init_dt_scan_opal(unsigned long node,
 				   const char *uname, int depth, void *data)
 {
-	const void *basep, *entryp;
-	unsigned long basesz, entrysz;
+	const void *basep, *entryp, *sizep;
+	int basesz, entrysz, runtimesz;
 
 	if (depth != 1 || strcmp(uname, "ibm,opal") != 0)
 		return 0;
 
 	basep  = of_get_flat_dt_prop(node, "opal-base-address", &basesz);
 	entryp = of_get_flat_dt_prop(node, "opal-entry-address", &entrysz);
+	sizep = of_get_flat_dt_prop(node, "opal-runtime-size", &runtimesz);
 
-	if (!basep || !entryp)
+	if (!basep || !entryp || !sizep)
 		return 1;
 
 	opal.base = of_read_number(basep, basesz/4);
 	opal.entry = of_read_number(entryp, entrysz/4);
+	opal.size = of_read_number(sizep, runtimesz/4);
 
-	pr_debug("OPAL Base  = 0x%llx (basep=%p basesz=%ld)\n",
+	pr_debug("OPAL Base  = 0x%llx (basep=%p basesz=%d)\n",
 		 opal.base, basep, basesz);
-	pr_debug("OPAL Entry = 0x%llx (entryp=%p basesz=%ld)\n",
+	pr_debug("OPAL Entry = 0x%llx (entryp=%p entrysz=%d)\n",
 		 opal.entry, entryp, entrysz);
+	pr_debug("OPAL Size  = 0x%llx (sizep=%p runtimesz=%d)\n",
+		 opal.size, sizep, runtimesz);
 
 	powerpc_firmware_features |= FW_FEATURE_OPAL;
 	if (of_flat_dt_is_compatible(node, "ibm,opal-v3")) {
@@ -67,57 +111,289 @@ int __init early_init_dt_scan_opal(unsigned long node,
 		printk("OPAL V1 detected !\n");
 	}
 
+	/* Reinit all cores with the right endian */
+	opal_reinit_cores();
+
+	/* Restore some bits */
+	if (cur_cpu_spec->cpu_restore)
+		cur_cpu_spec->cpu_restore();
+
+	return 1;
+}
+
+int __init early_init_dt_scan_recoverable_ranges(unsigned long node,
+				   const char *uname, int depth, void *data)
+{
+	int i, psize, size;
+	const __be32 *prop;
+
+	if (depth != 1 || strcmp(uname, "ibm,opal") != 0)
+		return 0;
+
+	prop = of_get_flat_dt_prop(node, "mcheck-recoverable-ranges", &psize);
+
+	if (!prop)
+		return 1;
+
+	pr_debug("Found machine check recoverable ranges.\n");
+
+	/*
+	 * Calculate number of available entries.
+	 *
+	 * Each recoverable address range entry is (start address, len,
+	 * recovery address), 2 cells each for start and recovery address,
+	 * 1 cell for len, totalling 5 cells per entry.
+	 */
+	mc_recoverable_range_len = psize / (sizeof(*prop) * 5);
+
+	/* Sanity check */
+	if (!mc_recoverable_range_len)
+		return 1;
+
+	/* Size required to hold all the entries. */
+	size = mc_recoverable_range_len *
+			sizeof(struct mcheck_recoverable_range);
+
+	/*
+	 * Allocate a buffer to hold the MC recoverable ranges. We would be
+	 * accessing them in real mode, hence it needs to be within
+	 * RMO region.
+	 */
+	mc_recoverable_range =__va(memblock_alloc_base(size, __alignof__(u64),
+							ppc64_rma_size));
+	memset(mc_recoverable_range, 0, size);
+
+	for (i = 0; i < mc_recoverable_range_len; i++) {
+		mc_recoverable_range[i].start_addr =
+					of_read_number(prop + (i * 5) + 0, 2);
+		mc_recoverable_range[i].end_addr =
+					mc_recoverable_range[i].start_addr +
+					of_read_number(prop + (i * 5) + 2, 1);
+		mc_recoverable_range[i].recover_addr =
+					of_read_number(prop + (i * 5) + 3, 2);
+
+		pr_debug("Machine check recoverable range: %llx..%llx: %llx\n",
+				mc_recoverable_range[i].start_addr,
+				mc_recoverable_range[i].end_addr,
+				mc_recoverable_range[i].recover_addr);
+	}
 	return 1;
 }
 
 static int __init opal_register_exception_handlers(void)
 {
+#ifdef __BIG_ENDIAN__
 	u64 glue;
 
 	if (!(powerpc_firmware_features & FW_FEATURE_OPAL))
 		return -ENODEV;
 
-	/* Hookup some exception handlers. We use the fwnmi area at 0x7000
-	 * to provide the glue space to OPAL
+	/* Hookup some exception handlers except machine check. We use the
+	 * fwnmi area at 0x7000 to provide the glue space to OPAL
 	 */
 	glue = 0x7000;
-	opal_register_exception_handler(OPAL_MACHINE_CHECK_HANDLER,
-					__pa(opal_mc_secondary_handler[0]),
-					glue);
-	glue += 128;
 	opal_register_exception_handler(OPAL_HYPERVISOR_MAINTENANCE_HANDLER,
 					0, glue);
 	glue += 128;
 	opal_register_exception_handler(OPAL_SOFTPATCH_HANDLER, 0, glue);
+#endif
 
 	return 0;
 }
 
 early_initcall(opal_register_exception_handlers);
 
+int opal_notifier_register(struct notifier_block *nb)
+{
+	if (!nb) {
+		pr_warning("%s: Invalid argument (%p)\n",
+			   __func__, nb);
+		return -EINVAL;
+	}
+
+	atomic_notifier_chain_register(&opal_notifier_head, nb);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(opal_notifier_register);
+
+int opal_notifier_unregister(struct notifier_block *nb)
+{
+	if (!nb) {
+		pr_warning("%s: Invalid argument (%p)\n",
+			   __func__, nb);
+		return -EINVAL;
+	}
+
+	atomic_notifier_chain_unregister(&opal_notifier_head, nb);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(opal_notifier_unregister);
+
+static void opal_do_notifier(uint64_t events)
+{
+	unsigned long flags;
+	uint64_t changed_mask;
+
+	if (atomic_read(&opal_notifier_hold))
+		return;
+
+	spin_lock_irqsave(&opal_notifier_lock, flags);
+	changed_mask = last_notified_mask ^ events;
+	last_notified_mask = events;
+	spin_unlock_irqrestore(&opal_notifier_lock, flags);
+
+	/*
+	 * We feed with the event bits and changed bits for
+	 * enough information to the callback.
+	 */
+	atomic_notifier_call_chain(&opal_notifier_head,
+				   events, (void *)changed_mask);
+}
+
+void opal_notifier_update_evt(uint64_t evt_mask,
+			      uint64_t evt_val)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&opal_notifier_lock, flags);
+	last_notified_mask &= ~evt_mask;
+	last_notified_mask |= evt_val;
+	spin_unlock_irqrestore(&opal_notifier_lock, flags);
+}
+
+void opal_notifier_enable(void)
+{
+	int64_t rc;
+	__be64 evt = 0;
+
+	atomic_set(&opal_notifier_hold, 0);
+
+	/* Process pending events */
+	rc = opal_poll_events(&evt);
+	if (rc == OPAL_SUCCESS && evt)
+		opal_do_notifier(be64_to_cpu(evt));
+}
+
+void opal_notifier_disable(void)
+{
+	atomic_set(&opal_notifier_hold, 1);
+}
+
+/*
+ * Register nb on the per-type notifier chain for OPAL message type msg_type.
+ * Valid types are 0..OPAL_MSG_TYPE_MAX-1; otherwise (or nb NULL): -EINVAL.
+ */
+int opal_message_notifier_register(enum OpalMessageType msg_type,
+					struct notifier_block *nb)
+{
+	if (!nb) {
+		pr_warning("%s: Invalid argument (%p)\n",
+			   __func__, nb);
+		return -EINVAL;
+	}
+	if (msg_type >= OPAL_MSG_TYPE_MAX) {
+		pr_warning("%s: Invalid message type argument (%d)\n",
+			   __func__, msg_type);
+		return -EINVAL;
+	}
+	return atomic_notifier_chain_register(
+				&opal_msg_notifier_head[msg_type], nb);
+}
+
+static void opal_message_do_notify(uint32_t msg_type, void *msg)
+{
+	/* notify subscribers */
+	atomic_notifier_call_chain(&opal_msg_notifier_head[msg_type],
+					msg_type, msg);
+}
+
+static void opal_handle_message(void)
+{
+	s64 ret;
+	/*
+	 * TODO: pre-allocate a message buffer depending on opal-msg-size
+	 * value in /proc/device-tree.
+	 */
+	static struct opal_msg msg;
+	u32 type;
+
+	ret = opal_get_msg(__pa(&msg), sizeof(msg));
+	/* No opal message pending. */
+	if (ret == OPAL_RESOURCE)
+		return;
+
+	/* check for errors. */
+	if (ret) {
+		pr_warning("%s: Failed to retrive opal message, err=%lld\n",
+				__func__, ret);
+		return;
+	}
+
+	type = be32_to_cpu(msg.msg_type);
+
+	/* Sanity check: type indexes an array of OPAL_MSG_TYPE_MAX chains */
+	if (type >= OPAL_MSG_TYPE_MAX) {
+		pr_warning("%s: Unknown message type: %u\n", __func__, type);
+		return;
+	}
+	opal_message_do_notify(type, (void *)&msg);
+}
+
+static int opal_message_notify(struct notifier_block *nb,
+			  unsigned long events, void *change)
+{
+	if (events & OPAL_EVENT_MSG_PENDING)
+		opal_handle_message();
+	return 0;
+}
+
+static struct notifier_block opal_message_nb = {
+	.notifier_call	= opal_message_notify,
+	.next		= NULL,
+	.priority	= 0,
+};
+
+static int __init opal_message_init(void)
+{
+	int ret, i;
+
+	for (i = 0; i < OPAL_MSG_TYPE_MAX; i++)
+		ATOMIC_INIT_NOTIFIER_HEAD(&opal_msg_notifier_head[i]);
+
+	ret = opal_notifier_register(&opal_message_nb);
+	if (ret) {
+		pr_err("%s: Can't register OPAL event notifier (%d)\n",
+		       __func__, ret);
+		return ret;
+	}
+	return 0;
+}
+early_initcall(opal_message_init);
+
 int opal_get_chars(uint32_t vtermno, char *buf, int count)
 {
-	s64 len, rc;
-	u64 evt;
+	s64 rc;
+	__be64 evt, len;
 
 	if (!opal.entry)
 		return -ENODEV;
 	opal_poll_events(&evt);
-	if ((evt & OPAL_EVENT_CONSOLE_INPUT) == 0)
+	if ((be64_to_cpu(evt) & OPAL_EVENT_CONSOLE_INPUT) == 0)
 		return 0;
-	len = count;
+	len = cpu_to_be64(count);
 	rc = opal_console_read(vtermno, &len, buf);
 	if (rc == OPAL_SUCCESS)
-		return len;
+		return be64_to_cpu(len);
 	return 0;
 }
 
 int opal_put_chars(uint32_t vtermno, const char *data, int total_len)
 {
 	int written = 0;
+	__be64 olen;
 	s64 len, rc;
 	unsigned long flags;
-	u64 evt;
+	__be64 evt;
 
 	if (!opal.entry)
 		return -ENODEV;
@@ -132,13 +408,14 @@ int opal_put_chars(uint32_t vtermno, const char *data, int total_len)
 	 */
 	spin_lock_irqsave(&opal_write_lock, flags);
 	if (firmware_has_feature(FW_FEATURE_OPALv2)) {
-		rc = opal_console_write_buffer_space(vtermno, &len);
+		rc = opal_console_write_buffer_space(vtermno, &olen);
+		len = be64_to_cpu(olen);
 		if (rc || len < total_len) {
 			spin_unlock_irqrestore(&opal_write_lock, flags);
 			/* Closed -> drop characters */
 			if (rc)
 				return total_len;
-			opal_poll_events(&evt);
+			opal_poll_events(NULL);
 			return -EAGAIN;
 		}
 	}
@@ -149,8 +426,9 @@ int opal_put_chars(uint32_t vtermno, const char *data, int total_len)
 	rc = OPAL_BUSY;
 	while(total_len > 0 && (rc == OPAL_BUSY ||
 				rc == OPAL_BUSY_EVENT || rc == OPAL_SUCCESS)) {
-		len = total_len;
-		rc = opal_console_write(vtermno, &len, data);
+		olen = cpu_to_be64(total_len);
+		rc = opal_console_write(vtermno, &olen, data);
+		len = be64_to_cpu(olen);
 
 		/* Closed or other error drop */
 		if (rc != OPAL_SUCCESS && rc != OPAL_BUSY &&
@@ -170,142 +448,129 @@ int opal_put_chars(uint32_t vtermno, const char *data, int total_len)
 		 */
 		do
 			opal_poll_events(&evt);
-		while(rc == OPAL_SUCCESS && (evt & OPAL_EVENT_CONSOLE_OUTPUT));
+		while(rc == OPAL_SUCCESS &&
+			(be64_to_cpu(evt) & OPAL_EVENT_CONSOLE_OUTPUT));
 	}
 	spin_unlock_irqrestore(&opal_write_lock, flags);
 	return written;
 }
 
+static int opal_recover_mce(struct pt_regs *regs,
+					struct machine_check_event *evt)
+{
+	int recovered = 0;	/* return value: 1 = recovered, 0 = not */
+	uint64_t ea = get_mce_fault_addr(evt);
+
+	if (!(regs->msr & MSR_RI)) {
+		/* If MSR_RI isn't set, we cannot recover */
+		recovered = 0;
+	} else if (evt->disposition == MCE_DISPOSITION_RECOVERED) {
+		/* Platform corrected itself */
+		recovered = 1;
+	} else if (ea && !is_kernel_addr(ea)) {
+		/*
+		 * Faulting address is not a kernel address. We should be fine.
+		 * We need to find which process uses this address.
+		 * For now, kill the task (SIGBUS) if we received the exception
+		 * while in userspace; the global init task is never killed.
+		 *
+		 * TODO: Queue up this address for hwpoisoning later.
+		 */
+		if (user_mode(regs) && !is_global_init(current)) {
+			_exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
+			recovered = 1;
+		} else
+			recovered = 0;
+	} else if (user_mode(regs) && !is_global_init(current) &&
+		evt->severity == MCE_SEV_ERROR_SYNC) {
+		/*
+		 * If we have received a synchronous error when in userspace
+		 * kill the task (again sparing the global init task).
+		 */
+		_exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
+		recovered = 1;
+	}
+	return recovered;
+}
+
 int opal_machine_check(struct pt_regs *regs)
 {
-	struct opal_machine_check_event *opal_evt = get_paca()->opal_mc_evt;
-	struct opal_machine_check_event evt;
-	const char *level, *sevstr, *subtype;
-	static const char *opal_mc_ue_types[] = {
-		"Indeterminate",
-		"Instruction fetch",
-		"Page table walk ifetch",
-		"Load/Store",
-		"Page table walk Load/Store",
-	};
-	static const char *opal_mc_slb_types[] = {
-		"Indeterminate",
-		"Parity",
-		"Multihit",
-	};
-	static const char *opal_mc_erat_types[] = {
-		"Indeterminate",
-		"Parity",
-		"Multihit",
-	};
-	static const char *opal_mc_tlb_types[] = {
-		"Indeterminate",
-		"Parity",
-		"Multihit",
-	};
-
-	/* Copy the event structure and release the original */
-	evt = *opal_evt;
-	opal_evt->in_use = 0;
+	struct machine_check_event evt;
+
+	if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
+		return 0;
 
 	/* Print things out */
-	if (evt.version != OpalMCE_V1) {
+	if (evt.version != MCE_V1) {
 		pr_err("Machine Check Exception, Unknown event version %d !\n",
 		       evt.version);
 		return 0;
 	}
-	switch(evt.severity) {
-	case OpalMCE_SEV_NO_ERROR:
-		level = KERN_INFO;
-		sevstr = "Harmless";
-		break;
-	case OpalMCE_SEV_WARNING:
-		level = KERN_WARNING;
-		sevstr = "";
-		break;
-	case OpalMCE_SEV_ERROR_SYNC:
-		level = KERN_ERR;
-		sevstr = "Severe";
-		break;
-	case OpalMCE_SEV_FATAL:
-	default:
-		level = KERN_ERR;
-		sevstr = "Fatal";
-		break;
-	}
+	machine_check_print_event_info(&evt);
 
-	printk("%s%s Machine check interrupt [%s]\n", level, sevstr,
-	       evt.disposition == OpalMCE_DISPOSITION_RECOVERED ?
-	       "Recovered" : "[Not recovered");
-	printk("%s  Initiator: %s\n", level,
-	       evt.initiator == OpalMCE_INITIATOR_CPU ? "CPU" : "Unknown");
-	switch(evt.error_type) {
-	case OpalMCE_ERROR_TYPE_UE:
-		subtype = evt.u.ue_error.ue_error_type <
-			ARRAY_SIZE(opal_mc_ue_types) ?
-			opal_mc_ue_types[evt.u.ue_error.ue_error_type]
-			: "Unknown";
-		printk("%s  Error type: UE [%s]\n", level, subtype);
-		if (evt.u.ue_error.effective_address_provided)
-			printk("%s    Effective address: %016llx\n",
-			       level, evt.u.ue_error.effective_address);
-		if (evt.u.ue_error.physical_address_provided)
-			printk("%s      Physial address: %016llx\n",
-			       level, evt.u.ue_error.physical_address);
-		break;
-	case OpalMCE_ERROR_TYPE_SLB:
-		subtype = evt.u.slb_error.slb_error_type <
-			ARRAY_SIZE(opal_mc_slb_types) ?
-			opal_mc_slb_types[evt.u.slb_error.slb_error_type]
-			: "Unknown";
-		printk("%s  Error type: SLB [%s]\n", level, subtype);
-		if (evt.u.slb_error.effective_address_provided)
-			printk("%s    Effective address: %016llx\n",
-			       level, evt.u.slb_error.effective_address);
-		break;
-	case OpalMCE_ERROR_TYPE_ERAT:
-		subtype = evt.u.erat_error.erat_error_type <
-			ARRAY_SIZE(opal_mc_erat_types) ?
-			opal_mc_erat_types[evt.u.erat_error.erat_error_type]
-			: "Unknown";
-		printk("%s  Error type: ERAT [%s]\n", level, subtype);
-		if (evt.u.erat_error.effective_address_provided)
-			printk("%s    Effective address: %016llx\n",
-			       level, evt.u.erat_error.effective_address);
-		break;
-	case OpalMCE_ERROR_TYPE_TLB:
-		subtype = evt.u.tlb_error.tlb_error_type <
-			ARRAY_SIZE(opal_mc_tlb_types) ?
-			opal_mc_tlb_types[evt.u.tlb_error.tlb_error_type]
-			: "Unknown";
-		printk("%s  Error type: TLB [%s]\n", level, subtype);
-		if (evt.u.tlb_error.effective_address_provided)
-			printk("%s    Effective address: %016llx\n",
-			       level, evt.u.tlb_error.effective_address);
-		break;
-	default:
-	case OpalMCE_ERROR_TYPE_UNKNOWN:
-		printk("%s  Error type: Unknown\n", level);
-		break;
-	}
-	return evt.severity == OpalMCE_SEV_FATAL ? 0 : 1;
+	if (opal_recover_mce(regs, &evt))
+		return 1;
+	return 0;
+}
+
+static uint64_t find_recovery_address(uint64_t nip)
+{
+	int i;
+
+	for (i = 0; i < mc_recoverable_range_len; i++)	/* first match wins */
+		if ((nip >= mc_recoverable_range[i].start_addr) &&
+		    (nip < mc_recoverable_range[i].end_addr))
+		    return mc_recoverable_range[i].recover_addr;
+	return 0;	/* nip lies in no [start_addr, end_addr) range */
+}
+
+/* Redirect nip to its fixup if it lies in [opal.base, base + size). */
+bool opal_mce_check_early_recovery(struct pt_regs *regs)
+{
+	uint64_t recover_addr = 0;
+
+	if (!opal.base || !opal.size)
+		goto out;
+
+	/* base + size is one past the end of the range, so compare with '<' */
+	if ((regs->nip >= opal.base) &&
+			(regs->nip < (opal.base + opal.size)))
+		recover_addr = find_recovery_address(regs->nip);
+
+	/* Setup regs->nip to rfi into fixup address. */
+	if (recover_addr)
+		regs->nip = recover_addr;
+
+out:
+	return !!recover_addr;	/* true only when nip was redirected */
 }
 
 static irqreturn_t opal_interrupt(int irq, void *data)
 {
-	uint64_t events;
+	__be64 events;
 
 	opal_handle_interrupt(virq_to_hw(irq), &events);
 
-	/* XXX TODO: Do something with the events */
+	opal_do_notifier(be64_to_cpu(events));
 
 	return IRQ_HANDLED;
 }
 
+static int opal_sysfs_init(void)
+{
+	opal_kobj = kobject_create_and_add("opal", firmware_kobj);
+	if (!opal_kobj) {
+		pr_warn("kobject_create_and_add opal failed\n");
+		return -ENOMEM;
+	}
+
+	return 0;	/* opal_init() sets up its sysfs interfaces only on 0 */
+}
+
 static int __init opal_init(void)
 {
 	struct device_node *np, *consoles;
-	const u32 *irqs;
+	const __be32 *irqs;
 	int rc, i, irqlen;
 
 	opal_node = of_find_node_by_path("/ibm,opal");
@@ -313,18 +578,20 @@ static int __init opal_init(void)
 		pr_warn("opal: Node not found\n");
 		return -ENODEV;
 	}
+
+	/* Register OPAL consoles if any ports */
 	if (firmware_has_feature(FW_FEATURE_OPALv2))
 		consoles = of_find_node_by_path("/ibm,opal/consoles");
 	else
 		consoles = of_node_get(opal_node);
-
-	/* Register serial ports */
-	for_each_child_of_node(consoles, np) {
-		if (strcmp(np->name, "serial"))
-			continue;
-		of_platform_device_create(np, NULL, NULL);
+	if (consoles) {
+		for_each_child_of_node(consoles, np) {
+			if (strcmp(np->name, "serial"))
+				continue;
+			of_platform_device_create(np, NULL, NULL);
+		}
+		of_node_put(consoles);
 	}
-	of_node_put(consoles);
 
 	/* Find all OPAL interrupts and request them */
 	irqs = of_get_property(opal_node, "opal-interrupts", &irqlen);
@@ -345,6 +612,22 @@ static int __init opal_init(void)
 				   " (0x%x)\n", rc, irq, hwirq);
 		opal_irqs[i] = irq;
 	}
+
+	/* Create "opal" kobject under /sys/firmware */
+	rc = opal_sysfs_init();
+	if (rc == 0) {
+		/* Setup error log interface */
+		rc = opal_elog_init();
+		/* Setup code update interface */
+		opal_flash_init();
+		/* Setup platform dump extract interface */
+		opal_platform_dump_init();
+		/* Setup system parameters interface */
+		opal_sys_param_init();
+		/* Setup message log interface. */
+		opal_msglog_init();
+	}
+
 	return 0;
 }
 subsys_initcall(opal_init);
@@ -352,10 +635,91 @@ subsys_initcall(opal_init);
 void opal_shutdown(void)
 {
 	unsigned int i;
+	long rc = OPAL_BUSY;
 
+	/* First free interrupts, which will also mask them */
 	for (i = 0; i < opal_irq_count; i++) {
 		if (opal_irqs[i])
-			free_irq(opal_irqs[i], 0);
+			free_irq(opal_irqs[i], NULL);
 		opal_irqs[i] = 0;
 	}
+
+	/*
+	 * Then sync with OPAL, which ensures that anything that can
+	 * potentially write to our memory has completed, such
+	 * as an ongoing dump retrieval.
+	 */
+	while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
+		rc = opal_sync_host_reboot();
+		if (rc == OPAL_BUSY)
+			opal_poll_events(NULL);
+		else
+			mdelay(10);
+	}
+}
+
+/* Export this so that test modules can use it */
+EXPORT_SYMBOL_GPL(opal_invalid_call);
+
+/* Build a big-endian OPAL sg list for a vmalloc region; NULL on ENOMEM */
+struct opal_sg_list *opal_vmalloc_to_sg_list(void *vmalloc_addr,
+					     unsigned long vmalloc_size)
+{
+	struct opal_sg_list *sg, *first = NULL;
+	unsigned long i = 0;	/* index of the next free entry in sg */
+
+	sg = kzalloc(PAGE_SIZE, GFP_KERNEL);	/* each list node is one page */
+	if (!sg)
+		goto nomem;
+
+	first = sg;
+
+	while (vmalloc_size > 0) {	/* one entry per (partial) page */
+		uint64_t data = vmalloc_to_pfn(vmalloc_addr) << PAGE_SHIFT;
+		uint64_t length = min(vmalloc_size, PAGE_SIZE);
+
+		sg->entry[i].data = cpu_to_be64(data);
+		sg->entry[i].length = cpu_to_be64(length);
+		i++;
+
+		if (i >= SG_ENTRIES_PER_NODE) {	/* node full: chain a new one */
+			struct opal_sg_list *next;
+
+			next = kzalloc(PAGE_SIZE, GFP_KERNEL);
+			if (!next)
+				goto nomem;
+
+			sg->length = cpu_to_be64(	/* NOTE(review): +16 presumably the node header; confirm */
+					i * sizeof(struct opal_sg_entry) + 16);
+			i = 0;
+			sg->next = cpu_to_be64(__pa(next));	/* physical link */
+			sg = next;
+		}
+
+		vmalloc_addr += length;
+		vmalloc_size -= length;
+	}
+
+	sg->length = cpu_to_be64(i * sizeof(struct opal_sg_entry) + 16);
+
+	return first;	/* caller releases it with opal_free_sg_list() */
+
+nomem:
+	pr_err("%s : Failed to allocate memory\n", __func__);
+	opal_free_sg_list(first);	/* copes with NULL and partial chains */
+	return NULL;
+}
+
+void opal_free_sg_list(struct opal_sg_list *sg)
+{
+	while (sg) {	/* a NULL list is a no-op */
+		uint64_t next = be64_to_cpu(sg->next);	/* read before kfree() */
+
+		kfree(sg);
+
+		if (next)
+			sg = __va(next);	/* links are physical addresses */
+		else
+			sg = NULL;	/* zero link ends the chain */
+	}
 }
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 9c9d15e4cdf..de19edeaa7a 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -13,6 +13,8 @@
 
 #include <linux/kernel.h>
 #include <linux/pci.h>
+#include <linux/crash_dump.h>
+#include <linux/debugfs.h>
 #include <linux/delay.h>
 #include <linux/string.h>
 #include <linux/init.h>
@@ -20,6 +22,7 @@
 #include <linux/irq.h>
 #include <linux/io.h>
 #include <linux/msi.h>
+#include <linux/memblock.h>
 
 #include <asm/sections.h>
 #include <asm/io.h>
@@ -32,6 +35,7 @@
 #include <asm/iommu.h>
 #include <asm/tce.h>
 #include <asm/xics.h>
+#include <asm/debug.h>
 
 #include "powernv.h"
 #include "pci.h"
@@ -68,6 +72,16 @@ define_pe_printk_level(pe_err, KERN_ERR);
 define_pe_printk_level(pe_warn, KERN_WARNING);
 define_pe_printk_level(pe_info, KERN_INFO);
 
+/*
+ * Store val to paddr with stdcix. stdcix is only supposed to be used in
+ * hypervisor real mode as per the architecture spec.
+ */
+static inline void __raw_rm_writeq(u64 val, volatile void __iomem *paddr)
+{
+	__asm__ __volatile__("stdcix %0,0,%1"
+		: : "r" (val), "r" (paddr) : "memory");
+}
+
 static int pnv_ioda_alloc_pe(struct pnv_phb *phb)
 {
 	unsigned long pe;
@@ -151,13 +165,23 @@ static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
 		rid_end = pe->rid + 1;
 	}
 
-	/* Associate PE in PELT */
+	/*
+	 * Associate PE in PELT. We need to add the PE into the
+	 * corresponding PELT-V as well. Otherwise, an error
+	 * originating from the PE might contribute to other
+	 * PEs.
+	 */
 	rc = opal_pci_set_pe(phb->opal_id, pe->pe_number, pe->rid,
 			     bcomp, dcomp, fcomp, OPAL_MAP_PE);
 	if (rc) {
 		pe_err(pe, "OPAL error %ld trying to setup PELT table\n", rc);
 		return -ENXIO;
 	}
+
+	rc = opal_pci_set_peltv(phb->opal_id, pe->pe_number,
+				pe->pe_number, OPAL_ADD_PE_TO_DOMAIN);
+	if (rc)
+		pe_warn(pe, "OPAL error %d adding self to PELTV\n", rc);
 	opal_pci_eeh_freeze_clear(phb->opal_id, pe->pe_number,
 				  OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
 
@@ -320,7 +344,6 @@ static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe)
 				pci_name(dev));
 			continue;
 		}
-		pci_dev_get(dev);
 		pdn->pcidev = dev;
 		pdn->pe_number = pe->pe_number;
 		pe->dma_weight += pnv_ioda_dma_weight(dev);
@@ -438,13 +461,57 @@ static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev
 		return;
 
 	pe = &phb->ioda.pe_array[pdn->pe_number];
+	WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops);
 	set_iommu_table_base(&pdev->dev, &pe->tce32_table);
 }
 
-static void pnv_pci_ioda1_tce_invalidate(struct iommu_table *tbl,
-					 u64 *startp, u64 *endp)
+static int pnv_pci_ioda_dma_set_mask(struct pnv_phb *phb,
+				     struct pci_dev *pdev, u64 dma_mask)
+{
+	struct pci_dn *pdn = pci_get_pdn(pdev);
+	struct pnv_ioda_pe *pe;
+	uint64_t top;
+	bool bypass = false;	/* stays false unless the PE has bypass on */
+
+	if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
+		return -ENODEV;	/* device has no PE assigned */
+
+	pe = &phb->ioda.pe_array[pdn->pe_number];
+	if (pe->tce_bypass_enabled) {
+		top = pe->tce_bypass_base + memblock_end_of_DRAM() - 1;
+		bypass = (dma_mask >= top);	/* mask must reach the last byte */
+	}
+
+	if (bypass) {
+		dev_info(&pdev->dev, "Using 64-bit DMA iommu bypass\n");
+		set_dma_ops(&pdev->dev, &dma_direct_ops);
+		set_dma_offset(&pdev->dev, pe->tce_bypass_base);
+	} else {
+		dev_info(&pdev->dev, "Using 32-bit DMA via iommu\n");
+		set_dma_ops(&pdev->dev, &dma_iommu_ops);
+		set_iommu_table_base(&pdev->dev, &pe->tce32_table);
+	}
+	return 0;	/* both paths succeed; only the PE lookup can fail */
+}
+
+static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus)
 {
-	u64 __iomem *invalidate = (u64 __iomem *)tbl->it_index;
+	struct pci_dev *dev;
+
+	list_for_each_entry(dev, &bus->devices, bus_list) {
+		set_iommu_table_base_and_group(&dev->dev, &pe->tce32_table);
+		if (dev->subordinate)	/* recurse into the bus below dev */
+			pnv_ioda_setup_bus_dma(pe, dev->subordinate);
+	}
+}
+
+static void pnv_pci_ioda1_tce_invalidate(struct pnv_ioda_pe *pe,
+					 struct iommu_table *tbl,
+					 __be64 *startp, __be64 *endp, bool rm)
+{
+	__be64 __iomem *invalidate = rm ?
+		(__be64 __iomem *)pe->tce_inval_reg_phys :
+		(__be64 __iomem *)tbl->it_index;
 	unsigned long start, end, inc;
 
 	start = __pa(startp);
@@ -471,7 +538,10 @@ static void pnv_pci_ioda1_tce_invalidate(struct iommu_table *tbl,
 
         mb(); /* Ensure above stores are visible */
         while (start <= end) {
-                __raw_writeq(start, invalidate);
+		if (rm)
+			__raw_rm_writeq(cpu_to_be64(start), invalidate);
+		else
+			__raw_writeq(cpu_to_be64(start), invalidate);
                 start += inc;
         }
 
@@ -483,10 +553,12 @@ static void pnv_pci_ioda1_tce_invalidate(struct iommu_table *tbl,
 
 static void pnv_pci_ioda2_tce_invalidate(struct pnv_ioda_pe *pe,
 					 struct iommu_table *tbl,
-					 u64 *startp, u64 *endp)
+					 __be64 *startp, __be64 *endp, bool rm)
 {
 	unsigned long start, end, inc;
-	u64 __iomem *invalidate = (u64 __iomem *)tbl->it_index;
+	__be64 __iomem *invalidate = rm ?
+		(__be64 __iomem *)pe->tce_inval_reg_phys :
+		(__be64 __iomem *)tbl->it_index;
 
 	/* We'll invalidate DMA address in PE scope */
 	start = 0x2ul << 60;
@@ -502,22 +574,25 @@ static void pnv_pci_ioda2_tce_invalidate(struct pnv_ioda_pe *pe,
 	mb();
 
 	while (start <= end) {
-		__raw_writeq(start, invalidate);
+		if (rm)
+			__raw_rm_writeq(cpu_to_be64(start), invalidate);
+		else
+			__raw_writeq(cpu_to_be64(start), invalidate);
 		start += inc;
 	}
 }
 
 void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl,
-				 u64 *startp, u64 *endp)
+				 __be64 *startp, __be64 *endp, bool rm)
 {
 	struct pnv_ioda_pe *pe = container_of(tbl, struct pnv_ioda_pe,
 					      tce32_table);
 	struct pnv_phb *phb = pe->phb;
 
 	if (phb->type == PNV_PHB_IODA1)
-		pnv_pci_ioda1_tce_invalidate(tbl, startp, endp);
+		pnv_pci_ioda1_tce_invalidate(pe, tbl, startp, endp, rm);
 	else
-		pnv_pci_ioda2_tce_invalidate(pe, tbl, startp, endp);
+		pnv_pci_ioda2_tce_invalidate(pe, tbl, startp, endp, rm);
 }
 
 static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
@@ -589,12 +664,20 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
 		 * errors, and on the first pass the data will be a relative
 		 * bus number, print that out instead.
 		 */
-		tbl->it_busno = 0;
-		tbl->it_index = (unsigned long)ioremap(be64_to_cpup(swinvp), 8);
-		tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE |
-			       TCE_PCI_SWINV_PAIR;
+		pe->tce_inval_reg_phys = be64_to_cpup(swinvp);
+		tbl->it_index = (unsigned long)ioremap(pe->tce_inval_reg_phys,
+				8);
+		tbl->it_type |= (TCE_PCI_SWINV_CREATE |
+				 TCE_PCI_SWINV_FREE   |
+				 TCE_PCI_SWINV_PAIR);
 	}
 	iommu_init_table(tbl, phb->hose->node);
+	iommu_register_group(tbl, phb->hose->global_number, pe->pe_number);
+
+	if (pe->pdev)
+		set_iommu_table_base_and_group(&pe->pdev->dev, tbl);
+	else
+		pnv_ioda_setup_bus_dma(pe, pe->pbus);
 
 	return;
  fail:
@@ -605,6 +688,56 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
 		__free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * segs));
 }
 
+static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool enable)
+{
+	struct pnv_ioda_pe *pe = container_of(tbl, struct pnv_ioda_pe,
+					      tce32_table);
+	uint16_t window_id = (pe->pe_number << 1 ) + 1;	/* 2 * pe_number + 1 */
+	int64_t rc;
+
+	pe_info(pe, "%sabling 64-bit DMA bypass\n", enable ? "En" : "Dis");
+	if (enable) {
+		phys_addr_t top = memblock_end_of_DRAM();
+
+		top = roundup_pow_of_two(top);	/* DRAM end, rounded up to 2^n */
+		rc = opal_pci_map_pe_dma_window_real(pe->phb->opal_id,
+						     pe->pe_number,
+						     window_id,
+						     pe->tce_bypass_base,
+						     top);
+	} else {
+		rc = opal_pci_map_pe_dma_window_real(pe->phb->opal_id,
+						     pe->pe_number,
+						     window_id,
+						     pe->tce_bypass_base,
+						     0);	/* 0 in place of top */
+
+		/*
+		 * We might want to reset the DMA ops of all devices on
+		 * this PE. However in theory, that shouldn't be necessary
+		 * as this is used for VFIO/KVM pass-through and the device
+		 * hasn't yet been returned to its kernel driver.
+		 */
+	}
+	if (rc)
+		pe_err(pe, "OPAL error %lld configuring bypass window\n", rc);
+	else
+		pe->tce_bypass_enabled = enable;	/* only on OPAL success */
+}
+
+static void pnv_pci_ioda2_setup_bypass_pe(struct pnv_phb *phb,
+					  struct pnv_ioda_pe *pe)
+{
+	/* TVE #1 is selected by PCI address bit 59 (phb itself is unused) */
+	pe->tce_bypass_base = 1ull << 59;
+
+	/* Install set_bypass callback for VFIO */
+	pe->tce32_table.set_bypass = pnv_pci_ioda2_set_bypass;
+
+	/* Enable bypass by default; a failure is only logged by the callee */
+	pnv_pci_ioda2_set_bypass(&pe->tce32_table, true);
+}
+
 static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
 				       struct pnv_ioda_pe *pe)
 {
@@ -661,12 +794,21 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
 		 * errors, and on the first pass the data will be a relative
 		 * bus number, print that out instead.
 		 */
-		tbl->it_busno = 0;
-		tbl->it_index = (unsigned long)ioremap(be64_to_cpup(swinvp), 8);
-		tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE;
+		pe->tce_inval_reg_phys = be64_to_cpup(swinvp);
+		tbl->it_index = (unsigned long)ioremap(pe->tce_inval_reg_phys,
+				8);
+		tbl->it_type |= (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE);
 	}
 	iommu_init_table(tbl, phb->hose->node);
+	iommu_register_group(tbl, phb->hose->global_number, pe->pe_number);
 
+	if (pe->pdev)
+		set_iommu_table_base_and_group(&pe->pdev->dev, tbl);
+	else
+		pnv_ioda_setup_bus_dma(pe, pe->pbus);
+
+	/* Also create a bypass window */
+	pnv_pci_ioda2_setup_bypass_pe(phb, pe);
 	return;
 fail:
 	if (pe->tce32_seg >= 0)
@@ -762,8 +904,7 @@ static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev,
 	struct irq_data *idata;
 	struct irq_chip *ichip;
 	unsigned int xive_num = hwirq - phb->msi_base;
-	uint64_t addr64;
-	uint32_t addr32, data;
+	__be32 data;
 	int rc;
 
 	/* No PE assigned ? bail out ... no MSI for you ! */
@@ -787,6 +928,8 @@ static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev,
 	}
 
 	if (is_64) {
+		__be64 addr64;
+
 		rc = opal_get_msi_64(phb->opal_id, pe->mve_number, xive_num, 1,
 				     &addr64, &data);
 		if (rc) {
@@ -794,9 +937,11 @@ static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev,
 				pci_name(dev), rc);
 			return -EIO;
 		}
-		msg->address_hi = addr64 >> 32;
-		msg->address_lo = addr64 & 0xfffffffful;
+		msg->address_hi = be64_to_cpu(addr64) >> 32;
+		msg->address_lo = be64_to_cpu(addr64) & 0xfffffffful;
 	} else {
+		__be32 addr32;
+
 		rc = opal_get_msi_32(phb->opal_id, pe->mve_number, xive_num, 1,
 				     &addr32, &data);
 		if (rc) {
@@ -805,9 +950,9 @@ static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev,
 			return -EIO;
 		}
 		msg->address_hi = 0;
-		msg->address_lo = addr32;
+		msg->address_lo = be32_to_cpu(addr32);
 	}
-	msg->data = data;
+	msg->data = be32_to_cpu(data);
 
 	/*
 	 * Change the IRQ chip for the MSI interrupts on PHB3.
@@ -968,11 +1113,38 @@ static void pnv_pci_ioda_setup_DMA(void)
 	}
 }
 
+static void pnv_pci_ioda_create_dbgfs(void)
+{
+#ifdef CONFIG_DEBUG_FS	/* the function is an empty stub otherwise */
+	struct pci_controller *hose, *tmp;
+	struct pnv_phb *phb;
+	char name[16];	/* "PCI" followed by the hex global_number */
+
+	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
+		phb = hose->private_data;
+
+		sprintf(name, "PCI%04x", hose->global_number);	/* e.g. PCI0001 */
+		phb->dbgfs = debugfs_create_dir(name, powerpc_debugfs_root);
+		if (!phb->dbgfs)	/* warn and carry on with the next PHB */
+			pr_warning("%s: Error on creating debugfs on PHB#%x\n",
+				__func__, hose->global_number);
+	}
+#endif /* CONFIG_DEBUG_FS */
+}
+
 static void pnv_pci_ioda_fixup(void)
 {
 	pnv_pci_ioda_setup_PEs();
 	pnv_pci_ioda_setup_seg();
 	pnv_pci_ioda_setup_DMA();
+
+	pnv_pci_ioda_create_dbgfs();
+
+#ifdef CONFIG_EEH
+	eeh_probe_mode_set(EEH_PROBE_MODE_DEV);
+	eeh_addr_cache_build();
+	eeh_init();
+#endif
 }
 
 /*
@@ -1049,19 +1221,20 @@ static void pnv_pci_ioda_shutdown(struct pnv_phb *phb)
 		       OPAL_ASSERT_RESET);
 }
 
-void __init pnv_pci_init_ioda_phb(struct device_node *np, int ioda_type)
+void __init pnv_pci_init_ioda_phb(struct device_node *np,
+				  u64 hub_id, int ioda_type)
 {
 	struct pci_controller *hose;
-	static int primary = 1;
 	struct pnv_phb *phb;
-	unsigned long size, m32map_off, iomap_off, pemap_off;
-	const u64 *prop64;
-	const u32 *prop32;
+	unsigned long size, m32map_off, pemap_off, iomap_off = 0;
+	const __be64 *prop64;
+	const __be32 *prop32;
+	int len;
 	u64 phb_id;
 	void *aux;
 	long rc;
 
-	pr_info(" Initializing IODA%d OPAL PHB %s\n", ioda_type, np->full_name);
+	pr_info("Initializing IODA%d OPAL PHB %s\n", ioda_type, np->full_name);
 
 	prop64 = of_get_property(np, "ibm,opal-phbid", NULL);
 	if (!prop64) {
@@ -1072,21 +1245,33 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np, int ioda_type)
 	pr_debug("  PHB-ID  : 0x%016llx\n", phb_id);
 
 	phb = alloc_bootmem(sizeof(struct pnv_phb));
-	if (phb) {
-		memset(phb, 0, sizeof(struct pnv_phb));
-		phb->hose = hose = pcibios_alloc_controller(np);
+	if (!phb) {
+		pr_err("  Out of memory !\n");
+		return;
 	}
-	if (!phb || !phb->hose) {
-		pr_err("PCI: Failed to allocate PCI controller for %s\n",
+
+	/* Allocate PCI controller */
+	memset(phb, 0, sizeof(struct pnv_phb));
+	phb->hose = hose = pcibios_alloc_controller(np);
+	if (!phb->hose) {
+		pr_err("  Can't allocate PCI controller for %s\n",
 		       np->full_name);
+		free_bootmem((unsigned long)phb, sizeof(struct pnv_phb));
 		return;
 	}
 
 	spin_lock_init(&phb->lock);
-	/* XXX Use device-tree */
-	hose->first_busno = 0;
-	hose->last_busno = 0xff;
+	prop32 = of_get_property(np, "bus-range", &len);
+	if (prop32 && len == 8) {
+		hose->first_busno = be32_to_cpu(prop32[0]);
+		hose->last_busno = be32_to_cpu(prop32[1]);
+	} else {
+		pr_warn("  Broken <bus-range> on %s\n", np->full_name);
+		hose->first_busno = 0;
+		hose->last_busno = 0xff;
+	}
 	hose->private_data = phb;
+	phb->hub_id = hub_id;
 	phb->opal_id = phb_id;
 	phb->type = ioda_type;
 
@@ -1099,8 +1284,7 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np, int ioda_type)
 		phb->model = PNV_PHB_MODEL_UNKNOWN;
 
 	/* Parse 32-bit and IO ranges (if any) */
-	pci_process_bridge_OF_ranges(phb->hose, np, primary);
-	primary = 0;
+	pci_process_bridge_OF_ranges(hose, np, !hose->global_number);
 
 	/* Get registers */
 	phb->regs = of_iomap(np, 0);
@@ -1108,12 +1292,13 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np, int ioda_type)
 		pr_err("  Failed to map registers !\n");
 
 	/* Initialize more IODA stuff */
+	phb->ioda.total_pe = 1;
 	prop32 = of_get_property(np, "ibm,opal-num-pes", NULL);
-	if (!prop32)
-		phb->ioda.total_pe = 1;
-	else
-		phb->ioda.total_pe = *prop32;
-
+	if (prop32)
+		phb->ioda.total_pe = be32_to_cpup(prop32);
+	prop32 = of_get_property(np, "ibm,opal-reserved-pe", NULL);
+	if (prop32)
+		phb->ioda.reserved_pe = be32_to_cpup(prop32);
 	phb->ioda.m32_size = resource_size(&hose->mem_resources[0]);
 	/* FW Has already off top 64k of M32 space (MSI space) */
 	phb->ioda.m32_size += 0x10000;
@@ -1124,24 +1309,24 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np, int ioda_type)
 	phb->ioda.io_segsize = phb->ioda.io_size / phb->ioda.total_pe;
 	phb->ioda.io_pci_base = 0; /* XXX calculate this ? */
 
-	/* Allocate aux data & arrays
-	 *
-	 * XXX TODO: Don't allocate io segmap on PHB3
-	 */
+	/* Allocate aux data & arrays. We don't have IO ports on PHB3 */
 	size = _ALIGN_UP(phb->ioda.total_pe / 8, sizeof(unsigned long));
 	m32map_off = size;
 	size += phb->ioda.total_pe * sizeof(phb->ioda.m32_segmap[0]);
-	iomap_off = size;
-	size += phb->ioda.total_pe * sizeof(phb->ioda.io_segmap[0]);
+	if (phb->type == PNV_PHB_IODA1) {
+		iomap_off = size;
+		size += phb->ioda.total_pe * sizeof(phb->ioda.io_segmap[0]);
+	}
 	pemap_off = size;
 	size += phb->ioda.total_pe * sizeof(struct pnv_ioda_pe);
 	aux = alloc_bootmem(size);
 	memset(aux, 0, size);
 	phb->ioda.pe_alloc = aux;
 	phb->ioda.m32_segmap = aux + m32map_off;
-	phb->ioda.io_segmap = aux + iomap_off;
+	if (phb->type == PNV_PHB_IODA1)
+		phb->ioda.io_segmap = aux + iomap_off;
 	phb->ioda.pe_array = aux + pemap_off;
-	set_bit(0, phb->ioda.pe_alloc);
+	set_bit(phb->ioda.reserved_pe, phb->ioda.pe_alloc);
 
 	INIT_LIST_HEAD(&phb->ioda.pe_dma_list);
 	INIT_LIST_HEAD(&phb->ioda.pe_list);
@@ -1166,18 +1351,24 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np, int ioda_type)
 					 segment_size);
 #endif
 
-	pr_info("  %d PE's M32: 0x%x [segment=0x%x] IO: 0x%x [segment=0x%x]\n",
+	pr_info("  %d (%d) PE's M32: 0x%x [segment=0x%x]"
+		" IO: 0x%x [segment=0x%x]\n",
 		phb->ioda.total_pe,
+		phb->ioda.reserved_pe,
 		phb->ioda.m32_size, phb->ioda.m32_segsize,
 		phb->ioda.io_size, phb->ioda.io_segsize);
 
 	phb->hose->ops = &pnv_pci_ops;
+#ifdef CONFIG_EEH
+	phb->eeh_ops = &ioda_eeh_ops;
+#endif
 
 	/* Setup RID -> PE mapping function */
 	phb->bdfn_to_pe = pnv_ioda_bdfn_to_pe;
 
 	/* Setup TCEs */
 	phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup;
+	phb->dma_set_mask = pnv_pci_ioda_dma_set_mask;
 
 	/* Setup shutdown function for kexec */
 	phb->shutdown = pnv_pci_ioda_shutdown;
@@ -1195,6 +1386,7 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np, int ioda_type)
 	ppc_md.pcibios_fixup = pnv_pci_ioda_fixup;
 	ppc_md.pcibios_enable_device_hook = pnv_pci_enable_device_hook;
 	ppc_md.pcibios_window_alignment = pnv_pci_window_alignment;
+	ppc_md.pcibios_reset_secondary_bus = pnv_pci_reset_secondary_bus;
 	pci_add_flags(PCI_REASSIGN_ALL_RSRC);
 
 	/* Reset IODA tables to a clean state */
@@ -1202,23 +1394,27 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np, int ioda_type)
 	if (rc)
 		pr_warning("  OPAL Error %ld performing IODA table reset !\n", rc);
 
-	/*
-	 * On IODA1 map everything to PE#0, on IODA2 we assume the IODA reset
-	 * has cleared the RTT which has the same effect
+	/* If we're running in a kdump kernel, the previous kernel never
+	 * shut down PCI devices correctly. We already got the IODA table
+	 * cleaned out. So we have to issue a PHB reset to stop all PCI
+	 * transactions from the previous kernel.
 	 */
-	if (ioda_type == PNV_PHB_IODA1)
-		opal_pci_set_pe(phb_id, 0, 0, 7, 1, 1 , OPAL_MAP_PE);
+	if (is_kdump_kernel()) {
+		pr_info("  Issue PHB reset ...\n");
+		ioda_eeh_phb_reset(hose, EEH_RESET_FUNDAMENTAL);
+		ioda_eeh_phb_reset(hose, OPAL_DEASSERT_RESET);
+	}
 }
 
-void pnv_pci_init_ioda2_phb(struct device_node *np)
+void __init pnv_pci_init_ioda2_phb(struct device_node *np)
 {
-	pnv_pci_init_ioda_phb(np, PNV_PHB_IODA2);
+	pnv_pci_init_ioda_phb(np, 0, PNV_PHB_IODA2);
 }
 
 void __init pnv_pci_init_ioda_hub(struct device_node *np)
 {
 	struct device_node *phbn;
-	const u64 *prop64;
+	const __be64 *prop64;
 	u64 hub_id;
 
 	pr_info("Probing IODA IO-Hub %s\n", np->full_name);
@@ -1235,6 +1431,6 @@ void __init pnv_pci_init_ioda_hub(struct device_node *np)
 	for_each_child_of_node(np, phbn) {
 		/* Look for IODA1 PHBs */
 		if (of_device_is_compatible(phbn, "ibm,ioda-phb"))
-			pnv_pci_init_ioda_phb(phbn, PNV_PHB_IODA1);
+			pnv_pci_init_ioda_phb(phbn, hub_id, PNV_PHB_IODA1);
 	}
 }
diff --git a/arch/powerpc/platforms/powernv/pci-p5ioc2.c b/arch/powerpc/platforms/powernv/pci-p5ioc2.c
index 92b37a0186c..e3807d69393 100644
--- a/arch/powerpc/platforms/powernv/pci-p5ioc2.c
+++ b/arch/powerpc/platforms/powernv/pci-p5ioc2.c
@@ -86,17 +86,20 @@ static void pnv_pci_init_p5ioc2_msis(struct pnv_phb *phb) { }
 static void pnv_pci_p5ioc2_dma_dev_setup(struct pnv_phb *phb,
 					 struct pci_dev *pdev)
 {
-	if (phb->p5ioc2.iommu_table.it_map == NULL)
+	if (phb->p5ioc2.iommu_table.it_map == NULL) {
 		iommu_init_table(&phb->p5ioc2.iommu_table, phb->hose->node);
+		iommu_register_group(&phb->p5ioc2.iommu_table,
+				pci_domain_nr(phb->hose->bus), phb->opal_id);
+	}
 
-	set_iommu_table_base(&pdev->dev, &phb->p5ioc2.iommu_table);
+	set_iommu_table_base_and_group(&pdev->dev, &phb->p5ioc2.iommu_table);
 }
 
-static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np,
+static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, u64 hub_id,
 					   void *tce_mem, u64 tce_size)
 {
 	struct pnv_phb *phb;
-	const u64 *prop64;
+	const __be64 *prop64;
 	u64 phb_id;
 	int64_t rc;
 	static int primary = 1;
@@ -133,6 +136,7 @@ static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np,
 	phb->hose->first_busno = 0;
 	phb->hose->last_busno = 0xff;
 	phb->hose->private_data = phb;
+	phb->hub_id = hub_id;
 	phb->opal_id = phb_id;
 	phb->type = PNV_PHB_P5IOC2;
 	phb->model = PNV_PHB_MODEL_P5IOC2;
@@ -174,7 +178,7 @@ static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np,
 void __init pnv_pci_init_p5ioc2_hub(struct device_node *np)
 {
 	struct device_node *phbn;
-	const u64 *prop64;
+	const __be64 *prop64;
 	u64 hub_id;
 	void *tce_mem;
 	uint64_t tce_per_phb;
@@ -226,7 +230,8 @@ void __init pnv_pci_init_p5ioc2_hub(struct device_node *np)
 	for_each_child_of_node(np, phbn) {
 		if (of_device_is_compatible(phbn, "ibm,p5ioc2-pcix") ||
 		    of_device_is_compatible(phbn, "ibm,p5ioc2-pciex")) {
-			pnv_pci_init_p5ioc2_phb(phbn, tce_mem, tce_per_phb);
+			pnv_pci_init_p5ioc2_phb(phbn, hub_id,
+					tce_mem, tce_per_phb);
 			tce_mem += tce_per_phb;
 		}
 	}
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index 277343cc6a3..f91a4e5d872 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -20,6 +20,7 @@
 #include <linux/irq.h>
 #include <linux/io.h>
 #include <linux/msi.h>
+#include <linux/iommu.h>
 
 #include <asm/sections.h>
 #include <asm/io.h>
@@ -32,6 +33,8 @@
 #include <asm/iommu.h>
 #include <asm/tce.h>
 #include <asm/firmware.h>
+#include <asm/eeh_event.h>
+#include <asm/eeh.h>
 
 #include "powernv.h"
 #include "pci.h"
@@ -121,77 +124,195 @@ static void pnv_teardown_msi_irqs(struct pci_dev *pdev)
 }
 #endif /* CONFIG_PCI_MSI */
 
-static void pnv_pci_dump_p7ioc_diag_data(struct pnv_phb *phb)
+static void pnv_pci_dump_p7ioc_diag_data(struct pci_controller *hose,
+					 struct OpalIoPhbErrorCommon *common)
 {
-	struct OpalIoP7IOCPhbErrorData *data = &phb->diag.p7ioc;
+	struct OpalIoP7IOCPhbErrorData *data;
 	int i;
 
-	pr_info("PHB %d diagnostic data:\n", phb->hose->global_number);
-
-	pr_info("  brdgCtl              = 0x%08x\n", data->brdgCtl);
-
-	pr_info("  portStatusReg        = 0x%08x\n", data->portStatusReg);
-	pr_info("  rootCmplxStatus      = 0x%08x\n", data->rootCmplxStatus);
-	pr_info("  busAgentStatus       = 0x%08x\n", data->busAgentStatus);
-
-	pr_info("  deviceStatus         = 0x%08x\n", data->deviceStatus);
-	pr_info("  slotStatus           = 0x%08x\n", data->slotStatus);
-	pr_info("  linkStatus           = 0x%08x\n", data->linkStatus);
-	pr_info("  devCmdStatus         = 0x%08x\n", data->devCmdStatus);
-	pr_info("  devSecStatus         = 0x%08x\n", data->devSecStatus);
-
-	pr_info("  rootErrorStatus      = 0x%08x\n", data->rootErrorStatus);
-	pr_info("  uncorrErrorStatus    = 0x%08x\n", data->uncorrErrorStatus);
-	pr_info("  corrErrorStatus      = 0x%08x\n", data->corrErrorStatus);
-	pr_info("  tlpHdr1              = 0x%08x\n", data->tlpHdr1);
-	pr_info("  tlpHdr2              = 0x%08x\n", data->tlpHdr2);
-	pr_info("  tlpHdr3              = 0x%08x\n", data->tlpHdr3);
-	pr_info("  tlpHdr4              = 0x%08x\n", data->tlpHdr4);
-	pr_info("  sourceId             = 0x%08x\n", data->sourceId);
-
-	pr_info("  errorClass           = 0x%016llx\n", data->errorClass);
-	pr_info("  correlator           = 0x%016llx\n", data->correlator);
-
-	pr_info("  p7iocPlssr           = 0x%016llx\n", data->p7iocPlssr);
-	pr_info("  p7iocCsr             = 0x%016llx\n", data->p7iocCsr);
-	pr_info("  lemFir               = 0x%016llx\n", data->lemFir);
-	pr_info("  lemErrorMask         = 0x%016llx\n", data->lemErrorMask);
-	pr_info("  lemWOF               = 0x%016llx\n", data->lemWOF);
-	pr_info("  phbErrorStatus       = 0x%016llx\n", data->phbErrorStatus);
-	pr_info("  phbFirstErrorStatus  = 0x%016llx\n", data->phbFirstErrorStatus);
-	pr_info("  phbErrorLog0         = 0x%016llx\n", data->phbErrorLog0);
-	pr_info("  phbErrorLog1         = 0x%016llx\n", data->phbErrorLog1);
-	pr_info("  mmioErrorStatus      = 0x%016llx\n", data->mmioErrorStatus);
-	pr_info("  mmioFirstErrorStatus = 0x%016llx\n", data->mmioFirstErrorStatus);
-	pr_info("  mmioErrorLog0        = 0x%016llx\n", data->mmioErrorLog0);
-	pr_info("  mmioErrorLog1        = 0x%016llx\n", data->mmioErrorLog1);
-	pr_info("  dma0ErrorStatus      = 0x%016llx\n", data->dma0ErrorStatus);
-	pr_info("  dma0FirstErrorStatus = 0x%016llx\n", data->dma0FirstErrorStatus);
-	pr_info("  dma0ErrorLog0        = 0x%016llx\n", data->dma0ErrorLog0);
-	pr_info("  dma0ErrorLog1        = 0x%016llx\n", data->dma0ErrorLog1);
-	pr_info("  dma1ErrorStatus      = 0x%016llx\n", data->dma1ErrorStatus);
-	pr_info("  dma1FirstErrorStatus = 0x%016llx\n", data->dma1FirstErrorStatus);
-	pr_info("  dma1ErrorLog0        = 0x%016llx\n", data->dma1ErrorLog0);
-	pr_info("  dma1ErrorLog1        = 0x%016llx\n", data->dma1ErrorLog1);
+	data = (struct OpalIoP7IOCPhbErrorData *)common;
+	pr_info("P7IOC PHB#%d Diag-data (Version: %d)\n",
+		hose->global_number, common->version);
+
+	if (data->brdgCtl)
+		pr_info("brdgCtl:     %08x\n",
+			data->brdgCtl);
+	if (data->portStatusReg || data->rootCmplxStatus ||
+	    data->busAgentStatus)
+		pr_info("UtlSts:      %08x %08x %08x\n",
+			data->portStatusReg, data->rootCmplxStatus,
+			data->busAgentStatus);
+	if (data->deviceStatus || data->slotStatus   ||
+	    data->linkStatus   || data->devCmdStatus ||
+	    data->devSecStatus)
+		pr_info("RootSts:     %08x %08x %08x %08x %08x\n",
+			data->deviceStatus, data->slotStatus,
+			data->linkStatus, data->devCmdStatus,
+			data->devSecStatus);
+	if (data->rootErrorStatus   || data->uncorrErrorStatus ||
+	    data->corrErrorStatus)
+		pr_info("RootErrSts:  %08x %08x %08x\n",
+			data->rootErrorStatus, data->uncorrErrorStatus,
+			data->corrErrorStatus);
+	if (data->tlpHdr1 || data->tlpHdr2 ||
+	    data->tlpHdr3 || data->tlpHdr4)
+		pr_info("RootErrLog:  %08x %08x %08x %08x\n",
+			data->tlpHdr1, data->tlpHdr2,
+			data->tlpHdr3, data->tlpHdr4);
+	if (data->sourceId || data->errorClass ||
+	    data->correlator)
+		pr_info("RootErrLog1: %08x %016llx %016llx\n",
+			data->sourceId, data->errorClass,
+			data->correlator);
+	if (data->p7iocPlssr || data->p7iocCsr)
+		pr_info("PhbSts:      %016llx %016llx\n",
+			data->p7iocPlssr, data->p7iocCsr);
+	if (data->lemFir)
+		pr_info("Lem:         %016llx %016llx %016llx\n",
+			data->lemFir, data->lemErrorMask,
+			data->lemWOF);
+	if (data->phbErrorStatus)
+		pr_info("PhbErr:      %016llx %016llx %016llx %016llx\n",
+			data->phbErrorStatus, data->phbFirstErrorStatus,
+			data->phbErrorLog0, data->phbErrorLog1);
+	if (data->mmioErrorStatus)
+		pr_info("OutErr:      %016llx %016llx %016llx %016llx\n",
+			data->mmioErrorStatus, data->mmioFirstErrorStatus,
+			data->mmioErrorLog0, data->mmioErrorLog1);
+	if (data->dma0ErrorStatus)
+		pr_info("InAErr:      %016llx %016llx %016llx %016llx\n",
+			data->dma0ErrorStatus, data->dma0FirstErrorStatus,
+			data->dma0ErrorLog0, data->dma0ErrorLog1);
+	if (data->dma1ErrorStatus)
+		pr_info("InBErr:      %016llx %016llx %016llx %016llx\n",
+			data->dma1ErrorStatus, data->dma1FirstErrorStatus,
+			data->dma1ErrorLog0, data->dma1ErrorLog1);
 
 	for (i = 0; i < OPAL_P7IOC_NUM_PEST_REGS; i++) {
 		if ((data->pestA[i] >> 63) == 0 &&
 		    (data->pestB[i] >> 63) == 0)
 			continue;
-		pr_info("  PE[%3d] PESTA        = 0x%016llx\n", i, data->pestA[i]);
-		pr_info("          PESTB        = 0x%016llx\n", data->pestB[i]);
+
+		pr_info("PE[%3d] A/B: %016llx %016llx\n",
+			i, data->pestA[i], data->pestB[i]);
 	}
 }
 
-static void pnv_pci_dump_phb_diag_data(struct pnv_phb *phb)
+static void pnv_pci_dump_phb3_diag_data(struct pci_controller *hose,
+					struct OpalIoPhbErrorCommon *common)
 {
-	switch(phb->model) {
-	case PNV_PHB_MODEL_P7IOC:
-		pnv_pci_dump_p7ioc_diag_data(phb);
+	struct OpalIoPhb3ErrorData *data;
+	int i;
+
+	data = (struct OpalIoPhb3ErrorData*)common;
+	pr_info("PHB3 PHB#%d Diag-data (Version: %d)\n",
+		hose->global_number, be32_to_cpu(common->version));
+	if (data->brdgCtl)
+		pr_info("brdgCtl:     %08x\n",
+			be32_to_cpu(data->brdgCtl));
+	if (data->portStatusReg || data->rootCmplxStatus ||
+	    data->busAgentStatus)
+		pr_info("UtlSts:      %08x %08x %08x\n",
+			be32_to_cpu(data->portStatusReg),
+			be32_to_cpu(data->rootCmplxStatus),
+			be32_to_cpu(data->busAgentStatus));
+	if (data->deviceStatus || data->slotStatus   ||
+	    data->linkStatus   || data->devCmdStatus ||
+	    data->devSecStatus)
+		pr_info("RootSts:     %08x %08x %08x %08x %08x\n",
+			be32_to_cpu(data->deviceStatus),
+			be32_to_cpu(data->slotStatus),
+			be32_to_cpu(data->linkStatus),
+			be32_to_cpu(data->devCmdStatus),
+			be32_to_cpu(data->devSecStatus));
+	if (data->rootErrorStatus || data->uncorrErrorStatus ||
+	    data->corrErrorStatus)
+		pr_info("RootErrSts:  %08x %08x %08x\n",
+			be32_to_cpu(data->rootErrorStatus),
+			be32_to_cpu(data->uncorrErrorStatus),
+			be32_to_cpu(data->corrErrorStatus));
+	if (data->tlpHdr1 || data->tlpHdr2 ||
+	    data->tlpHdr3 || data->tlpHdr4)
+		pr_info("RootErrLog:  %08x %08x %08x %08x\n",
+			be32_to_cpu(data->tlpHdr1),
+			be32_to_cpu(data->tlpHdr2),
+			be32_to_cpu(data->tlpHdr3),
+			be32_to_cpu(data->tlpHdr4));
+	if (data->sourceId || data->errorClass ||
+	    data->correlator)
+		pr_info("RootErrLog1: %08x %016llx %016llx\n",
+			be32_to_cpu(data->sourceId),
+			be64_to_cpu(data->errorClass),
+			be64_to_cpu(data->correlator));
+	if (data->nFir)
+		pr_info("nFir:        %016llx %016llx %016llx\n",
+			be64_to_cpu(data->nFir),
+			be64_to_cpu(data->nFirMask),
+			be64_to_cpu(data->nFirWOF));
+	if (data->phbPlssr || data->phbCsr)
+		pr_info("PhbSts:      %016llx %016llx\n",
+			be64_to_cpu(data->phbPlssr),
+			be64_to_cpu(data->phbCsr));
+	if (data->lemFir)
+		pr_info("Lem:         %016llx %016llx %016llx\n",
+			be64_to_cpu(data->lemFir),
+			be64_to_cpu(data->lemErrorMask),
+			be64_to_cpu(data->lemWOF));
+	if (data->phbErrorStatus)
+		pr_info("PhbErr:      %016llx %016llx %016llx %016llx\n",
+			be64_to_cpu(data->phbErrorStatus),
+			be64_to_cpu(data->phbFirstErrorStatus),
+			be64_to_cpu(data->phbErrorLog0),
+			be64_to_cpu(data->phbErrorLog1));
+	if (data->mmioErrorStatus)
+		pr_info("OutErr:      %016llx %016llx %016llx %016llx\n",
+			be64_to_cpu(data->mmioErrorStatus),
+			be64_to_cpu(data->mmioFirstErrorStatus),
+			be64_to_cpu(data->mmioErrorLog0),
+			be64_to_cpu(data->mmioErrorLog1));
+	if (data->dma0ErrorStatus)
+		pr_info("InAErr:      %016llx %016llx %016llx %016llx\n",
+			be64_to_cpu(data->dma0ErrorStatus),
+			be64_to_cpu(data->dma0FirstErrorStatus),
+			be64_to_cpu(data->dma0ErrorLog0),
+			be64_to_cpu(data->dma0ErrorLog1));
+	if (data->dma1ErrorStatus)
+		pr_info("InBErr:      %016llx %016llx %016llx %016llx\n",
+			be64_to_cpu(data->dma1ErrorStatus),
+			be64_to_cpu(data->dma1FirstErrorStatus),
+			be64_to_cpu(data->dma1ErrorLog0),
+			be64_to_cpu(data->dma1ErrorLog1));
+
+	for (i = 0; i < OPAL_PHB3_NUM_PEST_REGS; i++) {
+		if ((be64_to_cpu(data->pestA[i]) >> 63) == 0 &&
+		    (be64_to_cpu(data->pestB[i]) >> 63) == 0)
+			continue;
+
+		pr_info("PE[%3d] A/B: %016llx %016llx\n",
+				i, be64_to_cpu(data->pestA[i]),
+				be64_to_cpu(data->pestB[i]));
+	}
+}
+
+void pnv_pci_dump_phb_diag_data(struct pci_controller *hose,
+				unsigned char *log_buff)
+{
+	struct OpalIoPhbErrorCommon *common;
+
+	if (!hose || !log_buff)
+		return;
+
+	common = (struct OpalIoPhbErrorCommon *)log_buff;
+	switch (be32_to_cpu(common->ioType)) {
+	case OPAL_PHB_ERROR_DATA_TYPE_P7IOC:
+		pnv_pci_dump_p7ioc_diag_data(hose, common);
+		break;
+	case OPAL_PHB_ERROR_DATA_TYPE_PHB3:
+		pnv_pci_dump_phb3_diag_data(hose, common);
 		break;
 	default:
-		pr_warning("PCI %d: Can't decode this PHB diag data\n",
-			   phb->hose->global_number);
+		pr_warn("%s: Unrecognized ioType %d\n",
+			__func__, be32_to_cpu(common->ioType));
 	}
 }
 
@@ -202,7 +323,8 @@ static void pnv_pci_handle_eeh_config(struct pnv_phb *phb, u32 pe_no)
 
 	spin_lock_irqsave(&phb->lock, flags);
 
-	rc = opal_pci_get_phb_diag_data(phb->opal_id, phb->diag.blob, PNV_PCI_DIAG_BUF_SIZE);
+	rc = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag.blob,
+					 PNV_PCI_DIAG_BUF_SIZE);
 	has_diag = (rc == OPAL_SUCCESS);
 
 	rc = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no,
@@ -218,7 +340,7 @@ static void pnv_pci_handle_eeh_config(struct pnv_phb *phb, u32 pe_no)
 		 * with the normal errors generated when probing empty slots
 		 */
 		if (has_diag)
-			pnv_pci_dump_phb_diag_data(phb);
+			pnv_pci_dump_phb_diag_data(phb->hose, phb->diag.blob);
 		else
 			pr_warning("PCI %d: No diag data available\n",
 				   phb->hose->global_number);
@@ -227,43 +349,51 @@ static void pnv_pci_handle_eeh_config(struct pnv_phb *phb, u32 pe_no)
 	spin_unlock_irqrestore(&phb->lock, flags);
 }
 
-static void pnv_pci_config_check_eeh(struct pnv_phb *phb, struct pci_bus *bus,
-				     u32 bdfn)
+static void pnv_pci_config_check_eeh(struct pnv_phb *phb,
+				     struct device_node *dn)
 {
 	s64	rc;
 	u8	fstate;
-	u16	pcierr;
+	__be16	pcierr;
 	u32	pe_no;
 
-	/* Get PE# if we support IODA */
-	pe_no = phb->bdfn_to_pe ? phb->bdfn_to_pe(phb, bus, bdfn & 0xff) : 0;
+	/*
+	 * Get the PE#. During the PCI probe stage, we might not
+	 * setup that yet. So all ER errors should be mapped to
+	 * reserved PE.
+	 */
+	pe_no = PCI_DN(dn)->pe_number;
+	if (pe_no == IODA_INVALID_PE) {
+		if (phb->type == PNV_PHB_P5IOC2)
+			pe_no = 0;
+		else
+			pe_no = phb->ioda.reserved_pe;
+	}
 
 	/* Read freeze status */
 	rc = opal_pci_eeh_freeze_status(phb->opal_id, pe_no, &fstate, &pcierr,
 					NULL);
 	if (rc) {
-		pr_warning("PCI %d: Failed to read EEH status for PE#%d,"
-			   " err %lld\n", phb->hose->global_number, pe_no, rc);
+		pr_warning("%s: Can't read EEH status (PE#%d) for "
+			   "%s, err %lld\n",
+			   __func__, pe_no, dn->full_name, rc);
 		return;
 	}
-	cfg_dbg(" -> EEH check, bdfn=%04x PE%d fstate=%x\n",
-		bdfn, pe_no, fstate);
+	cfg_dbg(" -> EEH check, bdfn=%04x PE#%d fstate=%x\n",
+		(PCI_DN(dn)->busno << 8) | (PCI_DN(dn)->devfn),
+		pe_no, fstate);
 	if (fstate != 0)
 		pnv_pci_handle_eeh_config(phb, pe_no);
 }
 
-static int pnv_pci_read_config(struct pci_bus *bus,
-			       unsigned int devfn,
-			       int where, int size, u32 *val)
+int pnv_pci_cfg_read(struct device_node *dn,
+		     int where, int size, u32 *val)
 {
-	struct pci_controller *hose = pci_bus_to_host(bus);
-	struct pnv_phb *phb = hose->private_data;
-	u32 bdfn = (((uint64_t)bus->number) << 8) | devfn;
+	struct pci_dn *pdn = PCI_DN(dn);
+	struct pnv_phb *phb = pdn->phb->private_data;
+	u32 bdfn = (pdn->busno << 8) | pdn->devfn;
 	s64 rc;
 
-	if (hose == NULL)
-		return PCIBIOS_DEVICE_NOT_FOUND;
-
 	switch (size) {
 	case 1: {
 		u8 v8;
@@ -272,43 +402,36 @@ static int pnv_pci_read_config(struct pci_bus *bus,
 		break;
 	}
 	case 2: {
-		u16 v16;
+		__be16 v16;
 		rc = opal_pci_config_read_half_word(phb->opal_id, bdfn, where,
 						   &v16);
-		*val = (rc == OPAL_SUCCESS) ? v16 : 0xffff;
+		*val = (rc == OPAL_SUCCESS) ? be16_to_cpu(v16) : 0xffff;
 		break;
 	}
 	case 4: {
-		u32 v32;
+		__be32 v32;
 		rc = opal_pci_config_read_word(phb->opal_id, bdfn, where, &v32);
-		*val = (rc == OPAL_SUCCESS) ? v32 : 0xffffffff;
+		*val = (rc == OPAL_SUCCESS) ? be32_to_cpu(v32) : 0xffffffff;
 		break;
 	}
 	default:
 		return PCIBIOS_FUNC_NOT_SUPPORTED;
 	}
-	cfg_dbg("pnv_pci_read_config bus: %x devfn: %x +%x/%x -> %08x\n",
-		bus->number, devfn, where, size, *val);
-
-	/* Check if the PHB got frozen due to an error (no response) */
-	pnv_pci_config_check_eeh(phb, bus, bdfn);
 
+	cfg_dbg("%s: bus: %x devfn: %x +%x/%x -> %08x\n",
+		__func__, pdn->busno, pdn->devfn, where, size, *val);
 	return PCIBIOS_SUCCESSFUL;
 }
 
-static int pnv_pci_write_config(struct pci_bus *bus,
-				unsigned int devfn,
-				int where, int size, u32 val)
+int pnv_pci_cfg_write(struct device_node *dn,
+		      int where, int size, u32 val)
 {
-	struct pci_controller *hose = pci_bus_to_host(bus);
-	struct pnv_phb *phb = hose->private_data;
-	u32 bdfn = (((uint64_t)bus->number) << 8) | devfn;
+	struct pci_dn *pdn = PCI_DN(dn);
+	struct pnv_phb *phb = pdn->phb->private_data;
+	u32 bdfn = (pdn->busno << 8) | pdn->devfn;
 
-	if (hose == NULL)
-		return PCIBIOS_DEVICE_NOT_FOUND;
-
-	cfg_dbg("pnv_pci_write_config bus: %x devfn: %x +%x/%x -> %08x\n",
-		bus->number, devfn, where, size, val);
+	cfg_dbg("%s: bus: %x devfn: %x +%x/%x -> %08x\n",
+		__func__, pdn->busno, pdn->devfn, where, size, val);
 	switch (size) {
 	case 1:
 		opal_pci_config_write_byte(phb->opal_id, bdfn, where, val);
@@ -322,23 +445,117 @@ static int pnv_pci_write_config(struct pci_bus *bus,
 	default:
 		return PCIBIOS_FUNC_NOT_SUPPORTED;
 	}
-	/* Check if the PHB got frozen due to an error (no response) */
-	pnv_pci_config_check_eeh(phb, bus, bdfn);
 
 	return PCIBIOS_SUCCESSFUL;
 }
 
+#ifdef CONFIG_EEH
+static bool pnv_pci_cfg_check(struct pci_controller *hose,
+			      struct device_node *dn)
+{
+	struct eeh_dev *edev = NULL;
+	struct pnv_phb *phb = hose->private_data;
+
+	/* PHB not flagged for EEH: nothing to veto, allow the access */
+	if (!(phb->flags & PNV_PHB_FLAG_EEH))
+		return true;
+
+	/* Refuse access while the PE is in reset or the device is removed */
+	edev = of_node_to_eeh_dev(dn);
+	if (edev) {
+		if (edev->pe &&
+		    (edev->pe->state & EEH_PE_RESET))
+			return false;
+
+		if (edev->mode & EEH_DEV_REMOVED)
+			return false;
+	}
+
+	return true;
+}
+#else
+static inline bool pnv_pci_cfg_check(struct pci_controller *hose,
+				     struct device_node *dn)
+{
+	return true;	/* no EEH support built in: always allow access */
+}
+#endif /* CONFIG_EEH */
+
+static int pnv_pci_read_config(struct pci_bus *bus,
+			       unsigned int devfn,
+			       int where, int size, u32 *val)
+{
+	struct device_node *dn, *busdn = pci_bus_to_OF_node(bus);
+	struct pci_dn *pdn;
+	struct pnv_phb *phb;
+	bool found = false;
+	int ret;
+
+	*val = 0xFFFFFFFF;	/* default until a read stores a value */
+	for (dn = busdn->child; dn; dn = dn->sibling) {
+		pdn = PCI_DN(dn);
+		if (pdn && pdn->devfn == devfn) {
+			phb = pdn->phb->private_data;
+			found = true;
+			break;
+		}
+	}
+
+	if (!found || !pnv_pci_cfg_check(pdn->phb, dn))
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	ret = pnv_pci_cfg_read(dn, where, size, val);
+	if (phb->flags & PNV_PHB_FLAG_EEH) {
+		if (*val == EEH_IO_ERROR_VALUE(size) &&
+		    eeh_dev_check_failure(of_node_to_eeh_dev(dn)))
+			return PCIBIOS_DEVICE_NOT_FOUND;
+	} else {
+		pnv_pci_config_check_eeh(phb, dn);
+	}
+
+	return ret;
+}
+
+static int pnv_pci_write_config(struct pci_bus *bus,
+				unsigned int devfn,
+				int where, int size, u32 val)
+{
+	struct device_node *dn, *busdn = pci_bus_to_OF_node(bus);
+	struct pci_dn *pdn;
+	struct pnv_phb *phb;
+	bool found = false;
+	int ret;
+
+	for (dn = busdn->child; dn; dn = dn->sibling) {
+		pdn = PCI_DN(dn);
+		if (pdn && pdn->devfn == devfn) {
+			phb = pdn->phb->private_data;
+			found = true;
+			break;
+		}
+	}
+
+	if (!found || !pnv_pci_cfg_check(pdn->phb, dn))
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	ret = pnv_pci_cfg_write(dn, where, size, val);
+	if (!(phb->flags & PNV_PHB_FLAG_EEH))
+		pnv_pci_config_check_eeh(phb, dn);
+
+	return ret;
+}
+
 struct pci_ops pnv_pci_ops = {
-	.read = pnv_pci_read_config,
+	.read  = pnv_pci_read_config,
 	.write = pnv_pci_write_config,
 };
 
 static int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
 			 unsigned long uaddr, enum dma_data_direction direction,
-			 struct dma_attrs *attrs)
+			 struct dma_attrs *attrs, bool rm)
 {
 	u64 proto_tce;
-	u64 *tcep, *tces;
+	__be64 *tcep, *tces;
 	u64 rpn;
 
 	proto_tce = TCE_PCI_READ; // Read allowed
@@ -346,33 +563,48 @@ static int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
 	if (direction != DMA_TO_DEVICE)
 		proto_tce |= TCE_PCI_WRITE;
 
-	tces = tcep = ((u64 *)tbl->it_base) + index - tbl->it_offset;
+	tces = tcep = ((__be64 *)tbl->it_base) + index - tbl->it_offset;
 	rpn = __pa(uaddr) >> TCE_SHIFT;
 
 	while (npages--)
-		*(tcep++) = proto_tce | (rpn++ << TCE_RPN_SHIFT);
+		*(tcep++) = cpu_to_be64(proto_tce | (rpn++ << TCE_RPN_SHIFT));
 
 	/* Some implementations won't cache invalid TCEs and thus may not
 	 * need that flush. We'll probably turn it_type into a bit mask
 	 * of flags if that becomes the case
 	 */
 	if (tbl->it_type & TCE_PCI_SWINV_CREATE)
-		pnv_pci_ioda_tce_invalidate(tbl, tces, tcep - 1);
+		pnv_pci_ioda_tce_invalidate(tbl, tces, tcep - 1, rm);
 
 	return 0;
 }
 
-static void pnv_tce_free(struct iommu_table *tbl, long index, long npages)
+static int pnv_tce_build_vm(struct iommu_table *tbl, long index, long npages,
+			    unsigned long uaddr,
+			    enum dma_data_direction direction,
+			    struct dma_attrs *attrs)
 {
-	u64 *tcep, *tces;
+	return pnv_tce_build(tbl, index, npages, uaddr, direction, attrs,
+			false);
+}
+
+static void pnv_tce_free(struct iommu_table *tbl, long index, long npages,
+		bool rm)
+{
+	__be64 *tcep, *tces;
 
-	tces = tcep = ((u64 *)tbl->it_base) + index - tbl->it_offset;
+	tces = tcep = ((__be64 *)tbl->it_base) + index - tbl->it_offset;
 
 	while (npages--)
-		*(tcep++) = 0;
+		*(tcep++) = cpu_to_be64(0);
 
 	if (tbl->it_type & TCE_PCI_SWINV_FREE)
-		pnv_pci_ioda_tce_invalidate(tbl, tces, tcep - 1);
+		pnv_pci_ioda_tce_invalidate(tbl, tces, tcep - 1, rm);
+}
+
+static void pnv_tce_free_vm(struct iommu_table *tbl, long index, long npages)
+{
+	pnv_tce_free(tbl, index, npages, false);
 }
 
 static unsigned long pnv_tce_get(struct iommu_table *tbl, long index)
@@ -380,13 +612,27 @@ static unsigned long pnv_tce_get(struct iommu_table *tbl, long index)
 	return ((u64 *)tbl->it_base)[index - tbl->it_offset];
 }
 
+static int pnv_tce_build_rm(struct iommu_table *tbl, long index, long npages,
+			    unsigned long uaddr,
+			    enum dma_data_direction direction,
+			    struct dma_attrs *attrs)
+{
+	return pnv_tce_build(tbl, index, npages, uaddr, direction, attrs, true);
+}
+
+static void pnv_tce_free_rm(struct iommu_table *tbl, long index, long npages)
+{
+	pnv_tce_free(tbl, index, npages, true);
+}
+
 void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
 			       void *tce_mem, u64 tce_size,
 			       u64 dma_offset)
 {
 	tbl->it_blocksize = 16;
 	tbl->it_base = (unsigned long)tce_mem;
-	tbl->it_offset = dma_offset >> IOMMU_PAGE_SHIFT;
+	tbl->it_page_shift = IOMMU_PAGE_SHIFT_4K;
+	tbl->it_offset = dma_offset >> tbl->it_page_shift;
 	tbl->it_index = 0;
 	tbl->it_size = tce_size >> 3;
 	tbl->it_busno = 0;
@@ -412,13 +658,14 @@ static struct iommu_table *pnv_pci_setup_bml_iommu(struct pci_controller *hose)
 	pnv_pci_setup_iommu_table(tbl, __va(be64_to_cpup(basep)),
 				  be32_to_cpup(sizep), 0);
 	iommu_init_table(tbl, hose->node);
+	iommu_register_group(tbl, pci_domain_nr(hose->bus), 0);
 
 	/* Deal with SW invalidated TCEs when needed (BML way) */
 	swinvp = of_get_property(hose->dn, "linux,tce-sw-invalidate-info",
 				 NULL);
 	if (swinvp) {
-		tbl->it_busno = swinvp[1];
-		tbl->it_index = (unsigned long)ioremap(swinvp[0], 8);
+		tbl->it_busno = be64_to_cpu(swinvp[1]);
+		tbl->it_index = (unsigned long)ioremap(be64_to_cpup(swinvp), 8);
 		tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE;
 	}
 	return tbl;
@@ -437,7 +684,7 @@ static void pnv_pci_dma_fallback_setup(struct pci_controller *hose,
 		pdn->iommu_table = pnv_pci_setup_bml_iommu(hose);
 	if (!pdn->iommu_table)
 		return;
-	set_iommu_table_base(&pdev->dev, pdn->iommu_table);
+	set_iommu_table_base_and_group(&pdev->dev, pdn->iommu_table);
 }
 
 static void pnv_pci_dma_dev_setup(struct pci_dev *pdev)
@@ -454,6 +701,16 @@ static void pnv_pci_dma_dev_setup(struct pci_dev *pdev)
 		pnv_pci_dma_fallback_setup(hose, pdev);
 }
 
+int pnv_pci_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
+{
+	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
+	struct pnv_phb *phb = hose->private_data;
+
+	if (phb && phb->dma_set_mask)
+		return phb->dma_set_mask(phb, pdev, dma_mask);
+	return __dma_set_mask(&pdev->dev, dma_mask);
+}
+
 void pnv_pci_shutdown(void)
 {
 	struct pci_controller *hose;
@@ -543,8 +800,10 @@ void __init pnv_pci_init(void)
 
 	/* Configure IOMMU DMA hooks */
 	ppc_md.pci_dma_dev_setup = pnv_pci_dma_dev_setup;
-	ppc_md.tce_build = pnv_tce_build;
-	ppc_md.tce_free = pnv_tce_free;
+	ppc_md.tce_build = pnv_tce_build_vm;
+	ppc_md.tce_free = pnv_tce_free_vm;
+	ppc_md.tce_build_rm = pnv_tce_build_rm;
+	ppc_md.tce_free_rm = pnv_tce_free_rm;
 	ppc_md.tce_get = pnv_tce_get;
 	ppc_md.pci_probe_mode = pnv_pci_probe_mode;
 	set_pci_dma_ops(&dma_iommu_ops);
@@ -556,3 +815,32 @@ void __init pnv_pci_init(void)
 	ppc_md.teardown_msi_irqs = pnv_teardown_msi_irqs;
 #endif
 }
+
+static int tce_iommu_bus_notifier(struct notifier_block *nb,
+		unsigned long action, void *data)
+{
+	struct device *dev = data;
+
+	switch (action) {
+	case BUS_NOTIFY_ADD_DEVICE:
+		return iommu_add_device(dev);
+	case BUS_NOTIFY_DEL_DEVICE:
+		if (dev->iommu_group)
+			iommu_del_device(dev);
+		return 0;
+	default:
+		return 0;
+	}
+}
+
+static struct notifier_block tce_iommu_bus_nb = {
+	.notifier_call = tce_iommu_bus_notifier,
+};
+
+static int __init tce_iommu_bus_notifier_init(void)
+{
+	bus_register_notifier(&pci_bus_type, &tce_iommu_bus_nb);
+	return 0;
+}
+
+subsys_initcall_sync(tce_iommu_bus_notifier_init);
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index 25d76c4df50..676232c3432 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -17,7 +17,7 @@ enum pnv_phb_model {
 	PNV_PHB_MODEL_PHB3,
 };
 
-#define PNV_PCI_DIAG_BUF_SIZE	4096
+#define PNV_PCI_DIAG_BUF_SIZE	8192
 #define PNV_IODA_PE_DEV		(1 << 0)	/* PE has single PCI device	*/
 #define PNV_IODA_PE_BUS		(1 << 1)	/* PE has primary PCI bus	*/
 #define PNV_IODA_PE_BUS_ALL	(1 << 2)	/* PE has subordinate buses	*/
@@ -52,8 +52,11 @@ struct pnv_ioda_pe {
 	int			tce32_seg;
 	int			tce32_segcount;
 	struct iommu_table	tce32_table;
+	phys_addr_t		tce_inval_reg_phys;
 
-	/* XXX TODO: Add support for additional 64-bit iommus */
+	/* 64-bit TCE bypass region */
+	bool			tce_bypass_enabled;
+	uint64_t		tce_bypass_base;
 
 	/* MSIs. MVE index is identical for for 32 and 64 bit MSI
 	 * and -1 if not supported. (It's actually identical to the
@@ -66,15 +69,42 @@ struct pnv_ioda_pe {
 	struct list_head	list;
 };
 
+/* IOC dependent EEH operations */
+#ifdef CONFIG_EEH
+struct pnv_eeh_ops {
+	int (*post_init)(struct pci_controller *hose);
+	int (*set_option)(struct eeh_pe *pe, int option);
+	int (*get_state)(struct eeh_pe *pe);
+	int (*reset)(struct eeh_pe *pe, int option);
+	int (*get_log)(struct eeh_pe *pe, int severity,
+		       char *drv_log, unsigned long len);
+	int (*configure_bridge)(struct eeh_pe *pe);
+	int (*next_error)(struct eeh_pe **pe);
+};
+#endif /* CONFIG_EEH */
+
+#define PNV_PHB_FLAG_EEH	(1 << 0)
+
 struct pnv_phb {
 	struct pci_controller	*hose;
 	enum pnv_phb_type	type;
 	enum pnv_phb_model	model;
+	u64			hub_id;
 	u64			opal_id;
+	int			flags;
 	void __iomem		*regs;
 	int			initialized;
 	spinlock_t		lock;
 
+#ifdef CONFIG_EEH
+	struct pnv_eeh_ops	*eeh_ops;
+#endif
+
+#ifdef CONFIG_DEBUG_FS
+	int			has_dbgfs;
+	struct dentry		*dbgfs;
+#endif
+
 #ifdef CONFIG_PCI_MSI
 	unsigned int		msi_base;
 	unsigned int		msi32_support;
@@ -84,6 +114,8 @@ struct pnv_phb {
 			 unsigned int hwirq, unsigned int virq,
 			 unsigned int is_64, struct msi_msg *msg);
 	void (*dma_dev_setup)(struct pnv_phb *phb, struct pci_dev *pdev);
+	int (*dma_set_mask)(struct pnv_phb *phb, struct pci_dev *pdev,
+			    u64 dma_mask);
 	void (*fixup_phb)(struct pci_controller *hose);
 	u32 (*bdfn_to_pe)(struct pnv_phb *phb, struct pci_bus *bus, u32 devfn);
 	void (*shutdown)(struct pnv_phb *phb);
@@ -96,6 +128,7 @@ struct pnv_phb {
 		struct {
 			/* Global bridge info */
 			unsigned int		total_pe;
+			unsigned int		reserved_pe;
 			unsigned int		m32_size;
 			unsigned int		m32_segsize;
 			unsigned int		m32_pci_base;
@@ -142,15 +175,27 @@ struct pnv_phb {
 		} ioda;
 	};
 
-	/* PHB status structure */
+	/* PHB and hub status structure */
 	union {
 		unsigned char			blob[PNV_PCI_DIAG_BUF_SIZE];
 		struct OpalIoP7IOCPhbErrorData	p7ioc;
+		struct OpalIoPhb3ErrorData	phb3;
+		struct OpalIoP7IOCErrorData 	hub_diag;
 	} diag;
+
 };
 
 extern struct pci_ops pnv_pci_ops;
+#ifdef CONFIG_EEH
+extern struct pnv_eeh_ops ioda_eeh_ops;
+#endif
 
+void pnv_pci_dump_phb_diag_data(struct pci_controller *hose,
+				unsigned char *log_buff);
+int pnv_pci_cfg_read(struct device_node *dn,
+		     int where, int size, u32 *val);
+int pnv_pci_cfg_write(struct device_node *dn,
+		      int where, int size, u32 val);
 extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
 				      void *tce_mem, u64 tce_size,
 				      u64 dma_offset);
@@ -158,6 +203,8 @@ extern void pnv_pci_init_p5ioc2_hub(struct device_node *np);
 extern void pnv_pci_init_ioda_hub(struct device_node *np);
 extern void pnv_pci_init_ioda2_phb(struct device_node *np);
 extern void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl,
-					u64 *startp, u64 *endp);
+					__be64 *startp, __be64 *endp, bool rm);
+extern void pnv_pci_reset_secondary_bus(struct pci_dev *dev);
+extern int ioda_eeh_phb_reset(struct pci_controller *hose, int option);
 
 #endif /* __POWERNV_PCI_H */
diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h
index a1c6f83fc39..75501bfede7 100644
--- a/arch/powerpc/platforms/powernv/powernv.h
+++ b/arch/powerpc/platforms/powernv/powernv.h
@@ -7,12 +7,24 @@ extern void pnv_smp_init(void);
 static inline void pnv_smp_init(void) { }
 #endif
 
+struct pci_dev;
+
 #ifdef CONFIG_PCI
 extern void pnv_pci_init(void);
 extern void pnv_pci_shutdown(void);
+extern int pnv_pci_dma_set_mask(struct pci_dev *pdev, u64 dma_mask);
 #else
 static inline void pnv_pci_init(void) { }
 static inline void pnv_pci_shutdown(void) { }
+
+static inline int pnv_pci_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
+{
+	return -ENODEV;
+}
 #endif
 
+extern void pnv_lpc_init(void);
+
+bool cpu_core_split_required(void);
+
 #endif /* _POWERNV_H */
diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c
new file mode 100644
index 00000000000..1cb160dc160
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/rng.c
@@ -0,0 +1,126 @@
+/*
+ * Copyright 2013, Michael Ellerman, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define pr_fmt(fmt)	"powernv-rng: " fmt
+
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_platform.h>
+#include <linux/slab.h>
+#include <linux/smp.h>
+#include <asm/archrandom.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/machdep.h>
+#include <asm/smp.h>
+
+
+struct powernv_rng {
+	void __iomem *regs;
+	unsigned long mask;
+};
+
+static DEFINE_PER_CPU(struct powernv_rng *, powernv_rng);
+
+
+static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val)
+{
+	unsigned long parity;
+
+	/* Calculate the parity of the value */
+	asm ("popcntd %0,%1" : "=r" (parity) : "r" (val));
+
+	/* xor our value with the previous mask */
+	val ^= rng->mask;
+
+	/* update the mask based on the parity of this value */
+	rng->mask = (rng->mask << 1) | (parity & 1);
+
+	return val;
+}
+
+int powernv_get_random_long(unsigned long *v)
+{
+	struct powernv_rng *rng;
+
+	rng = get_cpu_var(powernv_rng);
+
+	*v = rng_whiten(rng, in_be64(rng->regs));
+
+	put_cpu_var(powernv_rng);	/* pairs with get_cpu_var() above */
+
+	return 1;
+}
+EXPORT_SYMBOL_GPL(powernv_get_random_long);
+
+static __init void rng_init_per_cpu(struct powernv_rng *rng,
+				    struct device_node *dn)
+{
+	int chip_id, cpu;
+
+	chip_id = of_get_ibm_chip_id(dn);
+	if (chip_id == -1)
+		pr_warn("No ibm,chip-id found for %s.\n", dn->full_name);
+
+	for_each_possible_cpu(cpu) {
+		if (per_cpu(powernv_rng, cpu) == NULL ||
+		    cpu_to_chip_id(cpu) == chip_id) {
+			per_cpu(powernv_rng, cpu) = rng;
+		}
+	}
+}
+
+static __init int rng_create(struct device_node *dn)
+{
+	struct powernv_rng *rng;
+	unsigned long val;
+
+	rng = kzalloc(sizeof(*rng), GFP_KERNEL);
+	if (!rng)
+		return -ENOMEM;
+
+	rng->regs = of_iomap(dn, 0);
+	if (!rng->regs) {
+		kfree(rng);
+		return -ENXIO;
+	}
+
+	val = in_be64(rng->regs);
+	rng->mask = val;
+
+	rng_init_per_cpu(rng, dn);
+
+	pr_info_once("Registering arch random hook.\n");
+
+	ppc_md.get_random_long = powernv_get_random_long;
+
+	return 0;
+}
+
+static __init int rng_init(void)
+{
+	struct device_node *dn;
+	int rc;
+
+	for_each_compatible_node(dn, NULL, "ibm,power-rng") {
+		rc = rng_create(dn);
+		if (rc) {
+			pr_err("Failed creating rng for %s (%d).\n",
+				dn->full_name, rc);
+			continue;
+		}
+
+		/* Create devices for hwrng driver */
+		of_platform_device_create(dn, NULL, NULL);
+	}
+
+	return 0;
+}
+subsys_initcall(rng_init);
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index d4459bfc92f..d9b88fa7c5a 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -23,19 +23,26 @@
 #include <linux/irq.h>
 #include <linux/seq_file.h>
 #include <linux/of.h>
+#include <linux/of_fdt.h>
 #include <linux/interrupt.h>
 #include <linux/bug.h>
+#include <linux/pci.h>
+#include <linux/cpufreq.h>
 
 #include <asm/machdep.h>
 #include <asm/firmware.h>
 #include <asm/xics.h>
 #include <asm/rtas.h>
 #include <asm/opal.h>
+#include <asm/kexec.h>
+#include <asm/smp.h>
 
 #include "powernv.h"
 
 static void __init pnv_setup_arch(void)
 {
+	set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);
+
 	/* Initialize SMP */
 	pnv_smp_init();
 
@@ -54,6 +61,12 @@ static void __init pnv_setup_arch(void)
 
 static void __init pnv_init_early(void)
 {
+	/*
+	 * Initialize the LPC bus now so that legacy serial
+	 * ports can be found on it
+	 */
+	opal_lpc_init();
+
 #ifdef CONFIG_HVC_OPAL
 	if (firmware_has_feature(FW_FEATURE_OPAL))
 		hvc_opal_init_early();
@@ -89,10 +102,33 @@ static void pnv_show_cpuinfo(struct seq_file *m)
 	of_node_put(root);
 }
 
+static void pnv_prepare_going_down(void)
+{
+	/*
+	 * Disable all notifiers from OPAL, we can't
+	 * service interrupts anymore anyway
+	 */
+	opal_notifier_disable();
+
+	/* Soft disable interrupts */
+	local_irq_disable();
+
+	/*
+	 * Return secondary CPUs to firmware if a flash update
+	 * is pending, otherwise we will get all sorts of error
+	 * messages about CPU being stuck etc.. This will also
+	 * have the side effect of hard disabling interrupts so
+	 * past this point, the kernel is effectively dead.
+	 */
+	opal_flash_term_callback();
+}
+
 static void  __noreturn pnv_restart(char *cmd)
 {
 	long rc = OPAL_BUSY;
 
+	pnv_prepare_going_down();
+
 	while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
 		rc = opal_cec_reboot();
 		if (rc == OPAL_BUSY_EVENT)
@@ -108,6 +144,8 @@ static void __noreturn pnv_power_off(void)
 {
 	long rc = OPAL_BUSY;
 
+	pnv_prepare_going_down();
+
 	while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
 		rc = opal_cec_power_down(0);
 		if (rc == OPAL_BUSY_EVENT)
@@ -128,24 +166,94 @@ static void pnv_progress(char *s, unsigned short hex)
 {
 }
 
+static int pnv_dma_set_mask(struct device *dev, u64 dma_mask)
+{
+	if (dev_is_pci(dev))
+		return pnv_pci_dma_set_mask(to_pci_dev(dev), dma_mask);
+	return __dma_set_mask(dev, dma_mask);
+}
+
 static void pnv_shutdown(void)
 {
 	/* Let the PCI code clear up IODA tables */
 	pnv_pci_shutdown();
 
-	/* And unregister all OPAL interrupts so they don't fire
-	 * up while we kexec
+	/*
+	 * Stop OPAL activity: Unregister all OPAL interrupts so they
+	 * don't fire up while we kexec and make sure all potentially
+	 * DMA'ing ops are complete (such as dump retrieval).
 	 */
 	opal_shutdown();
 }
 
 #ifdef CONFIG_KEXEC
+static void pnv_kexec_wait_secondaries_down(void)
+{
+	int my_cpu, i, notified = -1;
+
+	my_cpu = get_cpu();
+
+	for_each_online_cpu(i) {
+		uint8_t status;
+		int64_t rc;
+
+		if (i == my_cpu)
+			continue;
+
+		for (;;) {
+			rc = opal_query_cpu_status(get_hard_smp_processor_id(i),
+						   &status);
+			if (rc != OPAL_SUCCESS || status != OPAL_THREAD_STARTED)
+				break;
+			barrier();
+			if (i != notified) {
+				printk(KERN_INFO "kexec: waiting for cpu %d "
+				       "(physical %d) to enter OPAL\n",
+				       i, paca[i].hw_cpu_id);
+				notified = i;
+			}
+		}
+	}
+}
+
 static void pnv_kexec_cpu_down(int crash_shutdown, int secondary)
 {
 	xics_kexec_teardown_cpu(secondary);
+
+	/* On OPAL v3, we return all CPUs to firmware */
+
+	if (!firmware_has_feature(FW_FEATURE_OPALv3))
+		return;
+
+	if (secondary) {
+		/* Return secondary CPUs to firmware on OPAL v3 */
+		mb();
+		get_paca()->kexec_state = KEXEC_STATE_REAL_MODE;
+		mb();
+
+		/* Return the CPU to OPAL */
+		opal_return_cpu();
+	} else if (crash_shutdown) {
+		/*
+		 * On crash, we don't wait for secondaries to go
+		 * down as they might be unreachable or hung, so
+		 * instead we just wait a bit and move on.
+		 */
+		mdelay(1);
+	} else {
+		/* Primary waits for the secondaries to have reached OPAL */
+		pnv_kexec_wait_secondaries_down();
+	}
 }
 #endif /* CONFIG_KEXEC */
 
+#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
+static unsigned long pnv_memory_block_size(void)
+{
+	return 256UL * 1024 * 1024;
+}
+#endif
+
 static void __init pnv_setup_machdep_opal(void)
 {
 	ppc_md.get_boot_time = opal_get_boot_time;
@@ -155,6 +263,7 @@ static void __init pnv_setup_machdep_opal(void)
 	ppc_md.power_off = pnv_power_off;
 	ppc_md.halt = pnv_halt;
 	ppc_md.machine_check_exception = opal_machine_check;
+	ppc_md.mce_check_early_recovery = opal_mce_check_early_recovery;
 }
 
 #ifdef CONFIG_PPC_POWERNV_RTAS
@@ -192,6 +301,25 @@ static int __init pnv_probe(void)
 	return 1;
 }
 
+/*
+ * Returns the cpu frequency for 'cpu' in Hz. This is used by
+ * /proc/cpuinfo
+ */
+unsigned long pnv_get_proc_freq(unsigned int cpu)
+{
+	unsigned long ret_freq;
+
+	ret_freq = cpufreq_quick_get(cpu) * 1000ul;
+
+	/*
+	 * If the backend cpufreq driver does not exist,
+	 * then fall back to the old way of reporting the clockrate.
+	 */
+	if (!ret_freq)
+		ret_freq = ppc_proc_freq;
+	return ret_freq;
+}
+
 define_machine(powernv) {
 	.name			= "PowerNV",
 	.probe			= pnv_probe,
@@ -199,11 +327,16 @@ define_machine(powernv) {
 	.setup_arch		= pnv_setup_arch,
 	.init_IRQ		= pnv_init_IRQ,
 	.show_cpuinfo		= pnv_show_cpuinfo,
+	.get_proc_freq          = pnv_get_proc_freq,
 	.progress		= pnv_progress,
 	.machine_shutdown	= pnv_shutdown,
 	.power_save             = power7_idle,
 	.calibrate_decr		= generic_calibrate_decr,
+	.dma_set_mask		= pnv_dma_set_mask,
 #ifdef CONFIG_KEXEC
 	.kexec_cpu_down		= pnv_kexec_cpu_down,
 #endif
+#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
+	.memory_block_size	= pnv_memory_block_size,
+#endif
 };
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index 88c9459c3e0..5fcfcf44e3a 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -30,6 +30,9 @@
 #include <asm/cputhreads.h>
 #include <asm/xics.h>
 #include <asm/opal.h>
+#include <asm/runlatch.h>
+#include <asm/code-patching.h>
+#include <asm/dbell.h>
 
 #include "powernv.h"
 
@@ -40,33 +43,22 @@
 #define DBG(fmt...)
 #endif
 
-static void __cpuinit pnv_smp_setup_cpu(int cpu)
+static void pnv_smp_setup_cpu(int cpu)
 {
 	if (cpu != boot_cpuid)
 		xics_setup_cpu();
-}
-
-static int pnv_smp_cpu_bootable(unsigned int nr)
-{
-	/* Special case - we inhibit secondary thread startup
-	 * during boot if the user requests it.
-	 */
-	if (system_state < SYSTEM_RUNNING && cpu_has_feature(CPU_FTR_SMT)) {
-		if (!smt_enabled_at_boot && cpu_thread_in_core(nr) != 0)
-			return 0;
-		if (smt_enabled_at_boot
-		    && cpu_thread_in_core(nr) >= smt_enabled_at_boot)
-			return 0;
-	}
 
-	return 1;
+#ifdef CONFIG_PPC_DOORBELL
+	if (cpu_has_feature(CPU_FTR_DBELL))
+		doorbell_setup_this_cpu();
+#endif
 }
 
 int pnv_smp_kick_cpu(int nr)
 {
 	unsigned int pcpu = get_hard_smp_processor_id(nr);
-	unsigned long start_here = __pa(*((unsigned long *)
-					  generic_secondary_smp_init));
+	unsigned long start_here =
+			__pa(ppc_function_entry(generic_secondary_smp_init));
 	long rc;
 
 	BUG_ON(nr < 0 || nr >= NR_CPUS);
@@ -172,16 +164,20 @@ static void pnv_smp_cpu_kill_self(void)
 	 */
 	mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1);
 	while (!generic_check_cpu_restart(cpu)) {
-		power7_nap();
-		if (!generic_check_cpu_restart(cpu)) {
+		ppc64_runlatch_off();
+		power7_nap(1);
+		ppc64_runlatch_on();
+
+		/* Reenable IRQs briefly to clear the IPI that woke us */
+		local_irq_enable();
+		local_irq_disable();
+		mb();
+
+		if (cpu_core_split_required())
+			continue;
+
+		if (!generic_check_cpu_restart(cpu))
 			DBG("CPU%d Unexpected exit while offline !\n", cpu);
-			/* We may be getting an IPI, so we re-enable
-			 * interrupts to process it, it will be ignored
-			 * since we aren't online (hopefully)
-			 */
-			local_irq_enable();
-			local_irq_disable();
-		}
 	}
 	mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_PECE1);
 	DBG("CPU%d coming online...\n", cpu);
@@ -195,7 +191,7 @@ static struct smp_ops_t pnv_smp_ops = {
 	.probe		= xics_smp_probe,
 	.kick_cpu	= pnv_smp_kick_cpu,
 	.setup_cpu	= pnv_smp_setup_cpu,
-	.cpu_bootable	= pnv_smp_cpu_bootable,
+	.cpu_bootable	= smp_generic_cpu_bootable,
 #ifdef CONFIG_HOTPLUG_CPU
 	.cpu_disable	= pnv_smp_cpu_disable,
 	.cpu_die	= generic_cpu_die,
diff --git a/arch/powerpc/platforms/powernv/subcore-asm.S b/arch/powerpc/platforms/powernv/subcore-asm.S
new file mode 100644
index 00000000000..39bb24aa8f3
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/subcore-asm.S
@@ -0,0 +1,95 @@
+/*
+ * Copyright 2013, Michael (Ellerman|Neuling), IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <asm/asm-offsets.h>
+#include <asm/ppc_asm.h>
+#include <asm/reg.h>
+
+#include "subcore.h"
+
+
+_GLOBAL(split_core_secondary_loop)
+	/*
+	 * r3 = u8 *state, used throughout the routine
+	 * r4 = temp
+	 * r5 = temp
+	 * ..
+	 * r12 = MSR
+	 */
+	mfmsr	r12
+
+	/* Disable interrupts so SRR0/1 don't get trashed */
+	li	r4,0
+	ori	r4,r4,MSR_EE|MSR_SE|MSR_BE|MSR_RI
+	andc	r4,r12,r4
+	sync
+	mtmsrd	r4
+
+	/* Switch to real mode and leave interrupts off */
+	li	r5, MSR_IR|MSR_DR
+	andc	r5, r4, r5
+
+	LOAD_REG_ADDR(r4, real_mode)
+
+	mtspr	SPRN_SRR0,r4
+	mtspr	SPRN_SRR1,r5
+	rfid
+	b	.	/* prevent speculative execution */
+
+real_mode:
+	/* Grab values from unsplit SPRs */
+	mfspr	r6,  SPRN_LDBAR
+	mfspr	r7,  SPRN_PMMAR
+	mfspr	r8,  SPRN_PMCR
+	mfspr	r9,  SPRN_RPR
+	mfspr	r10, SPRN_SDR1
+
+	/* Order reading the SPRs vs telling the primary we are ready to split */
+	sync
+
+	/* Tell thread 0 we are in real mode */
+	li	r4, SYNC_STEP_REAL_MODE
+	stb	r4, 0(r3)
+
+	li	r5, (HID0_POWER8_4LPARMODE | HID0_POWER8_2LPARMODE)@highest
+	sldi	r5, r5, 48
+
+	/* Loop until we see the split happen in HID0 */
+1:	mfspr	r4, SPRN_HID0
+	and.	r4, r4, r5
+	beq	1b
+
+	/*
+	 * We only need to initialise the below regs once for each subcore,
+	 * but it's simpler and harmless to do it on each thread.
+	 */
+
+	/* Make sure various SPRS have sane values */
+	li	r4, 0
+	mtspr	SPRN_LPID, r4
+	mtspr	SPRN_PCR, r4
+	mtspr	SPRN_HDEC, r4
+
+	/* Restore SPR values now we are split */
+	mtspr	SPRN_LDBAR, r6
+	mtspr	SPRN_PMMAR, r7
+	mtspr	SPRN_PMCR, r8
+	mtspr	SPRN_RPR, r9
+	mtspr	SPRN_SDR1, r10
+
+	LOAD_REG_ADDR(r5, virtual_mode)
+
+	/* Get out of real mode */
+	mtspr	SPRN_SRR0,r5
+	mtspr	SPRN_SRR1,r12
+	rfid
+	b	.	/* prevent speculative execution */
+
+virtual_mode:
+	blr
diff --git a/arch/powerpc/platforms/powernv/subcore.c b/arch/powerpc/platforms/powernv/subcore.c
new file mode 100644
index 00000000000..894ecb3eb59
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/subcore.c
@@ -0,0 +1,392 @@
+/*
+ * Copyright 2013, Michael (Ellerman|Neuling), IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define pr_fmt(fmt)	"powernv: " fmt
+
+#include <linux/kernel.h>
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/device.h>
+#include <linux/gfp.h>
+#include <linux/smp.h>
+#include <linux/stop_machine.h>
+
+#include <asm/cputhreads.h>
+#include <asm/kvm_ppc.h>
+#include <asm/machdep.h>
+#include <asm/opal.h>
+#include <asm/smp.h>
+
+#include "subcore.h"
+
+
+/*
+ * Split/unsplit procedure:
+ *
+ * A core can be in one of three states, unsplit, 2-way split, and 4-way split.
+ *
+ * The mapping to subcores_per_core is simple:
+ *
+ *  State       | subcores_per_core
+ *  ------------|------------------
+ *  Unsplit     |        1
+ *  2-way split |        2
+ *  4-way split |        4
+ *
+ * The core is split along thread boundaries, the mapping between subcores and
+ * threads is as follows:
+ *
+ *  Unsplit:
+ *          ----------------------------
+ *  Subcore |            0             |
+ *          ----------------------------
+ *  Thread  |  0  1  2  3  4  5  6  7  |
+ *          ----------------------------
+ *
+ *  2-way split:
+ *          -------------------------------------
+ *  Subcore |        0        |        1        |
+ *          -------------------------------------
+ *  Thread  |  0   1   2   3  |  4   5   6   7  |
+ *          -------------------------------------
+ *
+ *  4-way split:
+ *          -----------------------------------------
+ *  Subcore |    0    |    1    |    2    |    3    |
+ *          -----------------------------------------
+ *  Thread  |  0   1  |  2   3  |  4   5  |  6   7  |
+ *          -----------------------------------------
+ *
+ *
+ * Transitions
+ * -----------
+ *
+ * It is not possible to transition between either of the split states, the
+ * core must first be unsplit. The legal transitions are:
+ *
+ *  -----------          ---------------
+ *  |         |  <---->  | 2-way split |
+ *  |         |          ---------------
+ *  | Unsplit |
+ *  |         |          ---------------
+ *  |         |  <---->  | 4-way split |
+ *  -----------          ---------------
+ *
+ * Unsplitting
+ * -----------
+ *
+ * Unsplitting is the simpler procedure. It requires thread 0 to request the
+ * unsplit while all other threads NAP.
+ *
+ * Thread 0 clears HID0_POWER8_DYNLPARDIS (Dynamic LPAR Disable). This tells
+ * the hardware that if all threads except 0 are napping, the hardware should
+ * unsplit the core.
+ *
+ * Non-zero threads are sent to a NAP loop, they don't exit the loop until they
+ * see the core unsplit.
+ *
+ * Thread 0 spins waiting for the hardware to see all the other threads napping
+ * and perform the unsplit.
+ *
+ * Once thread 0 sees the unsplit, it IPIs the secondary threads to wake them
+ * out of NAP. They will then see the core unsplit and exit the NAP loop.
+ *
+ * Splitting
+ * ---------
+ *
+ * The basic splitting procedure is fairly straight forward. However it is
+ * complicated by the fact that after the split occurs, the newly created
+ * subcores are not in a fully initialised state.
+ *
+ * Most notably the subcores do not have the correct value for SDR1, which
+ * means they must not be running in virtual mode when the split occurs. The
+ * subcores have separate timebases SPRs but these are pre-synchronised by
+ * opal.
+ *
+ * To begin with secondary threads are sent to an assembly routine. There they
+ * switch to real mode, so they are immune to the uninitialised SDR1 value.
+ * Once in real mode they indicate that they are in real mode, and spin waiting
+ * to see the core split.
+ *
+ * Thread 0 waits to see that all secondaries are in real mode, and then begins
+ * the splitting procedure. It firstly sets HID0_POWER8_DYNLPARDIS, which
+ * prevents the hardware from unsplitting. Then it sets the appropriate HID bit
+ * to request the split, and spins waiting to see that the split has happened.
+ *
+ * Concurrently the secondaries will notice the split. When they do they set up
+ * their SPRs, notably SDR1, and then they can return to virtual mode and exit
+ * the procedure.
+ */
+
+/* Initialised at boot by subcore_init() */
+static int subcores_per_core;
+
+/*
+ * Used to communicate to offline cpus that we want them to pop out of the
+ * offline loop and do a split or unsplit.
+ *
+ * 0 - no split happening
+ * 1 - unsplit in progress
+ * 2 - split to 2 in progress
+ * 4 - split to 4 in progress
+ */
+static int new_split_mode;
+
+static cpumask_var_t cpu_offline_mask;
+
+struct split_state {
+	u8 step;
+	u8 master;
+};
+
+static DEFINE_PER_CPU(struct split_state, split_state);
+
+static void wait_for_sync_step(int step)
+{
+	int i, cpu = smp_processor_id();
+
+	for (i = cpu + 1; i < cpu + threads_per_core; i++)
+		while(per_cpu(split_state, i).step < step)
+			barrier();
+
+	/* Order the wait loop vs any subsequent loads/stores. */
+	mb();
+}
+
+static void unsplit_core(void)
+{
+	u64 hid0, mask;
+	int i, cpu;
+
+	mask = HID0_POWER8_2LPARMODE | HID0_POWER8_4LPARMODE;
+
+	cpu = smp_processor_id();
+	if (cpu_thread_in_core(cpu) != 0) {
+		while (mfspr(SPRN_HID0) & mask)
+			power7_nap(0);
+
+		per_cpu(split_state, cpu).step = SYNC_STEP_UNSPLIT;
+		return;
+	}
+
+	hid0 = mfspr(SPRN_HID0);
+	hid0 &= ~HID0_POWER8_DYNLPARDIS;
+	mtspr(SPRN_HID0, hid0);
+
+	while (mfspr(SPRN_HID0) & mask)
+		cpu_relax();
+
+	/* Wake secondaries out of NAP */
+	for (i = cpu + 1; i < cpu + threads_per_core; i++)
+		smp_send_reschedule(i);
+
+	wait_for_sync_step(SYNC_STEP_UNSPLIT);
+}
+
+static void split_core(int new_mode)
+{
+	struct {  u64 value; u64 mask; } split_parms[2] = {
+		{ HID0_POWER8_1TO2LPAR, HID0_POWER8_2LPARMODE },
+		{ HID0_POWER8_1TO4LPAR, HID0_POWER8_4LPARMODE }
+	};
+	int i, cpu;
+	u64 hid0;
+
+	/* Convert new_mode (2 or 4) into an index into our parms array */
+	i = (new_mode >> 1) - 1;
+	BUG_ON(i < 0 || i > 1);
+
+	cpu = smp_processor_id();
+	if (cpu_thread_in_core(cpu) != 0) {
+		split_core_secondary_loop(&per_cpu(split_state, cpu).step);
+		return;
+	}
+
+	wait_for_sync_step(SYNC_STEP_REAL_MODE);
+
+	/* Write new mode */
+	hid0  = mfspr(SPRN_HID0);
+	hid0 |= HID0_POWER8_DYNLPARDIS | split_parms[i].value;
+	mtspr(SPRN_HID0, hid0);
+
+	/* Wait for it to happen */
+	while (!(mfspr(SPRN_HID0) & split_parms[i].mask))
+		cpu_relax();
+}
+
+static void cpu_do_split(int new_mode)
+{
+	/*
+	 * At boot subcores_per_core will be 0, so we will always unsplit at
+	 * boot. In the usual case where the core is already unsplit it's a
+	 * nop, and this just ensures the kernel's notion of the mode is
+	 * consistent with the hardware.
+	 */
+	if (subcores_per_core != 1)
+		unsplit_core();
+
+	if (new_mode != 1)
+		split_core(new_mode);
+
+	mb();
+	per_cpu(split_state, smp_processor_id()).step = SYNC_STEP_FINISHED;
+}
+
+bool cpu_core_split_required(void)
+{
+	smp_rmb();
+
+	if (!new_split_mode)
+		return false;
+
+	cpu_do_split(new_split_mode);
+
+	return true;
+}
+
+static int cpu_update_split_mode(void *data)
+{
+	int cpu, new_mode = *(int *)data;
+
+	if (this_cpu_ptr(&split_state)->master) {
+		new_split_mode = new_mode;
+		smp_wmb();
+
+		cpumask_andnot(cpu_offline_mask, cpu_present_mask,
+			       cpu_online_mask);
+
+		/* This should work even though the cpu is offline */
+		for_each_cpu(cpu, cpu_offline_mask)
+			smp_send_reschedule(cpu);
+	}
+
+	cpu_do_split(new_mode);
+
+	if (this_cpu_ptr(&split_state)->master) {
+		/* Wait for all cpus to finish before we touch subcores_per_core */
+		for_each_present_cpu(cpu) {
+			if (cpu >= setup_max_cpus)
+				break;
+
+			while(per_cpu(split_state, cpu).step < SYNC_STEP_FINISHED)
+				barrier();
+		}
+
+		new_split_mode = 0;
+
+		/* Make the new mode public */
+		subcores_per_core = new_mode;
+		threads_per_subcore = threads_per_core / subcores_per_core;
+
+		/* Make sure the new mode is written before we exit */
+		mb();
+	}
+
+	return 0;
+}
+
+static int set_subcores_per_core(int new_mode)
+{
+	struct split_state *state;
+	int cpu;
+
+	if (kvm_hv_mode_active()) {
+		pr_err("Unable to change split core mode while KVM active.\n");
+		return -EBUSY;
+	}
+
+	/*
+	 * We are only called at boot, or from the sysfs write. If that ever
+	 * changes we'll need a lock here.
+	 */
+	BUG_ON(new_mode < 1 || new_mode > 4 || new_mode == 3);
+
+	for_each_present_cpu(cpu) {
+		state = &per_cpu(split_state, cpu);
+		state->step = SYNC_STEP_INITIAL;
+		state->master = 0;
+	}
+
+	get_online_cpus();
+
+	/* This cpu will update the globals before exiting stop machine */
+	this_cpu_ptr(&split_state)->master = 1;
+
+	/* Ensure state is consistent before we call the other cpus */
+	mb();
+
+	stop_machine(cpu_update_split_mode, &new_mode, cpu_online_mask);
+
+	put_online_cpus();
+
+	return 0;
+}
+
+static ssize_t __used store_subcores_per_core(struct device *dev,
+		struct device_attribute *attr, const char *buf,
+		size_t count)
+{
+	unsigned long val;
+	int rc;
+
+	/* We are serialised by the attribute lock */
+
+	rc = sscanf(buf, "%lx", &val);
+	if (rc != 1)
+		return -EINVAL;
+
+	switch (val) {
+	case 1:
+	case 2:
+	case 4:
+		if (subcores_per_core == val)
+			/* Nothing to do */
+			goto out;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	rc = set_subcores_per_core(val);
+	if (rc)
+		return rc;
+
+out:
+	return count;
+}
+
+static ssize_t show_subcores_per_core(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%x\n", subcores_per_core);
+}
+
+static DEVICE_ATTR(subcores_per_core, 0644,
+		show_subcores_per_core, store_subcores_per_core);
+
+static int subcore_init(void)
+{
+	if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+		return 0;
+
+	/*
+	 * We need all threads in a core to be present to split/unsplit so
+	 * continue only if setup_max_cpus is a multiple of threads_per_core.
+	 */
+	if (setup_max_cpus % threads_per_core)
+		return 0;
+
+	BUG_ON(!alloc_cpumask_var(&cpu_offline_mask, GFP_KERNEL));
+
+	set_subcores_per_core(1);
+
+	return device_create_file(cpu_subsys.dev_root,
+				  &dev_attr_subcores_per_core);
+}
+machine_device_initcall(powernv, subcore_init);
diff --git a/arch/powerpc/platforms/powernv/subcore.h b/arch/powerpc/platforms/powernv/subcore.h
new file mode 100644
index 00000000000..148abc91deb
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/subcore.h
@@ -0,0 +1,18 @@
+/*
+ * Copyright 2013, Michael Ellerman, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+/* These are ordered; waiters spin while step < the value they expect */
+#define SYNC_STEP_INITIAL	0
+#define SYNC_STEP_UNSPLIT	1	/* Set by secondary when it sees unsplit */
+#define SYNC_STEP_REAL_MODE	2	/* Set by secondary when in real mode  */
+#define SYNC_STEP_FINISHED	3	/* Set by secondary when split/unsplit is done */
+
+#ifndef __ASSEMBLY__
+void split_core_secondary_loop(u8 *state);
+#endif
diff --git a/arch/powerpc/platforms/ps3/Kconfig b/arch/powerpc/platforms/ps3/Kconfig
index e87c1947397..56f274064d6 100644
--- a/arch/powerpc/platforms/ps3/Kconfig
+++ b/arch/powerpc/platforms/ps3/Kconfig
@@ -2,10 +2,8 @@ config PPC_PS3
 	bool "Sony PS3"
 	depends on PPC64 && PPC_BOOK3S
 	select PPC_CELL
-	select USB_ARCH_HAS_OHCI
 	select USB_OHCI_LITTLE_ENDIAN
 	select USB_OHCI_BIG_ENDIAN_MMIO
-	select USB_ARCH_HAS_EHCI
 	select USB_EHCI_BIG_ENDIAN_MMIO
 	select PPC_PCI_CHOICE
 	help
diff --git a/arch/powerpc/platforms/ps3/htab.c b/arch/powerpc/platforms/ps3/htab.c
index 177a2f70700..3e270e3412a 100644
--- a/arch/powerpc/platforms/ps3/htab.c
+++ b/arch/powerpc/platforms/ps3/htab.c
@@ -109,7 +109,8 @@ static long ps3_hpte_remove(unsigned long hpte_group)
 }
 
 static long ps3_hpte_updatepp(unsigned long slot, unsigned long newpp,
-	unsigned long vpn, int psize, int ssize, int local)
+			      unsigned long vpn, int psize, int apsize,
+			      int ssize, int local)
 {
 	int result;
 	u64 hpte_v, want_v, hpte_rs;
@@ -162,7 +163,7 @@ static void ps3_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
 }
 
 static void ps3_hpte_invalidate(unsigned long slot, unsigned long vpn,
-	int psize, int ssize, int local)
+				int psize, int apsize, int ssize, int local)
 {
 	unsigned long flags;
 	int result;
diff --git a/arch/powerpc/platforms/ps3/smp.c b/arch/powerpc/platforms/ps3/smp.c
index 4b35166229f..b358bec6c8c 100644
--- a/arch/powerpc/platforms/ps3/smp.c
+++ b/arch/powerpc/platforms/ps3/smp.c
@@ -76,7 +76,7 @@ static int __init ps3_smp_probe(void)
 
 		BUILD_BUG_ON(PPC_MSG_CALL_FUNCTION    != 0);
 		BUILD_BUG_ON(PPC_MSG_RESCHEDULE       != 1);
-		BUILD_BUG_ON(PPC_MSG_CALL_FUNC_SINGLE != 2);
+		BUILD_BUG_ON(PPC_MSG_TICK_BROADCAST   != 2);
 		BUILD_BUG_ON(PPC_MSG_DEBUGGER_BREAK   != 3);
 
 		for (i = 0; i < MSG_COUNT; i++) {
diff --git a/arch/powerpc/platforms/ps3/spu.c b/arch/powerpc/platforms/ps3/spu.c
index e17fa1432d8..a0bca05e26b 100644
--- a/arch/powerpc/platforms/ps3/spu.c
+++ b/arch/powerpc/platforms/ps3/spu.c
@@ -143,7 +143,7 @@ static void _dump_areas(unsigned int spe_id, unsigned long priv2,
 	pr_debug("%s:%d: shadow:  %lxh\n", func, line, shadow);
 }
 
-inline u64 ps3_get_spe_id(void *arg)
+u64 ps3_get_spe_id(void *arg)
 {
 	return spu_pdata(arg)->spe_id;
 }
diff --git a/arch/powerpc/platforms/ps3/time.c b/arch/powerpc/platforms/ps3/time.c
index cba1e6be68e..ce73ce86561 100644
--- a/arch/powerpc/platforms/ps3/time.c
+++ b/arch/powerpc/platforms/ps3/time.c
@@ -90,7 +90,7 @@ static int __init ps3_rtc_init(void)
 
 	pdev = platform_device_register_simple("rtc-ps3", -1, NULL, 0);
 
-	return PTR_RET(pdev);
+	return PTR_ERR_OR_ZERO(pdev);
 }
 
 module_init(ps3_rtc_init);
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index 4459eff7a75..756b482f819 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -19,8 +19,9 @@ config PPC_PSERIES
 	select ZLIB_DEFLATE
 	select PPC_DOORBELL
 	select HAVE_CONTEXT_TRACKING
-	select HOTPLUG if SMP
 	select HOTPLUG_CPU if SMP
+	select ARCH_RANDOM
+	select PPC_DOORBELL
 	default y
 
 config PPC_SPLPAR
@@ -33,14 +34,9 @@ config PPC_SPLPAR
 	  processors, that is, which share physical processors between
 	  two or more partitions.
 
-config EEH
-	bool
-	depends on PPC_PSERIES && PCI
-	default y
-
 config PSERIES_MSI
        bool
-       depends on PCI_MSI && EEH
+       depends on PCI_MSI && PPC_PSERIES && EEH
        default y
 
 config PSERIES_ENERGY
@@ -116,6 +112,18 @@ config CMM
 	  will be reused for other LPARs. The interface allows firmware to
 	  balance memory across many LPARs.
 
+config HV_PERF_CTRS
+       bool "Hypervisor supplied PMU events (24x7 & GPCI)"
+       default y
+       depends on PERF_EVENTS && PPC_PSERIES
+       help
+	  Enable access to hypervisor supplied counters in perf. Currently,
+	  this enables code that uses the hcall GetPerfCounterInfo and 24x7
+	  interfaces to retrieve counters. GPCI exists on Power 6 and later
+	  systems. 24x7 is available on Power 8 systems.
+
+          If unsure, select Y.
+
 config DTL
 	bool "Dispatch Trace Log"
 	depends on PPC_SPLPAR && DEBUG_FS
@@ -125,12 +133,3 @@ config DTL
 	  which are accessible through a debugfs file.
 
 	  Say N if you are unsure.
-
-config PSERIES_IDLE
-	bool "Cpuidle driver for pSeries platforms"
-	depends on CPU_IDLE
-	depends on PPC_PSERIES
-	default y
-	help
-	  Select this option to enable processor idle state management
-	  through cpuidle subsystem.
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index 53866e537a9..03480796af9 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -3,12 +3,10 @@ ccflags-$(CONFIG_PPC_PSERIES_DEBUG)	+= -DDEBUG
 
 obj-y			:= lpar.o hvCall.o nvram.o reconfig.o \
 			   setup.o iommu.o event_sources.o ras.o \
-			   firmware.o power.o dlpar.o mobility.o
+			   firmware.o power.o dlpar.o mobility.o rng.o
 obj-$(CONFIG_SMP)	+= smp.o
 obj-$(CONFIG_SCANLOG)	+= scanlog.o
-obj-$(CONFIG_EEH)	+= eeh.o eeh_pe.o eeh_dev.o eeh_cache.o \
-			   eeh_driver.o eeh_event.o eeh_sysfs.o \
-			   eeh_pseries.o
+obj-$(CONFIG_EEH)	+= eeh_pseries.o
 obj-$(CONFIG_KEXEC)	+= kexec.o
 obj-$(CONFIG_PCI)	+= pci.o pci_dlpar.o
 obj-$(CONFIG_PSERIES_MSI)	+= msi.o
@@ -23,7 +21,7 @@ obj-$(CONFIG_HCALL_STATS)	+= hvCall_inst.o
 obj-$(CONFIG_CMM)		+= cmm.o
 obj-$(CONFIG_DTL)		+= dtl.o
 obj-$(CONFIG_IO_EVENT_IRQ)	+= io_event_irq.o
-obj-$(CONFIG_PSERIES_IDLE)	+= processor_idle.o
+obj-$(CONFIG_LPARCFG)		+= lparcfg.o
 
 ifeq ($(CONFIG_PPC_PSERIES),y)
 obj-$(CONFIG_SUSPEND)		+= suspend.o
diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c
index c638535753d..2d8bf15879f 100644
--- a/arch/powerpc/platforms/pseries/cmm.c
+++ b/arch/powerpc/platforms/pseries/cmm.c
@@ -25,7 +25,6 @@
 #include <linux/errno.h>
 #include <linux/fs.h>
 #include <linux/gfp.h>
-#include <linux/init.h>
 #include <linux/kthread.h>
 #include <linux/module.h>
 #include <linux/oom.h>
@@ -40,8 +39,7 @@
 #include <asm/pgalloc.h>
 #include <asm/uaccess.h>
 #include <linux/memory.h>
-
-#include "plpar_wrappers.h"
+#include <asm/plpar_wrappers.h>
 
 #define CMM_DRIVER_VERSION	"1.0.0"
 #define CMM_DEFAULT_DELAY	1
diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
index a1a7b9a67ff..2d0b4d68a40 100644
--- a/arch/powerpc/platforms/pseries/dlpar.c
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -11,7 +11,6 @@
  */
 
 #include <linux/kernel.h>
-#include <linux/kref.h>
 #include <linux/notifier.h>
 #include <linux/spinlock.h>
 #include <linux/cpu.h>
@@ -63,26 +62,32 @@ static struct property *dlpar_parse_cc_property(struct cc_workarea *ccwa)
 	return prop;
 }
 
-static struct device_node *dlpar_parse_cc_node(struct cc_workarea *ccwa)
+static struct device_node *dlpar_parse_cc_node(struct cc_workarea *ccwa,
+					       const char *path)
 {
 	struct device_node *dn;
 	char *name;
 
+	/* If parent node path is "/" advance path to NULL terminator to
+	 * prevent double leading slashes in full_name.
+	 */
+	if (!path[1])
+		path++;
+
 	dn = kzalloc(sizeof(*dn), GFP_KERNEL);
 	if (!dn)
 		return NULL;
 
-	/* The configure connector reported name does not contain a
-	 * preceding '/', so we allocate a buffer large enough to
-	 * prepend this to the full_name.
-	 */
 	name = (char *)ccwa + ccwa->name_offset;
-	dn->full_name = kasprintf(GFP_KERNEL, "/%s", name);
+	dn->full_name = kasprintf(GFP_KERNEL, "%s/%s", path, name);
 	if (!dn->full_name) {
 		kfree(dn);
 		return NULL;
 	}
 
+	of_node_set_flag(dn, OF_DYNAMIC);
+	of_node_init(dn);
+
 	return dn;
 }
 
@@ -120,7 +125,8 @@ void dlpar_free_cc_nodes(struct device_node *dn)
 #define CALL_AGAIN	-2
 #define ERR_CFG_USE     -9003
 
-struct device_node *dlpar_configure_connector(u32 drc_index)
+struct device_node *dlpar_configure_connector(u32 drc_index,
+					      struct device_node *parent)
 {
 	struct device_node *dn;
 	struct device_node *first_dn = NULL;
@@ -129,6 +135,7 @@ struct device_node *dlpar_configure_connector(u32 drc_index)
 	struct property *last_property = NULL;
 	struct cc_workarea *ccwa;
 	char *data_buf;
+	const char *parent_path = parent->full_name;
 	int cc_token;
 	int rc = -1;
 
@@ -162,7 +169,7 @@ struct device_node *dlpar_configure_connector(u32 drc_index)
 			break;
 
 		case NEXT_SIBLING:
-			dn = dlpar_parse_cc_node(ccwa);
+			dn = dlpar_parse_cc_node(ccwa, parent_path);
 			if (!dn)
 				goto cc_error;
 
@@ -172,13 +179,17 @@ struct device_node *dlpar_configure_connector(u32 drc_index)
 			break;
 
 		case NEXT_CHILD:
-			dn = dlpar_parse_cc_node(ccwa);
+			if (first_dn)
+				parent_path = last_dn->full_name;
+
+			dn = dlpar_parse_cc_node(ccwa, parent_path);
 			if (!dn)
 				goto cc_error;
 
-			if (!first_dn)
+			if (!first_dn) {
+				dn->parent = parent;
 				first_dn = dn;
-			else {
+			} else {
 				dn->parent = last_dn;
 				if (last_dn)
 					last_dn->child = dn;
@@ -202,6 +213,7 @@ struct device_node *dlpar_configure_connector(u32 drc_index)
 
 		case PREV_PARENT:
 			last_dn = last_dn->parent;
+			parent_path = last_dn->parent->full_name;
 			break;
 
 		case CALL_AGAIN:
@@ -256,8 +268,6 @@ int dlpar_attach_node(struct device_node *dn)
 {
 	int rc;
 
-	of_node_set_flag(dn, OF_DYNAMIC);
-	kref_init(&dn->kref);
 	dn->parent = derive_parent(dn->full_name);
 	if (!dn->parent)
 		return -ENOMEM;
@@ -275,8 +285,15 @@ int dlpar_attach_node(struct device_node *dn)
 
 int dlpar_detach_node(struct device_node *dn)
 {
+	struct device_node *child;
 	int rc;
 
+	child = of_get_next_child(dn, NULL);
+	while (child) {
+		dlpar_detach_node(child);
+		child = of_get_next_child(dn, child);
+	}
+
 	rc = of_detach_node(dn);
 	if (rc)
 		return rc;
@@ -382,57 +399,42 @@ out:
 
 static ssize_t dlpar_cpu_probe(const char *buf, size_t count)
 {
-	struct device_node *dn;
+	struct device_node *dn, *parent;
 	unsigned long drc_index;
-	char *cpu_name;
 	int rc;
 
-	cpu_hotplug_driver_lock();
 	rc = strict_strtoul(buf, 0, &drc_index);
-	if (rc) {
-		rc = -EINVAL;
-		goto out;
-	}
+	if (rc)
+		return -EINVAL;
 
-	dn = dlpar_configure_connector(drc_index);
-	if (!dn) {
-		rc = -EINVAL;
-		goto out;
-	}
+	parent = of_find_node_by_path("/cpus");
+	if (!parent)
+		return -ENODEV;
 
-	/* configure-connector reports cpus as living in the base
-	 * directory of the device tree.  CPUs actually live in the
-	 * cpus directory so we need to fixup the full_name.
-	 */
-	cpu_name = kasprintf(GFP_KERNEL, "/cpus%s", dn->full_name);
-	if (!cpu_name) {
-		dlpar_free_cc_nodes(dn);
-		rc = -ENOMEM;
-		goto out;
-	}
+	dn = dlpar_configure_connector(drc_index, parent);
+	if (!dn)
+		return -EINVAL;
 
-	kfree(dn->full_name);
-	dn->full_name = cpu_name;
+	of_node_put(parent);
 
 	rc = dlpar_acquire_drc(drc_index);
 	if (rc) {
 		dlpar_free_cc_nodes(dn);
-		rc = -EINVAL;
-		goto out;
+		return -EINVAL;
 	}
 
 	rc = dlpar_attach_node(dn);
 	if (rc) {
 		dlpar_release_drc(drc_index);
 		dlpar_free_cc_nodes(dn);
-		goto out;
+		return rc;
 	}
 
 	rc = dlpar_online_cpu(dn);
-out:
-	cpu_hotplug_driver_unlock();
+	if (rc)
+		return rc;
 
-	return rc ? rc : count;
+	return count;
 }
 
 static int dlpar_offline_cpu(struct device_node *dn)
@@ -505,30 +507,27 @@ static ssize_t dlpar_cpu_release(const char *buf, size_t count)
 		return -EINVAL;
 	}
 
-	cpu_hotplug_driver_lock();
 	rc = dlpar_offline_cpu(dn);
 	if (rc) {
 		of_node_put(dn);
-		rc = -EINVAL;
-		goto out;
+		return -EINVAL;
 	}
 
 	rc = dlpar_release_drc(*drc_index);
 	if (rc) {
 		of_node_put(dn);
-		goto out;
+		return rc;
 	}
 
 	rc = dlpar_detach_node(dn);
 	if (rc) {
 		dlpar_acquire_drc(*drc_index);
-		goto out;
+		return rc;
 	}
 
 	of_node_put(dn);
-out:
-	cpu_hotplug_driver_unlock();
-	return rc ? rc : count;
+
+	return count;
 }
 
 static int __init pseries_dlpar_init(void)
diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c
index 0cc0ac07a55..7d61498e45c 100644
--- a/arch/powerpc/platforms/pseries/dtl.c
+++ b/arch/powerpc/platforms/pseries/dtl.c
@@ -20,7 +20,6 @@
  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
-#include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/debugfs.h>
 #include <linux/spinlock.h>
@@ -29,8 +28,7 @@
 #include <asm/firmware.h>
 #include <asm/lppaca.h>
 #include <asm/debug.h>
-
-#include "plpar_wrappers.h"
+#include <asm/plpar_wrappers.h>
 
 struct dtl {
 	struct dtl_entry	*buf;
@@ -87,7 +85,7 @@ static void consume_dtle(struct dtl_entry *dtle, u64 index)
 	barrier();
 
 	/* check for hypervisor ring buffer overflow, ignore this entry if so */
-	if (index + N_DISPATCH_LOG < vpa->dtl_idx)
+	if (index + N_DISPATCH_LOG < be64_to_cpu(vpa->dtl_idx))
 		return;
 
 	++wp;
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
deleted file mode 100644
index 6b73d6c44f5..00000000000
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ /dev/null
@@ -1,942 +0,0 @@
-/*
- * Copyright IBM Corporation 2001, 2005, 2006
- * Copyright Dave Engebretsen & Todd Inglett 2001
- * Copyright Linas Vepstas 2005, 2006
- * Copyright 2001-2012 IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
- *
- * Please address comments and feedback to Linas Vepstas <linas@austin.ibm.com>
- */
-
-#include <linux/delay.h>
-#include <linux/sched.h>
-#include <linux/init.h>
-#include <linux/list.h>
-#include <linux/pci.h>
-#include <linux/proc_fs.h>
-#include <linux/rbtree.h>
-#include <linux/seq_file.h>
-#include <linux/spinlock.h>
-#include <linux/export.h>
-#include <linux/of.h>
-
-#include <linux/atomic.h>
-#include <asm/eeh.h>
-#include <asm/eeh_event.h>
-#include <asm/io.h>
-#include <asm/machdep.h>
-#include <asm/ppc-pci.h>
-#include <asm/rtas.h>
-
-
-/** Overview:
- *  EEH, or "Extended Error Handling" is a PCI bridge technology for
- *  dealing with PCI bus errors that can't be dealt with within the
- *  usual PCI framework, except by check-stopping the CPU.  Systems
- *  that are designed for high-availability/reliability cannot afford
- *  to crash due to a "mere" PCI error, thus the need for EEH.
- *  An EEH-capable bridge operates by converting a detected error
- *  into a "slot freeze", taking the PCI adapter off-line, making
- *  the slot behave, from the OS'es point of view, as if the slot
- *  were "empty": all reads return 0xff's and all writes are silently
- *  ignored.  EEH slot isolation events can be triggered by parity
- *  errors on the address or data busses (e.g. during posted writes),
- *  which in turn might be caused by low voltage on the bus, dust,
- *  vibration, humidity, radioactivity or plain-old failed hardware.
- *
- *  Note, however, that one of the leading causes of EEH slot
- *  freeze events are buggy device drivers, buggy device microcode,
- *  or buggy device hardware.  This is because any attempt by the
- *  device to bus-master data to a memory address that is not
- *  assigned to the device will trigger a slot freeze.   (The idea
- *  is to prevent devices-gone-wild from corrupting system memory).
- *  Buggy hardware/drivers will have a miserable time co-existing
- *  with EEH.
- *
- *  Ideally, a PCI device driver, when suspecting that an isolation
- *  event has occurred (e.g. by reading 0xff's), will then ask EEH
- *  whether this is the case, and then take appropriate steps to
- *  reset the PCI slot, the PCI device, and then resume operations.
- *  However, until that day,  the checking is done here, with the
- *  eeh_check_failure() routine embedded in the MMIO macros.  If
- *  the slot is found to be isolated, an "EEH Event" is synthesized
- *  and sent out for processing.
- */
-
-/* If a device driver keeps reading an MMIO register in an interrupt
- * handler after a slot isolation event, it might be broken.
- * This sets the threshold for how many read attempts we allow
- * before printing an error message.
- */
-#define EEH_MAX_FAILS	2100000
-
-/* Time to wait for a PCI slot to report status, in milliseconds */
-#define PCI_BUS_RESET_WAIT_MSEC (60*1000)
-
-/* Platform dependent EEH operations */
-struct eeh_ops *eeh_ops = NULL;
-
-int eeh_subsystem_enabled;
-EXPORT_SYMBOL(eeh_subsystem_enabled);
-
-/*
- * EEH probe mode support. The intention is to support multiple
- * platforms for EEH. Some platforms like pSeries do PCI emunation
- * based on device tree. However, other platforms like powernv probe
- * PCI devices from hardware. The flag is used to distinguish that.
- * In addition, struct eeh_ops::probe would be invoked for particular
- * OF node or PCI device so that the corresponding PE would be created
- * there.
- */
-int eeh_probe_mode;
-
-/* Global EEH mutex */
-DEFINE_MUTEX(eeh_mutex);
-
-/* Lock to avoid races due to multiple reports of an error */
-static DEFINE_RAW_SPINLOCK(confirm_error_lock);
-
-/* Buffer for reporting pci register dumps. Its here in BSS, and
- * not dynamically alloced, so that it ends up in RMO where RTAS
- * can access it.
- */
-#define EEH_PCI_REGS_LOG_LEN 4096
-static unsigned char pci_regs_buf[EEH_PCI_REGS_LOG_LEN];
-
-/*
- * The struct is used to maintain the EEH global statistic
- * information. Besides, the EEH global statistics will be
- * exported to user space through procfs
- */
-struct eeh_stats {
-	u64 no_device;		/* PCI device not found		*/
-	u64 no_dn;		/* OF node not found		*/
-	u64 no_cfg_addr;	/* Config address not found	*/
-	u64 ignored_check;	/* EEH check skipped		*/
-	u64 total_mmio_ffs;	/* Total EEH checks		*/
-	u64 false_positives;	/* Unnecessary EEH checks	*/
-	u64 slot_resets;	/* PE reset			*/
-};
-
-static struct eeh_stats eeh_stats;
-
-#define IS_BRIDGE(class_code) (((class_code)<<16) == PCI_BASE_CLASS_BRIDGE)
-
-/**
- * eeh_gather_pci_data - Copy assorted PCI config space registers to buff
- * @edev: device to report data for
- * @buf: point to buffer in which to log
- * @len: amount of room in buffer
- *
- * This routine captures assorted PCI configuration space data,
- * and puts them into a buffer for RTAS error logging.
- */
-static size_t eeh_gather_pci_data(struct eeh_dev *edev, char * buf, size_t len)
-{
-	struct device_node *dn = eeh_dev_to_of_node(edev);
-	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
-	u32 cfg;
-	int cap, i;
-	int n = 0;
-
-	n += scnprintf(buf+n, len-n, "%s\n", dn->full_name);
-	printk(KERN_WARNING "EEH: of node=%s\n", dn->full_name);
-
-	eeh_ops->read_config(dn, PCI_VENDOR_ID, 4, &cfg);
-	n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg);
-	printk(KERN_WARNING "EEH: PCI device/vendor: %08x\n", cfg);
-
-	eeh_ops->read_config(dn, PCI_COMMAND, 4, &cfg);
-	n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg);
-	printk(KERN_WARNING "EEH: PCI cmd/status register: %08x\n", cfg);
-
-	if (!dev) {
-		printk(KERN_WARNING "EEH: no PCI device for this of node\n");
-		return n;
-	}
-
-	/* Gather bridge-specific registers */
-	if (dev->class >> 16 == PCI_BASE_CLASS_BRIDGE) {
-		eeh_ops->read_config(dn, PCI_SEC_STATUS, 2, &cfg);
-		n += scnprintf(buf+n, len-n, "sec stat:%x\n", cfg);
-		printk(KERN_WARNING "EEH: Bridge secondary status: %04x\n", cfg);
-
-		eeh_ops->read_config(dn, PCI_BRIDGE_CONTROL, 2, &cfg);
-		n += scnprintf(buf+n, len-n, "brdg ctl:%x\n", cfg);
-		printk(KERN_WARNING "EEH: Bridge control: %04x\n", cfg);
-	}
-
-	/* Dump out the PCI-X command and status regs */
-	cap = pci_find_capability(dev, PCI_CAP_ID_PCIX);
-	if (cap) {
-		eeh_ops->read_config(dn, cap, 4, &cfg);
-		n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg);
-		printk(KERN_WARNING "EEH: PCI-X cmd: %08x\n", cfg);
-
-		eeh_ops->read_config(dn, cap+4, 4, &cfg);
-		n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg);
-		printk(KERN_WARNING "EEH: PCI-X status: %08x\n", cfg);
-	}
-
-	/* If PCI-E capable, dump PCI-E cap 10, and the AER */
-	cap = pci_find_capability(dev, PCI_CAP_ID_EXP);
-	if (cap) {
-		n += scnprintf(buf+n, len-n, "pci-e cap10:\n");
-		printk(KERN_WARNING
-		       "EEH: PCI-E capabilities and status follow:\n");
-
-		for (i=0; i<=8; i++) {
-			eeh_ops->read_config(dn, cap+4*i, 4, &cfg);
-			n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);
-			printk(KERN_WARNING "EEH: PCI-E %02x: %08x\n", i, cfg);
-		}
-
-		cap = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
-		if (cap) {
-			n += scnprintf(buf+n, len-n, "pci-e AER:\n");
-			printk(KERN_WARNING
-			       "EEH: PCI-E AER capability register set follows:\n");
-
-			for (i=0; i<14; i++) {
-				eeh_ops->read_config(dn, cap+4*i, 4, &cfg);
-				n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);
-				printk(KERN_WARNING "EEH: PCI-E AER %02x: %08x\n", i, cfg);
-			}
-		}
-	}
-
-	return n;
-}
-
-/**
- * eeh_slot_error_detail - Generate combined log including driver log and error log
- * @pe: EEH PE
- * @severity: temporary or permanent error log
- *
- * This routine should be called to generate the combined log, which
- * is comprised of driver log and error log. The driver log is figured
- * out from the config space of the corresponding PCI device, while
- * the error log is fetched through platform dependent function call.
- */
-void eeh_slot_error_detail(struct eeh_pe *pe, int severity)
-{
-	size_t loglen = 0;
-	struct eeh_dev *edev;
-
-	eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
-	eeh_ops->configure_bridge(pe);
-	eeh_pe_restore_bars(pe);
-
-	pci_regs_buf[0] = 0;
-	eeh_pe_for_each_dev(pe, edev) {
-		loglen += eeh_gather_pci_data(edev, pci_regs_buf,
-				EEH_PCI_REGS_LOG_LEN);
-        }
-
-	eeh_ops->get_log(pe, severity, pci_regs_buf, loglen);
-}
-
-/**
- * eeh_token_to_phys - Convert EEH address token to phys address
- * @token: I/O token, should be address in the form 0xA....
- *
- * This routine should be called to convert virtual I/O address
- * to physical one.
- */
-static inline unsigned long eeh_token_to_phys(unsigned long token)
-{
-	pte_t *ptep;
-	unsigned long pa;
-
-	ptep = find_linux_pte(init_mm.pgd, token);
-	if (!ptep)
-		return token;
-	pa = pte_pfn(*ptep) << PAGE_SHIFT;
-
-	return pa | (token & (PAGE_SIZE-1));
-}
-
-/**
- * eeh_dev_check_failure - Check if all 1's data is due to EEH slot freeze
- * @edev: eeh device
- *
- * Check for an EEH failure for the given device node.  Call this
- * routine if the result of a read was all 0xff's and you want to
- * find out if this is due to an EEH slot freeze.  This routine
- * will query firmware for the EEH status.
- *
- * Returns 0 if there has not been an EEH error; otherwise returns
- * a non-zero value and queues up a slot isolation event notification.
- *
- * It is safe to call this routine in an interrupt context.
- */
-int eeh_dev_check_failure(struct eeh_dev *edev)
-{
-	int ret;
-	unsigned long flags;
-	struct device_node *dn;
-	struct pci_dev *dev;
-	struct eeh_pe *pe;
-	int rc = 0;
-	const char *location;
-
-	eeh_stats.total_mmio_ffs++;
-
-	if (!eeh_subsystem_enabled)
-		return 0;
-
-	if (!edev) {
-		eeh_stats.no_dn++;
-		return 0;
-	}
-	dn = eeh_dev_to_of_node(edev);
-	dev = eeh_dev_to_pci_dev(edev);
-	pe = edev->pe;
-
-	/* Access to IO BARs might get this far and still not want checking. */
-	if (!pe) {
-		eeh_stats.ignored_check++;
-		pr_debug("EEH: Ignored check for %s %s\n",
-			eeh_pci_name(dev), dn->full_name);
-		return 0;
-	}
-
-	if (!pe->addr && !pe->config_addr) {
-		eeh_stats.no_cfg_addr++;
-		return 0;
-	}
-
-	/* If we already have a pending isolation event for this
-	 * slot, we know it's bad already, we don't need to check.
-	 * Do this checking under a lock; as multiple PCI devices
-	 * in one slot might report errors simultaneously, and we
-	 * only want one error recovery routine running.
-	 */
-	raw_spin_lock_irqsave(&confirm_error_lock, flags);
-	rc = 1;
-	if (pe->state & EEH_PE_ISOLATED) {
-		pe->check_count++;
-		if (pe->check_count % EEH_MAX_FAILS == 0) {
-			location = of_get_property(dn, "ibm,loc-code", NULL);
-			printk(KERN_ERR "EEH: %d reads ignored for recovering device at "
-				"location=%s driver=%s pci addr=%s\n",
-				pe->check_count, location,
-				eeh_driver_name(dev), eeh_pci_name(dev));
-			printk(KERN_ERR "EEH: Might be infinite loop in %s driver\n",
-				eeh_driver_name(dev));
-			dump_stack();
-		}
-		goto dn_unlock;
-	}
-
-	/*
-	 * Now test for an EEH failure.  This is VERY expensive.
-	 * Note that the eeh_config_addr may be a parent device
-	 * in the case of a device behind a bridge, or it may be
-	 * function zero of a multi-function device.
-	 * In any case they must share a common PHB.
-	 */
-	ret = eeh_ops->get_state(pe, NULL);
-
-	/* Note that config-io to empty slots may fail;
-	 * they are empty when they don't have children.
-	 * We will punt with the following conditions: Failure to get
-	 * PE's state, EEH not support and Permanently unavailable
-	 * state, PE is in good state.
-	 */
-	if ((ret < 0) ||
-	    (ret == EEH_STATE_NOT_SUPPORT) ||
-	    (ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) ==
-	    (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) {
-		eeh_stats.false_positives++;
-		pe->false_positives++;
-		rc = 0;
-		goto dn_unlock;
-	}
-
-	eeh_stats.slot_resets++;
- 
-	/* Avoid repeated reports of this failure, including problems
-	 * with other functions on this device, and functions under
-	 * bridges.
-	 */
-	eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
-	raw_spin_unlock_irqrestore(&confirm_error_lock, flags);
-
-	eeh_send_failure_event(pe);
-
-	/* Most EEH events are due to device driver bugs.  Having
-	 * a stack trace will help the device-driver authors figure
-	 * out what happened.  So print that out.
-	 */
-	WARN(1, "EEH: failure detected\n");
-	return 1;
-
-dn_unlock:
-	raw_spin_unlock_irqrestore(&confirm_error_lock, flags);
-	return rc;
-}
-
-EXPORT_SYMBOL_GPL(eeh_dev_check_failure);
-
-/**
- * eeh_check_failure - Check if all 1's data is due to EEH slot freeze
- * @token: I/O token, should be address in the form 0xA....
- * @val: value, should be all 1's (XXX why do we need this arg??)
- *
- * Check for an EEH failure at the given token address.  Call this
- * routine if the result of a read was all 0xff's and you want to
- * find out if this is due to an EEH slot freeze event.  This routine
- * will query firmware for the EEH status.
- *
- * Note this routine is safe to call in an interrupt context.
- */
-unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val)
-{
-	unsigned long addr;
-	struct eeh_dev *edev;
-
-	/* Finding the phys addr + pci device; this is pretty quick. */
-	addr = eeh_token_to_phys((unsigned long __force) token);
-	edev = eeh_addr_cache_get_dev(addr);
-	if (!edev) {
-		eeh_stats.no_device++;
-		return val;
-	}
-
-	eeh_dev_check_failure(edev);
-
-	pci_dev_put(eeh_dev_to_pci_dev(edev));
-	return val;
-}
-
-EXPORT_SYMBOL(eeh_check_failure);
-
-
-/**
- * eeh_pci_enable - Enable MMIO or DMA transfers for this slot
- * @pe: EEH PE
- *
- * This routine should be called to reenable frozen MMIO or DMA
- * so that it would work correctly again. It's useful while doing
- * recovery or log collection on the indicated device.
- */
-int eeh_pci_enable(struct eeh_pe *pe, int function)
-{
-	int rc;
-
-	rc = eeh_ops->set_option(pe, function);
-	if (rc)
-		pr_warning("%s: Unexpected state change %d on PHB#%d-PE#%x, err=%d\n",
-			__func__, function, pe->phb->global_number, pe->addr, rc);
-
-	rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
-	if (rc > 0 && (rc & EEH_STATE_MMIO_ENABLED) &&
-	   (function == EEH_OPT_THAW_MMIO))
-		return 0;
-
-	return rc;
-}
-
-/**
- * pcibios_set_pcie_slot_reset - Set PCI-E reset state
- * @dev: pci device struct
- * @state: reset state to enter
- *
- * Return value:
- * 	0 if success
- */
-int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state)
-{
-	struct eeh_dev *edev = pci_dev_to_eeh_dev(dev);
-	struct eeh_pe *pe = edev->pe;
-
-	if (!pe) {
-		pr_err("%s: No PE found on PCI device %s\n",
-			__func__, pci_name(dev));
-		return -EINVAL;
-	}
-
-	switch (state) {
-	case pcie_deassert_reset:
-		eeh_ops->reset(pe, EEH_RESET_DEACTIVATE);
-		break;
-	case pcie_hot_reset:
-		eeh_ops->reset(pe, EEH_RESET_HOT);
-		break;
-	case pcie_warm_reset:
-		eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL);
-		break;
-	default:
-		return -EINVAL;
-	};
-
-	return 0;
-}
-
-/**
- * eeh_set_pe_freset - Check the required reset for the indicated device
- * @data: EEH device
- * @flag: return value
- *
- * Each device might have its preferred reset type: fundamental or
- * hot reset. The routine is used to collected the information for
- * the indicated device and its children so that the bunch of the
- * devices could be reset properly.
- */
-static void *eeh_set_dev_freset(void *data, void *flag)
-{
-	struct pci_dev *dev;
-	unsigned int *freset = (unsigned int *)flag;
-	struct eeh_dev *edev = (struct eeh_dev *)data;
-
-	dev = eeh_dev_to_pci_dev(edev);
-	if (dev)
-		*freset |= dev->needs_freset;
-
-	return NULL;
-}
-
-/**
- * eeh_reset_pe_once - Assert the pci #RST line for 1/4 second
- * @pe: EEH PE
- *
- * Assert the PCI #RST line for 1/4 second.
- */
-static void eeh_reset_pe_once(struct eeh_pe *pe)
-{
-	unsigned int freset = 0;
-
-	/* Determine type of EEH reset required for
-	 * Partitionable Endpoint, a hot-reset (1)
-	 * or a fundamental reset (3).
-	 * A fundamental reset required by any device under
-	 * Partitionable Endpoint trumps hot-reset.
-  	 */
-	eeh_pe_dev_traverse(pe, eeh_set_dev_freset, &freset);
-
-	if (freset)
-		eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL);
-	else
-		eeh_ops->reset(pe, EEH_RESET_HOT);
-
-	/* The PCI bus requires that the reset be held high for at least
-	 * a 100 milliseconds. We wait a bit longer 'just in case'.
-	 */
-#define PCI_BUS_RST_HOLD_TIME_MSEC 250
-	msleep(PCI_BUS_RST_HOLD_TIME_MSEC);
-	
-	/* We might get hit with another EEH freeze as soon as the 
-	 * pci slot reset line is dropped. Make sure we don't miss
-	 * these, and clear the flag now.
-	 */
-	eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
-
-	eeh_ops->reset(pe, EEH_RESET_DEACTIVATE);
-
-	/* After a PCI slot has been reset, the PCI Express spec requires
-	 * a 1.5 second idle time for the bus to stabilize, before starting
-	 * up traffic.
-	 */
-#define PCI_BUS_SETTLE_TIME_MSEC 1800
-	msleep(PCI_BUS_SETTLE_TIME_MSEC);
-}
-
-/**
- * eeh_reset_pe - Reset the indicated PE
- * @pe: EEH PE
- *
- * This routine should be called to reset indicated device, including
- * PE. A PE might include multiple PCI devices and sometimes PCI bridges
- * might be involved as well.
- */
-int eeh_reset_pe(struct eeh_pe *pe)
-{
-	int i, rc;
-
-	/* Take three shots at resetting the bus */
-	for (i=0; i<3; i++) {
-		eeh_reset_pe_once(pe);
-
-		rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
-		if (rc == (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE))
-			return 0;
-
-		if (rc < 0) {
-			pr_err("%s: Unrecoverable slot failure on PHB#%d-PE#%x",
-				__func__, pe->phb->global_number, pe->addr);
-			return -1;
-		}
-		pr_err("EEH: bus reset %d failed on PHB#%d-PE#%x, rc=%d\n",
-			i+1, pe->phb->global_number, pe->addr, rc);
-	}
-
-	return -1;
-}
-
-/**
- * eeh_save_bars - Save device bars
- * @edev: PCI device associated EEH device
- *
- * Save the values of the device bars. Unlike the restore
- * routine, this routine is *not* recursive. This is because
- * PCI devices are added individually; but, for the restore,
- * an entire slot is reset at a time.
- */
-void eeh_save_bars(struct eeh_dev *edev)
-{
-	int i;
-	struct device_node *dn;
-
-	if (!edev)
-		return;
-	dn = eeh_dev_to_of_node(edev);
-	
-	for (i = 0; i < 16; i++)
-		eeh_ops->read_config(dn, i * 4, 4, &edev->config_space[i]);
-}
-
-/**
- * eeh_ops_register - Register platform dependent EEH operations
- * @ops: platform dependent EEH operations
- *
- * Register the platform dependent EEH operation callback
- * functions. The platform should call this function before
- * any other EEH operations.
- */
-int __init eeh_ops_register(struct eeh_ops *ops)
-{
-	if (!ops->name) {
-		pr_warning("%s: Invalid EEH ops name for %p\n",
-			__func__, ops);
-		return -EINVAL;
-	}
-
-	if (eeh_ops && eeh_ops != ops) {
-		pr_warning("%s: EEH ops of platform %s already existing (%s)\n",
-			__func__, eeh_ops->name, ops->name);
-		return -EEXIST;
-	}
-
-	eeh_ops = ops;
-
-	return 0;
-}
-
-/**
- * eeh_ops_unregister - Unreigster platform dependent EEH operations
- * @name: name of EEH platform operations
- *
- * Unregister the platform dependent EEH operation callback
- * functions.
- */
-int __exit eeh_ops_unregister(const char *name)
-{
-	if (!name || !strlen(name)) {
-		pr_warning("%s: Invalid EEH ops name\n",
-			__func__);
-		return -EINVAL;
-	}
-
-	if (eeh_ops && !strcmp(eeh_ops->name, name)) {
-		eeh_ops = NULL;
-		return 0;
-	}
-
-	return -EEXIST;
-}
-
-/**
- * eeh_init - EEH initialization
- *
- * Initialize EEH by trying to enable it for all of the adapters in the system.
- * As a side effect we can determine here if eeh is supported at all.
- * Note that we leave EEH on so failed config cycles won't cause a machine
- * check.  If a user turns off EEH for a particular adapter they are really
- * telling Linux to ignore errors.  Some hardware (e.g. POWER5) won't
- * grant access to a slot if EEH isn't enabled, and so we always enable
- * EEH for all slots/all devices.
- *
- * The eeh-force-off option disables EEH checking globally, for all slots.
- * Even if force-off is set, the EEH hardware is still enabled, so that
- * newer systems can boot.
- */
-static int __init eeh_init(void)
-{
-	struct pci_controller *hose, *tmp;
-	struct device_node *phb;
-	int ret;
-
-	/* call platform initialization function */
-	if (!eeh_ops) {
-		pr_warning("%s: Platform EEH operation not found\n",
-			__func__);
-		return -EEXIST;
-	} else if ((ret = eeh_ops->init())) {
-		pr_warning("%s: Failed to call platform init function (%d)\n",
-			__func__, ret);
-		return ret;
-	}
-
-	raw_spin_lock_init(&confirm_error_lock);
-
-	/* Enable EEH for all adapters */
-	if (eeh_probe_mode_devtree()) {
-		list_for_each_entry_safe(hose, tmp,
-			&hose_list, list_node) {
-			phb = hose->dn;
-			traverse_pci_devices(phb, eeh_ops->of_probe, NULL);
-		}
-	}
-
-	if (eeh_subsystem_enabled)
-		pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n");
-	else
-		pr_warning("EEH: No capable adapters found\n");
-
-	return ret;
-}
-
-core_initcall_sync(eeh_init);
-
-/**
- * eeh_add_device_early - Enable EEH for the indicated device_node
- * @dn: device node for which to set up EEH
- *
- * This routine must be used to perform EEH initialization for PCI
- * devices that were added after system boot (e.g. hotplug, dlpar).
- * This routine must be called before any i/o is performed to the
- * adapter (inluding any config-space i/o).
- * Whether this actually enables EEH or not for this device depends
- * on the CEC architecture, type of the device, on earlier boot
- * command-line arguments & etc.
- */
-static void eeh_add_device_early(struct device_node *dn)
-{
-	struct pci_controller *phb;
-
-	if (!of_node_to_eeh_dev(dn))
-		return;
-	phb = of_node_to_eeh_dev(dn)->phb;
-
-	/* USB Bus children of PCI devices will not have BUID's */
-	if (NULL == phb || 0 == phb->buid)
-		return;
-
-	/* FIXME: hotplug support on POWERNV */
-	eeh_ops->of_probe(dn, NULL);
-}
-
-/**
- * eeh_add_device_tree_early - Enable EEH for the indicated device
- * @dn: device node
- *
- * This routine must be used to perform EEH initialization for the
- * indicated PCI device that was added after system boot (e.g.
- * hotplug, dlpar).
- */
-void eeh_add_device_tree_early(struct device_node *dn)
-{
-	struct device_node *sib;
-
-	for_each_child_of_node(dn, sib)
-		eeh_add_device_tree_early(sib);
-	eeh_add_device_early(dn);
-}
-EXPORT_SYMBOL_GPL(eeh_add_device_tree_early);
-
-/**
- * eeh_add_device_late - Perform EEH initialization for the indicated pci device
- * @dev: pci device for which to set up EEH
- *
- * This routine must be used to complete EEH initialization for PCI
- * devices that were added after system boot (e.g. hotplug, dlpar).
- */
-static void eeh_add_device_late(struct pci_dev *dev)
-{
-	struct device_node *dn;
-	struct eeh_dev *edev;
-
-	if (!dev || !eeh_subsystem_enabled)
-		return;
-
-	pr_debug("EEH: Adding device %s\n", pci_name(dev));
-
-	dn = pci_device_to_OF_node(dev);
-	edev = of_node_to_eeh_dev(dn);
-	if (edev->pdev == dev) {
-		pr_debug("EEH: Already referenced !\n");
-		return;
-	}
-	WARN_ON(edev->pdev);
-
-	pci_dev_get(dev);
-	edev->pdev = dev;
-	dev->dev.archdata.edev = edev;
-
-	eeh_addr_cache_insert_dev(dev);
-}
-
-/**
- * eeh_add_device_tree_late - Perform EEH initialization for the indicated PCI bus
- * @bus: PCI bus
- *
- * This routine must be used to perform EEH initialization for PCI
- * devices which are attached to the indicated PCI bus. The PCI bus
- * is added after system boot through hotplug or dlpar.
- */
-void eeh_add_device_tree_late(struct pci_bus *bus)
-{
-	struct pci_dev *dev;
-
-	list_for_each_entry(dev, &bus->devices, bus_list) {
- 		eeh_add_device_late(dev);
- 		if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
- 			struct pci_bus *subbus = dev->subordinate;
- 			if (subbus)
- 				eeh_add_device_tree_late(subbus);
- 		}
-	}
-}
-EXPORT_SYMBOL_GPL(eeh_add_device_tree_late);
-
-/**
- * eeh_add_sysfs_files - Add EEH sysfs files for the indicated PCI bus
- * @bus: PCI bus
- *
- * This routine must be used to add EEH sysfs files for PCI
- * devices which are attached to the indicated PCI bus. The PCI bus
- * is added after system boot through hotplug or dlpar.
- */
-void eeh_add_sysfs_files(struct pci_bus *bus)
-{
-	struct pci_dev *dev;
-
-	list_for_each_entry(dev, &bus->devices, bus_list) {
-		eeh_sysfs_add_device(dev);
-		if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
-			struct pci_bus *subbus = dev->subordinate;
-			if (subbus)
-				eeh_add_sysfs_files(subbus);
-		}
-	}
-}
-EXPORT_SYMBOL_GPL(eeh_add_sysfs_files);
-
-/**
- * eeh_remove_device - Undo EEH setup for the indicated pci device
- * @dev: pci device to be removed
- * @purge_pe: remove the PE or not
- *
- * This routine should be called when a device is removed from
- * a running system (e.g. by hotplug or dlpar).  It unregisters
- * the PCI device from the EEH subsystem.  I/O errors affecting
- * this device will no longer be detected after this call; thus,
- * i/o errors affecting this slot may leave this device unusable.
- */
-static void eeh_remove_device(struct pci_dev *dev, int purge_pe)
-{
-	struct eeh_dev *edev;
-
-	if (!dev || !eeh_subsystem_enabled)
-		return;
-	edev = pci_dev_to_eeh_dev(dev);
-
-	/* Unregister the device with the EEH/PCI address search system */
-	pr_debug("EEH: Removing device %s\n", pci_name(dev));
-
-	if (!edev || !edev->pdev) {
-		pr_debug("EEH: Not referenced !\n");
-		return;
-	}
-	edev->pdev = NULL;
-	dev->dev.archdata.edev = NULL;
-	pci_dev_put(dev);
-
-	eeh_rmv_from_parent_pe(edev, purge_pe);
-	eeh_addr_cache_rmv_dev(dev);
-	eeh_sysfs_remove_device(dev);
-}
-
-/**
- * eeh_remove_bus_device - Undo EEH setup for the indicated PCI device
- * @dev: PCI device
- * @purge_pe: remove the corresponding PE or not
- *
- * This routine must be called when a device is removed from the
- * running system through hotplug or dlpar. The corresponding
- * PCI address cache will be removed.
- */
-void eeh_remove_bus_device(struct pci_dev *dev, int purge_pe)
-{
-	struct pci_bus *bus = dev->subordinate;
-	struct pci_dev *child, *tmp;
-
-	eeh_remove_device(dev, purge_pe);
-
-	if (bus && dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
-		list_for_each_entry_safe(child, tmp, &bus->devices, bus_list)
-			 eeh_remove_bus_device(child, purge_pe);
-	}
-}
-EXPORT_SYMBOL_GPL(eeh_remove_bus_device);
-
-static int proc_eeh_show(struct seq_file *m, void *v)
-{
-	if (0 == eeh_subsystem_enabled) {
-		seq_printf(m, "EEH Subsystem is globally disabled\n");
-		seq_printf(m, "eeh_total_mmio_ffs=%llu\n", eeh_stats.total_mmio_ffs);
-	} else {
-		seq_printf(m, "EEH Subsystem is enabled\n");
-		seq_printf(m,
-				"no device=%llu\n"
-				"no device node=%llu\n"
-				"no config address=%llu\n"
-				"check not wanted=%llu\n"
-				"eeh_total_mmio_ffs=%llu\n"
-				"eeh_false_positives=%llu\n"
-				"eeh_slot_resets=%llu\n",
-				eeh_stats.no_device,
-				eeh_stats.no_dn,
-				eeh_stats.no_cfg_addr,
-				eeh_stats.ignored_check,
-				eeh_stats.total_mmio_ffs,
-				eeh_stats.false_positives,
-				eeh_stats.slot_resets);
-	}
-
-	return 0;
-}
-
-static int proc_eeh_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, proc_eeh_show, NULL);
-}
-
-static const struct file_operations proc_eeh_operations = {
-	.open      = proc_eeh_open,
-	.read      = seq_read,
-	.llseek    = seq_lseek,
-	.release   = single_release,
-};
-
-static int __init eeh_init_proc(void)
-{
-	if (machine_is(pseries))
-		proc_create("powerpc/eeh", 0, NULL, &proc_eeh_operations);
-	return 0;
-}
-__initcall(eeh_init_proc);
diff --git a/arch/powerpc/platforms/pseries/eeh_cache.c b/arch/powerpc/platforms/pseries/eeh_cache.c
deleted file mode 100644
index 5a4c8790305..00000000000
--- a/arch/powerpc/platforms/pseries/eeh_cache.c
+++ /dev/null
@@ -1,319 +0,0 @@
-/*
- * PCI address cache; allows the lookup of PCI devices based on I/O address
- *
- * Copyright IBM Corporation 2004
- * Copyright Linas Vepstas <linas@austin.ibm.com> 2004
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
- */
-
-#include <linux/list.h>
-#include <linux/pci.h>
-#include <linux/rbtree.h>
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <linux/atomic.h>
-#include <asm/pci-bridge.h>
-#include <asm/ppc-pci.h>
-
-
-/**
- * The pci address cache subsystem.  This subsystem places
- * PCI device address resources into a red-black tree, sorted
- * according to the address range, so that given only an i/o
- * address, the corresponding PCI device can be **quickly**
- * found. It is safe to perform an address lookup in an interrupt
- * context; this ability is an important feature.
- *
- * Currently, the only customer of this code is the EEH subsystem;
- * thus, this code has been somewhat tailored to suit EEH better.
- * In particular, the cache does *not* hold the addresses of devices
- * for which EEH is not enabled.
- *
- * (Implementation Note: The RB tree seems to be better/faster
- * than any hash algo I could think of for this problem, even
- * with the penalty of slow pointer chases for d-cache misses).
- */
-struct pci_io_addr_range {
-	struct rb_node rb_node;
-	unsigned long addr_lo;
-	unsigned long addr_hi;
-	struct eeh_dev *edev;
-	struct pci_dev *pcidev;
-	unsigned int flags;
-};
-
-static struct pci_io_addr_cache {
-	struct rb_root rb_root;
-	spinlock_t piar_lock;
-} pci_io_addr_cache_root;
-
-static inline struct eeh_dev *__eeh_addr_cache_get_device(unsigned long addr)
-{
-	struct rb_node *n = pci_io_addr_cache_root.rb_root.rb_node;
-
-	while (n) {
-		struct pci_io_addr_range *piar;
-		piar = rb_entry(n, struct pci_io_addr_range, rb_node);
-
-		if (addr < piar->addr_lo) {
-			n = n->rb_left;
-		} else {
-			if (addr > piar->addr_hi) {
-				n = n->rb_right;
-			} else {
-				pci_dev_get(piar->pcidev);
-				return piar->edev;
-			}
-		}
-	}
-
-	return NULL;
-}
-
-/**
- * eeh_addr_cache_get_dev - Get device, given only address
- * @addr: mmio (PIO) phys address or i/o port number
- *
- * Given an mmio phys address, or a port number, find a pci device
- * that implements this address.  Be sure to pci_dev_put the device
- * when finished.  I/O port numbers are assumed to be offset
- * from zero (that is, they do *not* have pci_io_addr added in).
- * It is safe to call this function within an interrupt.
- */
-struct eeh_dev *eeh_addr_cache_get_dev(unsigned long addr)
-{
-	struct eeh_dev *edev;
-	unsigned long flags;
-
-	spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
-	edev = __eeh_addr_cache_get_device(addr);
-	spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
-	return edev;
-}
-
-#ifdef DEBUG
-/*
- * Handy-dandy debug print routine, does nothing more
- * than print out the contents of our addr cache.
- */
-static void eeh_addr_cache_print(struct pci_io_addr_cache *cache)
-{
-	struct rb_node *n;
-	int cnt = 0;
-
-	n = rb_first(&cache->rb_root);
-	while (n) {
-		struct pci_io_addr_range *piar;
-		piar = rb_entry(n, struct pci_io_addr_range, rb_node);
-		pr_debug("PCI: %s addr range %d [%lx-%lx]: %s\n",
-		       (piar->flags & IORESOURCE_IO) ? "i/o" : "mem", cnt,
-		       piar->addr_lo, piar->addr_hi, pci_name(piar->pcidev));
-		cnt++;
-		n = rb_next(n);
-	}
-}
-#endif
-
-/* Insert address range into the rb tree. */
-static struct pci_io_addr_range *
-eeh_addr_cache_insert(struct pci_dev *dev, unsigned long alo,
-		      unsigned long ahi, unsigned int flags)
-{
-	struct rb_node **p = &pci_io_addr_cache_root.rb_root.rb_node;
-	struct rb_node *parent = NULL;
-	struct pci_io_addr_range *piar;
-
-	/* Walk tree, find a place to insert into tree */
-	while (*p) {
-		parent = *p;
-		piar = rb_entry(parent, struct pci_io_addr_range, rb_node);
-		if (ahi < piar->addr_lo) {
-			p = &parent->rb_left;
-		} else if (alo > piar->addr_hi) {
-			p = &parent->rb_right;
-		} else {
-			if (dev != piar->pcidev ||
-			    alo != piar->addr_lo || ahi != piar->addr_hi) {
-				pr_warning("PIAR: overlapping address range\n");
-			}
-			return piar;
-		}
-	}
-	piar = kzalloc(sizeof(struct pci_io_addr_range), GFP_ATOMIC);
-	if (!piar)
-		return NULL;
-
-	pci_dev_get(dev);
-	piar->addr_lo = alo;
-	piar->addr_hi = ahi;
-	piar->edev = pci_dev_to_eeh_dev(dev);
-	piar->pcidev = dev;
-	piar->flags = flags;
-
-#ifdef DEBUG
-	pr_debug("PIAR: insert range=[%lx:%lx] dev=%s\n",
-	                  alo, ahi, pci_name(dev));
-#endif
-
-	rb_link_node(&piar->rb_node, parent, p);
-	rb_insert_color(&piar->rb_node, &pci_io_addr_cache_root.rb_root);
-
-	return piar;
-}
-
-static void __eeh_addr_cache_insert_dev(struct pci_dev *dev)
-{
-	struct device_node *dn;
-	struct eeh_dev *edev;
-	int i;
-
-	dn = pci_device_to_OF_node(dev);
-	if (!dn) {
-		pr_warning("PCI: no pci dn found for dev=%s\n", pci_name(dev));
-		return;
-	}
-
-	edev = of_node_to_eeh_dev(dn);
-	if (!edev) {
-		pr_warning("PCI: no EEH dev found for dn=%s\n",
-			dn->full_name);
-		return;
-	}
-
-	/* Skip any devices for which EEH is not enabled. */
-	if (!edev->pe) {
-#ifdef DEBUG
-		pr_info("PCI: skip building address cache for=%s - %s\n",
-			pci_name(dev), dn->full_name);
-#endif
-		return;
-	}
-
-	/* Walk resources on this device, poke them into the tree */
-	for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
-		unsigned long start = pci_resource_start(dev,i);
-		unsigned long end = pci_resource_end(dev,i);
-		unsigned int flags = pci_resource_flags(dev,i);
-
-		/* We are interested only bus addresses, not dma or other stuff */
-		if (0 == (flags & (IORESOURCE_IO | IORESOURCE_MEM)))
-			continue;
-		if (start == 0 || ~start == 0 || end == 0 || ~end == 0)
-			 continue;
-		eeh_addr_cache_insert(dev, start, end, flags);
-	}
-}
-
-/**
- * eeh_addr_cache_insert_dev - Add a device to the address cache
- * @dev: PCI device whose I/O addresses we are interested in.
- *
- * In order to support the fast lookup of devices based on addresses,
- * we maintain a cache of devices that can be quickly searched.
- * This routine adds a device to that cache.
- */
-void eeh_addr_cache_insert_dev(struct pci_dev *dev)
-{
-	unsigned long flags;
-
-	/* Ignore PCI bridges */
-	if ((dev->class >> 16) == PCI_BASE_CLASS_BRIDGE)
-		return;
-
-	spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
-	__eeh_addr_cache_insert_dev(dev);
-	spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
-}
-
-static inline void __eeh_addr_cache_rmv_dev(struct pci_dev *dev)
-{
-	struct rb_node *n;
-
-restart:
-	n = rb_first(&pci_io_addr_cache_root.rb_root);
-	while (n) {
-		struct pci_io_addr_range *piar;
-		piar = rb_entry(n, struct pci_io_addr_range, rb_node);
-
-		if (piar->pcidev == dev) {
-			rb_erase(n, &pci_io_addr_cache_root.rb_root);
-			pci_dev_put(piar->pcidev);
-			kfree(piar);
-			goto restart;
-		}
-		n = rb_next(n);
-	}
-}
-
-/**
- * eeh_addr_cache_rmv_dev - remove pci device from addr cache
- * @dev: device to remove
- *
- * Remove a device from the addr-cache tree.
- * This is potentially expensive, since it will walk
- * the tree multiple times (once per resource).
- * But so what; device removal doesn't need to be that fast.
- */
-void eeh_addr_cache_rmv_dev(struct pci_dev *dev)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
-	__eeh_addr_cache_rmv_dev(dev);
-	spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
-}
-
-/**
- * eeh_addr_cache_build - Build a cache of I/O addresses
- *
- * Build a cache of pci i/o addresses.  This cache will be used to
- * find the pci device that corresponds to a given address.
- * This routine scans all pci busses to build the cache.
- * Must be run late in boot process, after the pci controllers
- * have been scanned for devices (after all device resources are known).
- */
-void __init eeh_addr_cache_build(void)
-{
-	struct device_node *dn;
-	struct eeh_dev *edev;
-	struct pci_dev *dev = NULL;
-
-	spin_lock_init(&pci_io_addr_cache_root.piar_lock);
-
-	for_each_pci_dev(dev) {
-		eeh_addr_cache_insert_dev(dev);
-
-		dn = pci_device_to_OF_node(dev);
-		if (!dn)
-			continue;
-
-		edev = of_node_to_eeh_dev(dn);
-		if (!edev)
-			continue;
-
-		pci_dev_get(dev);  /* matching put is in eeh_remove_device() */
-		dev->dev.archdata.edev = edev;
-		edev->pdev = dev;
-
-		eeh_sysfs_add_device(dev);
-	}
-
-#ifdef DEBUG
-	/* Verify tree built up above, echo back the list of addrs. */
-	eeh_addr_cache_print(&pci_io_addr_cache_root);
-#endif
-}
-
diff --git a/arch/powerpc/platforms/pseries/eeh_dev.c b/arch/powerpc/platforms/pseries/eeh_dev.c
deleted file mode 100644
index 1efa28f5fc5..00000000000
--- a/arch/powerpc/platforms/pseries/eeh_dev.c
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- * The file intends to implement dynamic creation of EEH device, which will
- * be bound with OF node and PCI device simutaneously. The EEH devices would
- * be foundamental information for EEH core components to work proerly. Besides,
- * We have to support multiple situations where dynamic creation of EEH device
- * is required:
- *
- * 1) Before PCI emunation starts, we need create EEH devices according to the
- *    PCI sensitive OF nodes.
- * 2) When PCI emunation is done, we need do the binding between PCI device and
- *    the associated EEH device.
- * 3) DR (Dynamic Reconfiguration) would create PCI sensitive OF node. EEH device
- *    will be created while PCI sensitive OF node is detected from DR.
- * 4) PCI hotplug needs redoing the binding between PCI device and EEH device. If
- *    PHB is newly inserted, we also need create EEH devices accordingly.
- *
- * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2012.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
- */
-
-#include <linux/export.h>
-#include <linux/gfp.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/string.h>
-
-#include <asm/pci-bridge.h>
-#include <asm/ppc-pci.h>
-
-/**
- * eeh_dev_init - Create EEH device according to OF node
- * @dn: device node
- * @data: PHB
- *
- * It will create EEH device according to the given OF node. The function
- * might be called by PCI emunation, DR, PHB hotplug.
- */
-void *eeh_dev_init(struct device_node *dn, void *data)
-{
-	struct pci_controller *phb = data;
-	struct eeh_dev *edev;
-
-	/* Allocate EEH device */
-	edev = kzalloc(sizeof(*edev), GFP_KERNEL);
-	if (!edev) {
-		pr_warning("%s: out of memory\n", __func__);
-		return NULL;
-	}
-
-	/* Associate EEH device with OF node */
-	PCI_DN(dn)->edev = edev;
-	edev->dn  = dn;
-	edev->phb = phb;
-	INIT_LIST_HEAD(&edev->list);
-
-	return NULL;
-}
-
-/**
- * eeh_dev_phb_init_dynamic - Create EEH devices for devices included in PHB
- * @phb: PHB
- *
- * Scan the PHB OF node and its child association, then create the
- * EEH devices accordingly
- */
-void eeh_dev_phb_init_dynamic(struct pci_controller *phb)
-{
-	struct device_node *dn = phb->dn;
-
-	/* EEH PE for PHB */
-	eeh_phb_pe_create(phb);
-
-	/* EEH device for PHB */
-	eeh_dev_init(dn, phb);
-
-	/* EEH devices for children OF nodes */
-	traverse_pci_devices(dn, eeh_dev_init, phb);
-}
-
-/**
- * eeh_dev_phb_init - Create EEH devices for devices included in existing PHBs
- *
- * Scan all the existing PHBs and create EEH devices for their OF
- * nodes and their children OF nodes
- */
-static int __init eeh_dev_phb_init(void)
-{
-	struct pci_controller *phb, *tmp;
-
-	list_for_each_entry_safe(phb, tmp, &hose_list, list_node)
-		eeh_dev_phb_init_dynamic(phb);
-
-	pr_info("EEH: devices created\n");
-
-	return 0;
-}
-
-core_initcall(eeh_dev_phb_init);
diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/platforms/pseries/eeh_driver.c
deleted file mode 100644
index a3fefb61097..00000000000
--- a/arch/powerpc/platforms/pseries/eeh_driver.c
+++ /dev/null
@@ -1,552 +0,0 @@
-/*
- * PCI Error Recovery Driver for RPA-compliant PPC64 platform.
- * Copyright IBM Corp. 2004 2005
- * Copyright Linas Vepstas <linas@linas.org> 2004, 2005
- *
- * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or (at
- * your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT.  See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * Send comments and feedback to Linas Vepstas <linas@austin.ibm.com>
- */
-#include <linux/delay.h>
-#include <linux/interrupt.h>
-#include <linux/irq.h>
-#include <linux/module.h>
-#include <linux/pci.h>
-#include <asm/eeh.h>
-#include <asm/eeh_event.h>
-#include <asm/ppc-pci.h>
-#include <asm/pci-bridge.h>
-#include <asm/prom.h>
-#include <asm/rtas.h>
-
-/**
- * eeh_pcid_name - Retrieve name of PCI device driver
- * @pdev: PCI device
- *
- * This routine is used to retrieve the name of PCI device driver
- * if that's valid.
- */
-static inline const char *eeh_pcid_name(struct pci_dev *pdev)
-{
-	if (pdev && pdev->dev.driver)
-		return pdev->dev.driver->name;
-	return "";
-}
-
-/**
- * eeh_pcid_get - Get the PCI device driver
- * @pdev: PCI device
- *
- * The function is used to retrieve the PCI device driver for
- * the indicated PCI device. Besides, we will increase the reference
- * of the PCI device driver to prevent that being unloaded on
- * the fly. Otherwise, kernel crash would be seen.
- */
-static inline struct pci_driver *eeh_pcid_get(struct pci_dev *pdev)
-{
-	if (!pdev || !pdev->driver)
-		return NULL;
-
-	if (!try_module_get(pdev->driver->driver.owner))
-		return NULL;
-
-	return pdev->driver;
-}
-
-/**
- * eeh_pcid_put - Dereference on the PCI device driver
- * @pdev: PCI device
- *
- * The function is called to do dereference on the PCI device
- * driver of the indicated PCI device.
- */
-static inline void eeh_pcid_put(struct pci_dev *pdev)
-{
-	if (!pdev || !pdev->driver)
-		return;
-
-	module_put(pdev->driver->driver.owner);
-}
-
-#if 0
-static void print_device_node_tree(struct pci_dn *pdn, int dent)
-{
-	int i;
-	struct device_node *pc;
-
-	if (!pdn)
-		return;
-	for (i = 0; i < dent; i++)
-		printk(" ");
-	printk("dn=%s mode=%x \tcfg_addr=%x pe_addr=%x \tfull=%s\n",
-		pdn->node->name, pdn->eeh_mode, pdn->eeh_config_addr,
-		pdn->eeh_pe_config_addr, pdn->node->full_name);
-	dent += 3;
-	pc = pdn->node->child;
-	while (pc) {
-		print_device_node_tree(PCI_DN(pc), dent);
-		pc = pc->sibling;
-	}
-}
-#endif
-
-/**
- * eeh_disable_irq - Disable interrupt for the recovering device
- * @dev: PCI device
- *
- * This routine must be called when reporting temporary or permanent
- * error to the particular PCI device to disable interrupt of that
- * device. If the device has enabled MSI or MSI-X interrupt, we needn't
- * do real work because EEH should freeze DMA transfers for those PCI
- * devices encountering EEH errors, which includes MSI or MSI-X.
- */
-static void eeh_disable_irq(struct pci_dev *dev)
-{
-	struct eeh_dev *edev = pci_dev_to_eeh_dev(dev);
-
-	/* Don't disable MSI and MSI-X interrupts. They are
-	 * effectively disabled by the DMA Stopped state
-	 * when an EEH error occurs.
-	 */
-	if (dev->msi_enabled || dev->msix_enabled)
-		return;
-
-	if (!irq_has_action(dev->irq))
-		return;
-
-	edev->mode |= EEH_DEV_IRQ_DISABLED;
-	disable_irq_nosync(dev->irq);
-}
-
-/**
- * eeh_enable_irq - Enable interrupt for the recovering device
- * @dev: PCI device
- *
- * This routine must be called to enable interrupt while failed
- * device could be resumed.
- */
-static void eeh_enable_irq(struct pci_dev *dev)
-{
-	struct eeh_dev *edev = pci_dev_to_eeh_dev(dev);
-
-	if ((edev->mode) & EEH_DEV_IRQ_DISABLED) {
-		edev->mode &= ~EEH_DEV_IRQ_DISABLED;
-		enable_irq(dev->irq);
-	}
-}
-
-/**
- * eeh_report_error - Report pci error to each device driver
- * @data: eeh device
- * @userdata: return value
- * 
- * Report an EEH error to each device driver, collect up and 
- * merge the device driver responses. Cumulative response 
- * passed back in "userdata".
- */
-static void *eeh_report_error(void *data, void *userdata)
-{
-	struct eeh_dev *edev = (struct eeh_dev *)data;
-	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
-	enum pci_ers_result rc, *res = userdata;
-	struct pci_driver *driver;
-
-	/* We might not have the associated PCI device,
-	 * then we should continue for next one.
-	 */
-	if (!dev) return NULL;
-	dev->error_state = pci_channel_io_frozen;
-
-	driver = eeh_pcid_get(dev);
-	if (!driver) return NULL;
-
-	eeh_disable_irq(dev);
-
-	if (!driver->err_handler ||
-	    !driver->err_handler->error_detected) {
-		eeh_pcid_put(dev);
-		return NULL;
-	}
-
-	rc = driver->err_handler->error_detected(dev, pci_channel_io_frozen);
-
-	/* A driver that needs a reset trumps all others */
-	if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
-	if (*res == PCI_ERS_RESULT_NONE) *res = rc;
-
-	eeh_pcid_put(dev);
-	return NULL;
-}
-
-/**
- * eeh_report_mmio_enabled - Tell drivers that MMIO has been enabled
- * @data: eeh device
- * @userdata: return value
- *
- * Tells each device driver that IO ports, MMIO and config space I/O
- * are now enabled. Collects up and merges the device driver responses.
- * Cumulative response passed back in "userdata".
- */
-static void *eeh_report_mmio_enabled(void *data, void *userdata)
-{
-	struct eeh_dev *edev = (struct eeh_dev *)data;
-	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
-	enum pci_ers_result rc, *res = userdata;
-	struct pci_driver *driver;
-
-	driver = eeh_pcid_get(dev);
-	if (!driver) return NULL;
-
-	if (!driver->err_handler ||
-	    !driver->err_handler->mmio_enabled) {
-		eeh_pcid_put(dev);
-		return NULL;
-	}
-
-	rc = driver->err_handler->mmio_enabled(dev);
-
-	/* A driver that needs a reset trumps all others */
-	if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
-	if (*res == PCI_ERS_RESULT_NONE) *res = rc;
-
-	eeh_pcid_put(dev);
-	return NULL;
-}
-
-/**
- * eeh_report_reset - Tell device that slot has been reset
- * @data: eeh device
- * @userdata: return value
- *
- * This routine must be called while EEH tries to reset particular
- * PCI device so that the associated PCI device driver could take
- * some actions, usually to save data the driver needs so that the
- * driver can work again while the device is recovered.
- */
-static void *eeh_report_reset(void *data, void *userdata)
-{
-	struct eeh_dev *edev = (struct eeh_dev *)data;
-	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
-	enum pci_ers_result rc, *res = userdata;
-	struct pci_driver *driver;
-
-	if (!dev) return NULL;
-	dev->error_state = pci_channel_io_normal;
-
-	driver = eeh_pcid_get(dev);
-	if (!driver) return NULL;
-
-	eeh_enable_irq(dev);
-
-	if (!driver->err_handler ||
-	    !driver->err_handler->slot_reset) {
-		eeh_pcid_put(dev);
-		return NULL;
-	}
-
-	rc = driver->err_handler->slot_reset(dev);
-	if ((*res == PCI_ERS_RESULT_NONE) ||
-	    (*res == PCI_ERS_RESULT_RECOVERED)) *res = rc;
-	if (*res == PCI_ERS_RESULT_DISCONNECT &&
-	     rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
-
-	eeh_pcid_put(dev);
-	return NULL;
-}
-
-/**
- * eeh_report_resume - Tell device to resume normal operations
- * @data: eeh device
- * @userdata: return value
- *
- * This routine must be called to notify the device driver that it
- * could resume so that the device driver can do some initialization
- * to make the recovered device work again.
- */
-static void *eeh_report_resume(void *data, void *userdata)
-{
-	struct eeh_dev *edev = (struct eeh_dev *)data;
-	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
-	struct pci_driver *driver;
-
-	if (!dev) return NULL;
-	dev->error_state = pci_channel_io_normal;
-
-	driver = eeh_pcid_get(dev);
-	if (!driver) return NULL;
-
-	eeh_enable_irq(dev);
-
-	if (!driver->err_handler ||
-	    !driver->err_handler->resume) {
-		eeh_pcid_put(dev);
-		return NULL;
-	}
-
-	driver->err_handler->resume(dev);
-
-	eeh_pcid_put(dev);
-	return NULL;
-}
-
-/**
- * eeh_report_failure - Tell device driver that device is dead.
- * @data: eeh device
- * @userdata: return value
- *
- * This informs the device driver that the device is permanently
- * dead, and that no further recovery attempts will be made on it.
- */
-static void *eeh_report_failure(void *data, void *userdata)
-{
-	struct eeh_dev *edev = (struct eeh_dev *)data;
-	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
-	struct pci_driver *driver;
-
-	if (!dev) return NULL;
-	dev->error_state = pci_channel_io_perm_failure;
-
-	driver = eeh_pcid_get(dev);
-	if (!driver) return NULL;
-
-	eeh_disable_irq(dev);
-
-	if (!driver->err_handler ||
-	    !driver->err_handler->error_detected) {
-		eeh_pcid_put(dev);
-		return NULL;
-	}
-
-	driver->err_handler->error_detected(dev, pci_channel_io_perm_failure);
-
-	eeh_pcid_put(dev);
-	return NULL;
-}
-
-/**
- * eeh_reset_device - Perform actual reset of a pci slot
- * @pe: EEH PE
- * @bus: PCI bus corresponding to the isolcated slot
- *
- * This routine must be called to do reset on the indicated PE.
- * During the reset, udev might be invoked because those affected
- * PCI devices will be removed and then added.
- */
-static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
-{
-	int cnt, rc;
-
-	/* pcibios will clear the counter; save the value */
-	cnt = pe->freeze_count;
-
-	/*
-	 * We don't remove the corresponding PE instances because
-	 * we need the information afterwords. The attached EEH
-	 * devices are expected to be attached soon when calling
-	 * into pcibios_add_pci_devices().
-	 */
-	if (bus)
-		__pcibios_remove_pci_devices(bus, 0);
-
-	/* Reset the pci controller. (Asserts RST#; resets config space).
-	 * Reconfigure bridges and devices. Don't try to bring the system
-	 * up if the reset failed for some reason.
-	 */
-	rc = eeh_reset_pe(pe);
-	if (rc)
-		return rc;
-
-	/* Restore PE */
-	eeh_ops->configure_bridge(pe);
-	eeh_pe_restore_bars(pe);
-
-	/* Give the system 5 seconds to finish running the user-space
-	 * hotplug shutdown scripts, e.g. ifdown for ethernet.  Yes, 
-	 * this is a hack, but if we don't do this, and try to bring 
-	 * the device up before the scripts have taken it down, 
-	 * potentially weird things happen.
-	 */
-	if (bus) {
-		ssleep(5);
-		pcibios_add_pci_devices(bus);
-	}
-	pe->freeze_count = cnt;
-
-	return 0;
-}
-
-/* The longest amount of time to wait for a pci device
- * to come back on line, in seconds.
- */
-#define MAX_WAIT_FOR_RECOVERY 150
-
-/**
- * eeh_handle_event - Reset a PCI device after hard lockup.
- * @pe: EEH PE
- *
- * While PHB detects address or data parity errors on particular PCI
- * slot, the associated PE will be frozen. Besides, DMA's occurring
- * to wild addresses (which usually happen due to bugs in device
- * drivers or in PCI adapter firmware) can cause EEH error. #SERR,
- * #PERR or other misc PCI-related errors also can trigger EEH errors.
- *
- * Recovery process consists of unplugging the device driver (which
- * generated hotplug events to userspace), then issuing a PCI #RST to
- * the device, then reconfiguring the PCI config space for all bridges
- * & devices under this slot, and then finally restarting the device
- * drivers (which cause a second set of hotplug events to go out to
- * userspace).
- */
-void eeh_handle_event(struct eeh_pe *pe)
-{
-	struct pci_bus *frozen_bus;
-	int rc = 0;
-	enum pci_ers_result result = PCI_ERS_RESULT_NONE;
-
-	frozen_bus = eeh_pe_bus_get(pe);
-	if (!frozen_bus) {
-		pr_err("%s: Cannot find PCI bus for PHB#%d-PE#%x\n",
-			__func__, pe->phb->global_number, pe->addr);
-		return;
-	}
-
-	pe->freeze_count++;
-	if (pe->freeze_count > EEH_MAX_ALLOWED_FREEZES)
-		goto excess_failures;
-	pr_warning("EEH: This PCI device has failed %d times in the last hour\n",
-		pe->freeze_count);
-
-	/* Walk the various device drivers attached to this slot through
-	 * a reset sequence, giving each an opportunity to do what it needs
-	 * to accomplish the reset.  Each child gets a report of the
-	 * status ... if any child can't handle the reset, then the entire
-	 * slot is dlpar removed and added.
-	 */
-	eeh_pe_dev_traverse(pe, eeh_report_error, &result);
-
-	/* Get the current PCI slot state. This can take a long time,
-	 * sometimes over 3 seconds for certain systems.
-	 */
-	rc = eeh_ops->wait_state(pe, MAX_WAIT_FOR_RECOVERY*1000);
-	if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) {
-		printk(KERN_WARNING "EEH: Permanent failure\n");
-		goto hard_fail;
-	}
-
-	/* Since rtas may enable MMIO when posting the error log,
-	 * don't post the error log until after all dev drivers
-	 * have been informed.
-	 */
-	eeh_slot_error_detail(pe, EEH_LOG_TEMP);
-
-	/* If all device drivers were EEH-unaware, then shut
-	 * down all of the device drivers, and hope they
-	 * go down willingly, without panicing the system.
-	 */
-	if (result == PCI_ERS_RESULT_NONE) {
-		rc = eeh_reset_device(pe, frozen_bus);
-		if (rc) {
-			printk(KERN_WARNING "EEH: Unable to reset, rc=%d\n", rc);
-			goto hard_fail;
-		}
-	}
-
-	/* If all devices reported they can proceed, then re-enable MMIO */
-	if (result == PCI_ERS_RESULT_CAN_RECOVER) {
-		rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
-
-		if (rc < 0)
-			goto hard_fail;
-		if (rc) {
-			result = PCI_ERS_RESULT_NEED_RESET;
-		} else {
-			result = PCI_ERS_RESULT_NONE;
-			eeh_pe_dev_traverse(pe, eeh_report_mmio_enabled, &result);
-		}
-	}
-
-	/* If all devices reported they can proceed, then re-enable DMA */
-	if (result == PCI_ERS_RESULT_CAN_RECOVER) {
-		rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA);
-
-		if (rc < 0)
-			goto hard_fail;
-		if (rc)
-			result = PCI_ERS_RESULT_NEED_RESET;
-		else
-			result = PCI_ERS_RESULT_RECOVERED;
-	}
-
-	/* If any device has a hard failure, then shut off everything. */
-	if (result == PCI_ERS_RESULT_DISCONNECT) {
-		printk(KERN_WARNING "EEH: Device driver gave up\n");
-		goto hard_fail;
-	}
-
-	/* If any device called out for a reset, then reset the slot */
-	if (result == PCI_ERS_RESULT_NEED_RESET) {
-		rc = eeh_reset_device(pe, NULL);
-		if (rc) {
-			printk(KERN_WARNING "EEH: Cannot reset, rc=%d\n", rc);
-			goto hard_fail;
-		}
-		result = PCI_ERS_RESULT_NONE;
-		eeh_pe_dev_traverse(pe, eeh_report_reset, &result);
-	}
-
-	/* All devices should claim they have recovered by now. */
-	if ((result != PCI_ERS_RESULT_RECOVERED) &&
-	    (result != PCI_ERS_RESULT_NONE)) {
-		printk(KERN_WARNING "EEH: Not recovered\n");
-		goto hard_fail;
-	}
-
-	/* Tell all device drivers that they can resume operations */
-	eeh_pe_dev_traverse(pe, eeh_report_resume, NULL);
-
-	return;
-	
-excess_failures:
-	/*
-	 * About 90% of all real-life EEH failures in the field
-	 * are due to poorly seated PCI cards. Only 10% or so are
-	 * due to actual, failed cards.
-	 */
-	pr_err("EEH: PHB#%d-PE#%x has failed %d times in the\n"
-	       "last hour and has been permanently disabled.\n"
-	       "Please try reseating or replacing it.\n",
-		pe->phb->global_number, pe->addr,
-		pe->freeze_count);
-	goto perm_error;
-
-hard_fail:
-	pr_err("EEH: Unable to recover from failure from PHB#%d-PE#%x.\n"
-	       "Please try reseating or replacing it\n",
-		pe->phb->global_number, pe->addr);
-
-perm_error:
-	eeh_slot_error_detail(pe, EEH_LOG_PERM);
-
-	/* Notify all devices that they're about to go down. */
-	eeh_pe_dev_traverse(pe, eeh_report_failure, NULL);
-
-	/* Shut down the device drivers for good. */
-	if (frozen_bus)
-		pcibios_remove_pci_devices(frozen_bus);
-}
-
diff --git a/arch/powerpc/platforms/pseries/eeh_event.c b/arch/powerpc/platforms/pseries/eeh_event.c
deleted file mode 100644
index 185bedd926d..00000000000
--- a/arch/powerpc/platforms/pseries/eeh_event.c
+++ /dev/null
@@ -1,142 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
- *
- * Copyright (c) 2005 Linas Vepstas <linas@linas.org>
- */
-
-#include <linux/delay.h>
-#include <linux/list.h>
-#include <linux/mutex.h>
-#include <linux/sched.h>
-#include <linux/pci.h>
-#include <linux/slab.h>
-#include <linux/workqueue.h>
-#include <linux/kthread.h>
-#include <asm/eeh_event.h>
-#include <asm/ppc-pci.h>
-
-/** Overview:
- *  EEH error states may be detected within exception handlers;
- *  however, the recovery processing needs to occur asynchronously
- *  in a normal kernel context and not an interrupt context.
- *  This pair of routines creates an event and queues it onto a
- *  work-queue, where a worker thread can drive recovery.
- */
-
-/* EEH event workqueue setup. */
-static DEFINE_SPINLOCK(eeh_eventlist_lock);
-LIST_HEAD(eeh_eventlist);
-static void eeh_thread_launcher(struct work_struct *);
-DECLARE_WORK(eeh_event_wq, eeh_thread_launcher);
-
-/* Serialize reset sequences for a given pci device */
-DEFINE_MUTEX(eeh_event_mutex);
-
-/**
- * eeh_event_handler - Dispatch EEH events.
- * @dummy - unused
- *
- * The detection of a frozen slot can occur inside an interrupt,
- * where it can be hard to do anything about it.  The goal of this
- * routine is to pull these detection events out of the context
- * of the interrupt handler, and re-dispatch them for processing
- * at a later time in a normal context.
- */
-static int eeh_event_handler(void * dummy)
-{
-	unsigned long flags;
-	struct eeh_event *event;
-	struct eeh_pe *pe;
-
-	spin_lock_irqsave(&eeh_eventlist_lock, flags);
-	event = NULL;
-
-	/* Unqueue the event, get ready to process. */
-	if (!list_empty(&eeh_eventlist)) {
-		event = list_entry(eeh_eventlist.next, struct eeh_event, list);
-		list_del(&event->list);
-	}
-	spin_unlock_irqrestore(&eeh_eventlist_lock, flags);
-
-	if (event == NULL)
-		return 0;
-
-	/* Serialize processing of EEH events */
-	mutex_lock(&eeh_event_mutex);
-	pe = event->pe;
-	eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
-	pr_info("EEH: Detected PCI bus error on PHB#%d-PE#%x\n",
-		pe->phb->global_number, pe->addr);
-
-	set_current_state(TASK_INTERRUPTIBLE);	/* Don't add to load average */
-	eeh_handle_event(pe);
-	eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
-
-	kfree(event);
-	mutex_unlock(&eeh_event_mutex);
-
-	/* If there are no new errors after an hour, clear the counter. */
-	if (pe && pe->freeze_count > 0) {
-		msleep_interruptible(3600*1000);
-		if (pe->freeze_count > 0)
-			pe->freeze_count--;
-
-	}
-
-	return 0;
-}
-
-/**
- * eeh_thread_launcher - Start kernel thread to handle EEH events
- * @dummy - unused
- *
- * This routine is called to start the kernel thread for processing
- * EEH event.
- */
-static void eeh_thread_launcher(struct work_struct *dummy)
-{
-	if (IS_ERR(kthread_run(eeh_event_handler, NULL, "eehd")))
-		printk(KERN_ERR "Failed to start EEH daemon\n");
-}
-
-/**
- * eeh_send_failure_event - Generate a PCI error event
- * @pe: EEH PE
- *
- * This routine can be called within an interrupt context;
- * the actual event will be delivered in a normal context
- * (from a workqueue).
- */
-int eeh_send_failure_event(struct eeh_pe *pe)
-{
-	unsigned long flags;
-	struct eeh_event *event;
-
-	event = kzalloc(sizeof(*event), GFP_ATOMIC);
-	if (!event) {
-		pr_err("EEH: out of memory, event not handled\n");
-		return -ENOMEM;
-	}
-	event->pe = pe;
-
-	/* We may or may not be called in an interrupt context */
-	spin_lock_irqsave(&eeh_eventlist_lock, flags);
-	list_add(&event->list, &eeh_eventlist);
-	spin_unlock_irqrestore(&eeh_eventlist_lock, flags);
-
-	schedule_work(&eeh_event_wq);
-
-	return 0;
-}
diff --git a/arch/powerpc/platforms/pseries/eeh_pe.c b/arch/powerpc/platforms/pseries/eeh_pe.c
deleted file mode 100644
index fe43d1aa2cf..00000000000
--- a/arch/powerpc/platforms/pseries/eeh_pe.c
+++ /dev/null
@@ -1,652 +0,0 @@
-/*
- * The file intends to implement PE based on the information from
- * platforms. Basically, there have 3 types of PEs: PHB/Bus/Device.
- * All the PEs should be organized as hierarchy tree. The first level
- * of the tree will be associated to existing PHBs since the particular
- * PE is only meaningful in one PHB domain.
- *
- * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2012.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
- */
-
-#include <linux/export.h>
-#include <linux/gfp.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/string.h>
-
-#include <asm/pci-bridge.h>
-#include <asm/ppc-pci.h>
-
-static LIST_HEAD(eeh_phb_pe);
-
-/**
- * eeh_pe_alloc - Allocate PE
- * @phb: PCI controller
- * @type: PE type
- *
- * Allocate PE instance dynamically.
- */
-static struct eeh_pe *eeh_pe_alloc(struct pci_controller *phb, int type)
-{
-	struct eeh_pe *pe;
-
-	/* Allocate PHB PE */
-	pe = kzalloc(sizeof(struct eeh_pe), GFP_KERNEL);
-	if (!pe) return NULL;
-
-	/* Initialize PHB PE */
-	pe->type = type;
-	pe->phb = phb;
-	INIT_LIST_HEAD(&pe->child_list);
-	INIT_LIST_HEAD(&pe->child);
-	INIT_LIST_HEAD(&pe->edevs);
-
-	return pe;
-}
-
-/**
- * eeh_phb_pe_create - Create PHB PE
- * @phb: PCI controller
- *
- * The function should be called while the PHB is detected during
- * system boot or PCI hotplug in order to create PHB PE.
- */
-int eeh_phb_pe_create(struct pci_controller *phb)
-{
-	struct eeh_pe *pe;
-
-	/* Allocate PHB PE */
-	pe = eeh_pe_alloc(phb, EEH_PE_PHB);
-	if (!pe) {
-		pr_err("%s: out of memory!\n", __func__);
-		return -ENOMEM;
-	}
-
-	/* Put it into the list */
-	eeh_lock();
-	list_add_tail(&pe->child, &eeh_phb_pe);
-	eeh_unlock();
-
-	pr_debug("EEH: Add PE for PHB#%d\n", phb->global_number);
-
-	return 0;
-}
-
-/**
- * eeh_phb_pe_get - Retrieve PHB PE based on the given PHB
- * @phb: PCI controller
- *
- * The overall PEs form hierarchy tree. The first layer of the
- * hierarchy tree is composed of PHB PEs. The function is used
- * to retrieve the corresponding PHB PE according to the given PHB.
- */
-static struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb)
-{
-	struct eeh_pe *pe;
-
-	list_for_each_entry(pe, &eeh_phb_pe, child) {
-		/*
-		 * Actually, we needn't check the type since
-		 * the PE for PHB has been determined when that
-		 * was created.
-		 */
-		if ((pe->type & EEH_PE_PHB) && pe->phb == phb)
-			return pe;
-	}
-
-	return NULL;
-}
-
-/**
- * eeh_pe_next - Retrieve the next PE in the tree
- * @pe: current PE
- * @root: root PE
- *
- * The function is used to retrieve the next PE in the
- * hierarchy PE tree.
- */
-static struct eeh_pe *eeh_pe_next(struct eeh_pe *pe,
-				  struct eeh_pe *root)
-{
-	struct list_head *next = pe->child_list.next;
-
-	if (next == &pe->child_list) {
-		while (1) {
-			if (pe == root)
-				return NULL;
-			next = pe->child.next;
-			if (next != &pe->parent->child_list)
-				break;
-			pe = pe->parent;
-		}
-	}
-
-	return list_entry(next, struct eeh_pe, child);
-}
-
-/**
- * eeh_pe_traverse - Traverse PEs in the specified PHB
- * @root: root PE
- * @fn: callback
- * @flag: extra parameter to callback
- *
- * The function is used to traverse the specified PE and its
- * child PEs. The traversing is to be terminated once the
- * callback returns something other than NULL, or no more PEs
- * to be traversed.
- */
-static void *eeh_pe_traverse(struct eeh_pe *root,
-			eeh_traverse_func fn, void *flag)
-{
-	struct eeh_pe *pe;
-	void *ret;
-
-	for (pe = root; pe; pe = eeh_pe_next(pe, root)) {
-		ret = fn(pe, flag);
-		if (ret) return ret;
-	}
-
-	return NULL;
-}
-
-/**
- * eeh_pe_dev_traverse - Traverse the devices from the PE
- * @root: EEH PE
- * @fn: function callback
- * @flag: extra parameter to callback
- *
- * The function is used to traverse the devices of the specified
- * PE and its child PEs.
- */
-void *eeh_pe_dev_traverse(struct eeh_pe *root,
-		eeh_traverse_func fn, void *flag)
-{
-	struct eeh_pe *pe;
-	struct eeh_dev *edev;
-	void *ret;
-
-	if (!root) {
-		pr_warning("%s: Invalid PE %p\n", __func__, root);
-		return NULL;
-	}
-
-	eeh_lock();
-
-	/* Traverse root PE */
-	for (pe = root; pe; pe = eeh_pe_next(pe, root)) {
-		eeh_pe_for_each_dev(pe, edev) {
-			ret = fn(edev, flag);
-			if (ret) {
-				eeh_unlock();
-				return ret;
-			}
-		}
-	}
-
-	eeh_unlock();
-
-	return NULL;
-}
-
-/**
- * __eeh_pe_get - Check the PE address
- * @data: EEH PE
- * @flag: EEH device
- *
- * For one particular PE, it can be identified by PE address
- * or tranditional BDF address. BDF address is composed of
- * Bus/Device/Function number. The extra data referred by flag
- * indicates which type of address should be used.
- */
-static void *__eeh_pe_get(void *data, void *flag)
-{
-	struct eeh_pe *pe = (struct eeh_pe *)data;
-	struct eeh_dev *edev = (struct eeh_dev *)flag;
-
-	/* Unexpected PHB PE */
-	if (pe->type & EEH_PE_PHB)
-		return NULL;
-
-	/* We prefer PE address */
-	if (edev->pe_config_addr &&
-	   (edev->pe_config_addr == pe->addr))
-		return pe;
-
-	/* Try BDF address */
-	if (edev->pe_config_addr &&
-	   (edev->config_addr == pe->config_addr))
-		return pe;
-
-	return NULL;
-}
-
-/**
- * eeh_pe_get - Search PE based on the given address
- * @edev: EEH device
- *
- * Search the corresponding PE based on the specified address which
- * is included in the eeh device. The function is used to check if
- * the associated PE has been created against the PE address. It's
- * notable that the PE address has 2 format: traditional PE address
- * which is composed of PCI bus/device/function number, or unified
- * PE address.
- */
-static struct eeh_pe *eeh_pe_get(struct eeh_dev *edev)
-{
-	struct eeh_pe *root = eeh_phb_pe_get(edev->phb);
-	struct eeh_pe *pe;
-
-	pe = eeh_pe_traverse(root, __eeh_pe_get, edev);
-
-	return pe;
-}
-
-/**
- * eeh_pe_get_parent - Retrieve the parent PE
- * @edev: EEH device
- *
- * The whole PEs existing in the system are organized as hierarchy
- * tree. The function is used to retrieve the parent PE according
- * to the parent EEH device.
- */
-static struct eeh_pe *eeh_pe_get_parent(struct eeh_dev *edev)
-{
-	struct device_node *dn;
-	struct eeh_dev *parent;
-
-	/*
-	 * It might have the case for the indirect parent
-	 * EEH device already having associated PE, but
-	 * the direct parent EEH device doesn't have yet.
-	 */
-	dn = edev->dn->parent;
-	while (dn) {
-		/* We're poking out of PCI territory */
-		if (!PCI_DN(dn)) return NULL;
-
-		parent = of_node_to_eeh_dev(dn);
-		/* We're poking out of PCI territory */
-		if (!parent) return NULL;
-
-		if (parent->pe)
-			return parent->pe;
-
-		dn = dn->parent;
-	}
-
-	return NULL;
-}
-
-/**
- * eeh_add_to_parent_pe - Add EEH device to parent PE
- * @edev: EEH device
- *
- * Add EEH device to the parent PE. If the parent PE already
- * exists, the PE type will be changed to EEH_PE_BUS. Otherwise,
- * we have to create new PE to hold the EEH device and the new
- * PE will be linked to its parent PE as well.
- */
-int eeh_add_to_parent_pe(struct eeh_dev *edev)
-{
-	struct eeh_pe *pe, *parent;
-
-	eeh_lock();
-
-	/*
-	 * Search the PE has been existing or not according
-	 * to the PE address. If that has been existing, the
-	 * PE should be composed of PCI bus and its subordinate
-	 * components.
-	 */
-	pe = eeh_pe_get(edev);
-	if (pe && !(pe->type & EEH_PE_INVALID)) {
-		if (!edev->pe_config_addr) {
-			eeh_unlock();
-			pr_err("%s: PE with addr 0x%x already exists\n",
-				__func__, edev->config_addr);
-			return -EEXIST;
-		}
-
-		/* Mark the PE as type of PCI bus */
-		pe->type = EEH_PE_BUS;
-		edev->pe = pe;
-
-		/* Put the edev to PE */
-		list_add_tail(&edev->list, &pe->edevs);
-		eeh_unlock();
-		pr_debug("EEH: Add %s to Bus PE#%x\n",
-			edev->dn->full_name, pe->addr);
-
-		return 0;
-	} else if (pe && (pe->type & EEH_PE_INVALID)) {
-		list_add_tail(&edev->list, &pe->edevs);
-		edev->pe = pe;
-		/*
-		 * We're running to here because of PCI hotplug caused by
-		 * EEH recovery. We need clear EEH_PE_INVALID until the top.
-		 */
-		parent = pe;
-		while (parent) {
-			if (!(parent->type & EEH_PE_INVALID))
-				break;
-			parent->type &= ~EEH_PE_INVALID;
-			parent = parent->parent;
-		}
-		eeh_unlock();
-		pr_debug("EEH: Add %s to Device PE#%x, Parent PE#%x\n",
-			edev->dn->full_name, pe->addr, pe->parent->addr);
-
-		return 0;
-	}
-
-	/* Create a new EEH PE */
-	pe = eeh_pe_alloc(edev->phb, EEH_PE_DEVICE);
-	if (!pe) {
-		eeh_unlock();
-		pr_err("%s: out of memory!\n", __func__);
-		return -ENOMEM;
-	}
-	pe->addr	= edev->pe_config_addr;
-	pe->config_addr	= edev->config_addr;
-
-	/*
-	 * Put the new EEH PE into hierarchy tree. If the parent
-	 * can't be found, the newly created PE will be attached
-	 * to PHB directly. Otherwise, we have to associate the
-	 * PE with its parent.
-	 */
-	parent = eeh_pe_get_parent(edev);
-	if (!parent) {
-		parent = eeh_phb_pe_get(edev->phb);
-		if (!parent) {
-			eeh_unlock();
-			pr_err("%s: No PHB PE is found (PHB Domain=%d)\n",
-				__func__, edev->phb->global_number);
-			edev->pe = NULL;
-			kfree(pe);
-			return -EEXIST;
-		}
-	}
-	pe->parent = parent;
-
-	/*
-	 * Put the newly created PE into the child list and
-	 * link the EEH device accordingly.
-	 */
-	list_add_tail(&pe->child, &parent->child_list);
-	list_add_tail(&edev->list, &pe->edevs);
-	edev->pe = pe;
-	eeh_unlock();
-	pr_debug("EEH: Add %s to Device PE#%x, Parent PE#%x\n",
-		edev->dn->full_name, pe->addr, pe->parent->addr);
-
-	return 0;
-}
-
-/**
- * eeh_rmv_from_parent_pe - Remove one EEH device from the associated PE
- * @edev: EEH device
- * @purge_pe: remove PE or not
- *
- * The PE hierarchy tree might be changed when doing PCI hotplug.
- * Also, the PCI devices or buses could be removed from the system
- * during EEH recovery. So we have to call the function remove the
- * corresponding PE accordingly if necessary.
- */
-int eeh_rmv_from_parent_pe(struct eeh_dev *edev, int purge_pe)
-{
-	struct eeh_pe *pe, *parent, *child;
-	int cnt;
-
-	if (!edev->pe) {
-		pr_warning("%s: No PE found for EEH device %s\n",
-			__func__, edev->dn->full_name);
-		return -EEXIST;
-	}
-
-	eeh_lock();
-
-	/* Remove the EEH device */
-	pe = edev->pe;
-	edev->pe = NULL;
-	list_del(&edev->list);
-
-	/*
-	 * Check if the parent PE includes any EEH devices.
-	 * If not, we should delete that. Also, we should
-	 * delete the parent PE if it doesn't have associated
-	 * child PEs and EEH devices.
-	 */
-	while (1) {
-		parent = pe->parent;
-		if (pe->type & EEH_PE_PHB)
-			break;
-
-		if (purge_pe) {
-			if (list_empty(&pe->edevs) &&
-			    list_empty(&pe->child_list)) {
-				list_del(&pe->child);
-				kfree(pe);
-			} else {
-				break;
-			}
-		} else {
-			if (list_empty(&pe->edevs)) {
-				cnt = 0;
-				list_for_each_entry(child, &pe->child_list, child) {
-					if (!(child->type & EEH_PE_INVALID)) {
-						cnt++;
-						break;
-					}
-				}
-
-				if (!cnt)
-					pe->type |= EEH_PE_INVALID;
-				else
-					break;
-			}
-		}
-
-		pe = parent;
-	}
-
-	eeh_unlock();
-
-	return 0;
-}
-
-/**
- * __eeh_pe_state_mark - Mark the state for the PE
- * @data: EEH PE
- * @flag: state
- *
- * The function is used to mark the indicated state for the given
- * PE. Also, the associated PCI devices will be put into IO frozen
- * state as well.
- */
-static void *__eeh_pe_state_mark(void *data, void *flag)
-{
-	struct eeh_pe *pe = (struct eeh_pe *)data;
-	int state = *((int *)flag);
-	struct eeh_dev *tmp;
-	struct pci_dev *pdev;
-
-	/*
-	 * Mark the PE with the indicated state. Also,
-	 * the associated PCI device will be put into
-	 * I/O frozen state to avoid I/O accesses from
-	 * the PCI device driver.
-	 */
-	pe->state |= state;
-	eeh_pe_for_each_dev(pe, tmp) {
-		pdev = eeh_dev_to_pci_dev(tmp);
-		if (pdev)
-			pdev->error_state = pci_channel_io_frozen;
-	}
-
-	return NULL;
-}
-
-/**
- * eeh_pe_state_mark - Mark specified state for PE and its associated device
- * @pe: EEH PE
- *
- * EEH error affects the current PE and its child PEs. The function
- * is used to mark appropriate state for the affected PEs and the
- * associated devices.
- */
-void eeh_pe_state_mark(struct eeh_pe *pe, int state)
-{
-	eeh_lock();
-	eeh_pe_traverse(pe, __eeh_pe_state_mark, &state);
-	eeh_unlock();
-}
-
-/**
- * __eeh_pe_state_clear - Clear state for the PE
- * @data: EEH PE
- * @flag: state
- *
- * The function is used to clear the indicated state from the
- * given PE. Besides, we also clear the check count of the PE
- * as well.
- */
-static void *__eeh_pe_state_clear(void *data, void *flag)
-{
-	struct eeh_pe *pe = (struct eeh_pe *)data;
-	int state = *((int *)flag);
-
-	pe->state &= ~state;
-	pe->check_count = 0;
-
-	return NULL;
-}
-
-/**
- * eeh_pe_state_clear - Clear state for the PE and its children
- * @pe: PE
- * @state: state to be cleared
- *
- * When the PE and its children has been recovered from error,
- * we need clear the error state for that. The function is used
- * for the purpose.
- */
-void eeh_pe_state_clear(struct eeh_pe *pe, int state)
-{
-	eeh_lock();
-	eeh_pe_traverse(pe, __eeh_pe_state_clear, &state);
-	eeh_unlock();
-}
-
-/**
- * eeh_restore_one_device_bars - Restore the Base Address Registers for one device
- * @data: EEH device
- * @flag: Unused
- *
- * Loads the PCI configuration space base address registers,
- * the expansion ROM base address, the latency timer, and etc.
- * from the saved values in the device node.
- */
-static void *eeh_restore_one_device_bars(void *data, void *flag)
-{
-	int i;
-	u32 cmd;
-	struct eeh_dev *edev = (struct eeh_dev *)data;
-	struct device_node *dn = eeh_dev_to_of_node(edev);
-
-	for (i = 4; i < 10; i++)
-		eeh_ops->write_config(dn, i*4, 4, edev->config_space[i]);
-	/* 12 == Expansion ROM Address */
-	eeh_ops->write_config(dn, 12*4, 4, edev->config_space[12]);
-
-#define BYTE_SWAP(OFF) (8*((OFF)/4)+3-(OFF))
-#define SAVED_BYTE(OFF) (((u8 *)(edev->config_space))[BYTE_SWAP(OFF)])
-
-	eeh_ops->write_config(dn, PCI_CACHE_LINE_SIZE, 1,
-		SAVED_BYTE(PCI_CACHE_LINE_SIZE));
-	eeh_ops->write_config(dn, PCI_LATENCY_TIMER, 1,
-		SAVED_BYTE(PCI_LATENCY_TIMER));
-
-	/* max latency, min grant, interrupt pin and line */
-	eeh_ops->write_config(dn, 15*4, 4, edev->config_space[15]);
-
-	/*
-	 * Restore PERR & SERR bits, some devices require it,
-	 * don't touch the other command bits
-	 */
-	eeh_ops->read_config(dn, PCI_COMMAND, 4, &cmd);
-	if (edev->config_space[1] & PCI_COMMAND_PARITY)
-		cmd |= PCI_COMMAND_PARITY;
-	else
-		cmd &= ~PCI_COMMAND_PARITY;
-	if (edev->config_space[1] & PCI_COMMAND_SERR)
-		cmd |= PCI_COMMAND_SERR;
-	else
-		cmd &= ~PCI_COMMAND_SERR;
-	eeh_ops->write_config(dn, PCI_COMMAND, 4, cmd);
-
-	return NULL;
-}
-
-/**
- * eeh_pe_restore_bars - Restore the PCI config space info
- * @pe: EEH PE
- *
- * This routine performs a recursive walk to the children
- * of this device as well.
- */
-void eeh_pe_restore_bars(struct eeh_pe *pe)
-{
-	/*
-	 * We needn't take the EEH lock since eeh_pe_dev_traverse()
-	 * will take that.
-	 */
-	eeh_pe_dev_traverse(pe, eeh_restore_one_device_bars, NULL);
-}
-
-/**
- * eeh_pe_bus_get - Retrieve PCI bus according to the given PE
- * @pe: EEH PE
- *
- * Retrieve the PCI bus according to the given PE. Basically,
- * there're 3 types of PEs: PHB/Bus/Device. For PHB PE, the
- * primary PCI bus will be retrieved. The parent bus will be
- * returned for BUS PE. However, we don't have associated PCI
- * bus for DEVICE PE.
- */
-struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe)
-{
-	struct pci_bus *bus = NULL;
-	struct eeh_dev *edev;
-	struct pci_dev *pdev;
-
-	eeh_lock();
-
-	if (pe->type & EEH_PE_PHB) {
-		bus = pe->phb->bus;
-	} else if (pe->type & EEH_PE_BUS) {
-		edev = list_first_entry(&pe->edevs, struct eeh_dev, list);
-		pdev = eeh_dev_to_pci_dev(edev);
-		if (pdev)
-			bus = pdev->bus;
-	}
-
-	eeh_unlock();
-
-	return bus;
-}
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index b456b157d33..0bec0c02c5e 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -133,6 +133,78 @@ static int pseries_eeh_init(void)
 	return 0;
 }
 
+/* Return the offset of the capability list pointer, or 0 if there is none. */
+static int pseries_eeh_cap_start(struct device_node *dn)
+{
+	struct pci_dn *pdn = PCI_DN(dn);
+	u32 status;
+
+	if (!pdn)
+		return 0;
+
+	if (rtas_read_config(pdn, PCI_STATUS, 2, &status) != PCIBIOS_SUCCESSFUL)
+		return 0;	/* status unreadable: report no capability list */
+
+	return (status & PCI_STATUS_CAP_LIST) ? PCI_CAPABILITY_LIST : 0;
+}
+
+/* Walk the capability list of @dn; return the offset of @cap, or 0. */
+static int pseries_eeh_find_cap(struct device_node *dn, int cap)
+{
+	struct pci_dn *pdn = PCI_DN(dn);
+	int pos = pseries_eeh_cap_start(dn);
+	int rc, cnt = 48;	/* cnt: maximal number of capabilities */
+	u32 id, next;
+
+	if (!pos)
+		return 0;
+
+	while (cnt--) {
+		rc = rtas_read_config(pdn, pos, 1, &next);
+		if (rc != PCIBIOS_SUCCESSFUL || next < 0x40)
+			break;
+		pos = next & ~3;
+		rc = rtas_read_config(pdn, pos + PCI_CAP_LIST_ID, 1, &id);
+		if (rc != PCIBIOS_SUCCESSFUL || id == 0xff)
+			break;
+		if (id == cap)
+			return pos;
+		pos += PCI_CAP_LIST_NEXT;
+	}
+
+	return 0;
+}
+
+static int pseries_eeh_find_ecap(struct device_node *dn, int cap)
+{
+	struct pci_dn *pdn = PCI_DN(dn);
+	struct eeh_dev *edev = of_node_to_eeh_dev(dn);
+	u32 header;
+	int pos = 256;	/* offset of the first header read below */
+	int ttl = (4096 - 256) / 8;	/* caps the walk at 480 headers */
+
+	if (!edev || !edev->pcie_cap)
+		return 0;	/* no eeh_dev, or no PCIe capability recorded */
+	if (rtas_read_config(pdn, pos, 4, &header) != PCIBIOS_SUCCESSFUL)
+		return 0;
+	else if (!header)
+		return 0;	/* all-zero first header: nothing to walk */
+
+	while (ttl-- > 0) {
+		if (PCI_EXT_CAP_ID(header) == cap && pos)
+			return pos;	/* offset of the matching header */
+
+		pos = PCI_EXT_CAP_NEXT(header);
+		if (pos < 256)
+			break;	/* next offset below 256 ends the walk */
+
+		if (rtas_read_config(pdn, pos, 4, &header) != PCIBIOS_SUCCESSFUL)
+			break;
+	}
+
+	return 0;	/* not found */
+}
+
 /**
  * pseries_eeh_of_probe - EEH probe on the given device
  * @dn: OF node
@@ -146,30 +218,54 @@ static void *pseries_eeh_of_probe(struct device_node *dn, void *flag)
 {
 	struct eeh_dev *edev;
 	struct eeh_pe pe;
-	const u32 *class_code, *vendor_id, *device_id;
-	const u32 *regs;
+	struct pci_dn *pdn = PCI_DN(dn);
+	const __be32 *classp, *vendorp, *devicep;
+	u32 class_code;
+	const __be32 *regs;
+	u32 pcie_flags;
 	int enable = 0;
 	int ret;
 
 	/* Retrieve OF node and eeh device */
 	edev = of_node_to_eeh_dev(dn);
-	if (!of_device_is_available(dn))
+	if (edev->pe || !of_device_is_available(dn))
 		return NULL;
 
 	/* Retrieve class/vendor/device IDs */
-	class_code = of_get_property(dn, "class-code", NULL);
-	vendor_id  = of_get_property(dn, "vendor-id", NULL);
-	device_id  = of_get_property(dn, "device-id", NULL);
+	classp = of_get_property(dn, "class-code", NULL);
+	vendorp = of_get_property(dn, "vendor-id", NULL);
+	devicep = of_get_property(dn, "device-id", NULL);
 
 	/* Skip for bad OF node or PCI-ISA bridge */
-	if (!class_code || !vendor_id || !device_id)
+	if (!classp || !vendorp || !devicep)
 		return NULL;
 	if (dn->type && !strcmp(dn->type, "isa"))
 		return NULL;
 
-	/* Update class code and mode of eeh device */
-	edev->class_code = *class_code;
-	edev->mode = 0;
+	class_code = of_read_number(classp, 1);
+
+	/*
+	 * Update class code and mode of eeh device. The mode
+	 * must correctly reflect whether the current device is
+	 * a root port or a PCIe switch downstream port.
+	 */
+	edev->class_code = class_code;
+	edev->pcix_cap = pseries_eeh_find_cap(dn, PCI_CAP_ID_PCIX);
+	edev->pcie_cap = pseries_eeh_find_cap(dn, PCI_CAP_ID_EXP);
+	edev->aer_cap = pseries_eeh_find_ecap(dn, PCI_EXT_CAP_ID_ERR);
+	edev->mode &= 0xFFFFFF00;
+	if ((edev->class_code >> 8) == PCI_CLASS_BRIDGE_PCI) {
+		edev->mode |= EEH_DEV_BRIDGE;
+		if (edev->pcie_cap) {
+			rtas_read_config(pdn, edev->pcie_cap + PCI_EXP_FLAGS,
+					 2, &pcie_flags);
+			pcie_flags = (pcie_flags & PCI_EXP_FLAGS_TYPE) >> 4;
+			if (pcie_flags == PCI_EXP_TYPE_ROOT_PORT)
+				edev->mode |= EEH_DEV_ROOT_PORT;
+			else if (pcie_flags == PCI_EXP_TYPE_DOWNSTREAM)
+				edev->mode |= EEH_DEV_DS_PORT;
+		}
+	}
 
 	/* Retrieve the device address */
 	regs = of_get_property(dn, "reg", NULL);
@@ -182,12 +278,12 @@ static void *pseries_eeh_of_probe(struct device_node *dn, void *flag)
 	/* Initialize the fake PE */
 	memset(&pe, 0, sizeof(struct eeh_pe));
 	pe.phb = edev->phb;
-	pe.config_addr = regs[0];
+	pe.config_addr = of_read_number(regs, 1);
 
 	/* Enable EEH on the device */
 	ret = eeh_ops->set_option(&pe, EEH_OPT_ENABLE);
 	if (!ret) {
-		edev->config_addr = regs[0];
+		edev->config_addr = of_read_number(regs, 1);
 		/* Retrieve PE address */
 		edev->pe_config_addr = eeh_ops->get_pe_addr(&pe);
 		pe.addr = edev->pe_config_addr;
@@ -201,7 +297,7 @@ static void *pseries_eeh_of_probe(struct device_node *dn, void *flag)
 			enable = 1;
 
 		if (enable) {
-			eeh_subsystem_enabled = 1;
+			eeh_set_enable(true);
 			eeh_add_to_parent_pe(edev);
 
 			pr_debug("%s: EEH enabled on %s PHB#%d-PE#%x, config addr#%x\n",
@@ -400,6 +496,7 @@ static int pseries_eeh_get_state(struct eeh_pe *pe, int *state)
 			} else {
 				result = EEH_STATE_NOT_SUPPORT;
 			}
+			break;
 		default:
 			result = EEH_STATE_NOT_SUPPORT;
 		}
@@ -435,11 +532,19 @@ static int pseries_eeh_reset(struct eeh_pe *pe, int option)
 	/* If fundamental-reset not supported, try hot-reset */
 	if (option == EEH_RESET_FUNDAMENTAL &&
 	    ret == -8) {
+		option = EEH_RESET_HOT;
 		ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
 				config_addr, BUID_HI(pe->phb->buid),
-				BUID_LO(pe->phb->buid), EEH_RESET_HOT);
+				BUID_LO(pe->phb->buid), option);
 	}
 
+	/* We need reset hold or settlement delay */
+	if (option == EEH_RESET_FUNDAMENTAL ||
+	    option == EEH_RESET_HOT)
+		msleep(EEH_PE_RST_HOLD_TIME);
+	else
+		msleep(EEH_PE_RST_SETTLE_TIME);
+
 	return ret;
 }
 
@@ -625,7 +730,9 @@ static struct eeh_ops pseries_eeh_ops = {
 	.get_log		= pseries_eeh_get_log,
 	.configure_bridge       = pseries_eeh_configure_bridge,
 	.read_config		= pseries_eeh_read_config,
-	.write_config		= pseries_eeh_write_config
+	.write_config		= pseries_eeh_write_config,
+	.next_error		= NULL,
+	.restore_config		= NULL
 };
 
 /**
diff --git a/arch/powerpc/platforms/pseries/eeh_sysfs.c b/arch/powerpc/platforms/pseries/eeh_sysfs.c
deleted file mode 100644
index d37708360f2..00000000000
--- a/arch/powerpc/platforms/pseries/eeh_sysfs.c
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Sysfs entries for PCI Error Recovery for PAPR-compliant platform.
- * Copyright IBM Corporation 2007
- * Copyright Linas Vepstas <linas@austin.ibm.com> 2007
- *
- * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or (at
- * your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT.  See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * Send comments and feedback to Linas Vepstas <linas@austin.ibm.com>
- */
-#include <linux/pci.h>
-#include <linux/stat.h>
-#include <asm/ppc-pci.h>
-#include <asm/pci-bridge.h>
-
-/**
- * EEH_SHOW_ATTR -- Create sysfs entry for eeh statistic
- * @_name: name of file in sysfs directory
- * @_memb: name of member in struct pci_dn to access
- * @_format: printf format for display
- *
- * All of the attributes look very similar, so just
- * auto-gen a cut-n-paste routine to display them.
- */
-#define EEH_SHOW_ATTR(_name,_memb,_format)               \
-static ssize_t eeh_show_##_name(struct device *dev,      \
-		struct device_attribute *attr, char *buf)          \
-{                                                        \
-	struct pci_dev *pdev = to_pci_dev(dev);               \
-	struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);      \
-	                                                      \
-	if (!edev)                                            \
-		return 0;                                     \
-	                                                      \
-	return sprintf(buf, _format "\n", edev->_memb);       \
-}                                                        \
-static DEVICE_ATTR(_name, S_IRUGO, eeh_show_##_name, NULL);
-
-EEH_SHOW_ATTR(eeh_mode,            mode,            "0x%x");
-EEH_SHOW_ATTR(eeh_config_addr,     config_addr,     "0x%x");
-EEH_SHOW_ATTR(eeh_pe_config_addr,  pe_config_addr,  "0x%x");
-
-void eeh_sysfs_add_device(struct pci_dev *pdev)
-{
-	int rc=0;
-
-	rc += device_create_file(&pdev->dev, &dev_attr_eeh_mode);
-	rc += device_create_file(&pdev->dev, &dev_attr_eeh_config_addr);
-	rc += device_create_file(&pdev->dev, &dev_attr_eeh_pe_config_addr);
-
-	if (rc)
-		printk(KERN_WARNING "EEH: Unable to create sysfs entries\n");
-}
-
-void eeh_sysfs_remove_device(struct pci_dev *pdev)
-{
-	device_remove_file(&pdev->dev, &dev_attr_eeh_mode);
-	device_remove_file(&pdev->dev, &dev_attr_eeh_config_addr);
-	device_remove_file(&pdev->dev, &dev_attr_eeh_pe_config_addr);
-}
-
diff --git a/arch/powerpc/platforms/pseries/event_sources.c b/arch/powerpc/platforms/pseries/event_sources.c
index 2605c310166..18380e8f6df 100644
--- a/arch/powerpc/platforms/pseries/event_sources.c
+++ b/arch/powerpc/platforms/pseries/event_sources.c
@@ -25,7 +25,7 @@ void request_event_sources_irqs(struct device_node *np,
 				const char *name)
 {
 	int i, index, count = 0;
-	struct of_irq oirq;
+	struct of_phandle_args oirq;
 	const u32 *opicprop;
 	unsigned int opicplen;
 	unsigned int virqs[16];
@@ -55,13 +55,11 @@ void request_event_sources_irqs(struct device_node *np,
 	/* Else use normal interrupt tree parsing */
 	else {
 		/* First try to do a proper OF tree parsing */
-		for (index = 0; of_irq_map_one(np, index, &oirq) == 0;
+		for (index = 0; of_irq_parse_one(np, index, &oirq) == 0;
 		     index++) {
 			if (count > 15)
 				break;
-			virqs[count] = irq_create_of_mapping(oirq.controller,
-							    oirq.specifier,
-							    oirq.size);
+			virqs[count] = irq_create_of_mapping(&oirq);
 			if (virqs[count] == NO_IRQ) {
 				pr_err("event-sources: Unable to allocate "
 				       "interrupt number for %s\n",
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index 217ca5c75b2..20d62975856 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -30,16 +30,12 @@
 #include <asm/machdep.h>
 #include <asm/vdso_datapage.h>
 #include <asm/xics.h>
-#include "plpar_wrappers.h"
+#include <asm/plpar_wrappers.h>
+
 #include "offline_states.h"
 
 /* This version can't take the spinlock, because it never returns */
-static struct rtas_args rtas_stop_self_args = {
-	.token = RTAS_UNKNOWN_SERVICE,
-	.nargs = 0,
-	.nret = 1,
-	.rets = &rtas_stop_self_args.args[0],
-};
+static int rtas_stop_self_token = RTAS_UNKNOWN_SERVICE;
 
 static DEFINE_PER_CPU(enum cpu_state_vals, preferred_offline_state) =
 							CPU_STATE_OFFLINE;
@@ -92,15 +88,21 @@ void set_default_offline_state(int cpu)
 
 static void rtas_stop_self(void)
 {
-	struct rtas_args *args = &rtas_stop_self_args;
+	static struct rtas_args args = {
+		.nargs = 0,
+		.nret = 1,
+		.rets = &args.args[0],
+	};
+
+	args.token = cpu_to_be32(rtas_stop_self_token);
 
 	local_irq_disable();
 
-	BUG_ON(args->token == RTAS_UNKNOWN_SERVICE);
+	BUG_ON(rtas_stop_self_token == RTAS_UNKNOWN_SERVICE);
 
 	printk("cpu %u (hwid %u) Ready to die...\n",
 	       smp_processor_id(), hard_smp_processor_id());
-	enter_rtas(__pa(args));
+	enter_rtas(__pa(&args));
 
 	panic("Alas, I survived.\n");
 }
@@ -123,7 +125,7 @@ static void pseries_mach_cpu_die(void)
 		cede_latency_hint = 2;
 
 		get_lppaca()->idle = 1;
-		if (!get_lppaca()->shared_proc)
+		if (!lppaca_shared_proc(get_lppaca()))
 			get_lppaca()->donate_dedicated_cpu = 1;
 
 		while (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) {
@@ -137,7 +139,7 @@ static void pseries_mach_cpu_die(void)
 
 		local_irq_disable();
 
-		if (!get_lppaca()->shared_proc)
+		if (!lppaca_shared_proc(get_lppaca()))
 			get_lppaca()->donate_dedicated_cpu = 0;
 		get_lppaca()->idle = 0;
 
@@ -391,10 +393,10 @@ static int __init pseries_cpu_hotplug_init(void)
 		}
 	}
 
-	rtas_stop_self_args.token = rtas_token("stop-self");
+	rtas_stop_self_token = rtas_token("stop-self");
 	qcss_tok = rtas_token("query-cpu-stopped-state");
 
-	if (rtas_stop_self_args.token == RTAS_UNKNOWN_SERVICE ||
+	if (rtas_stop_self_token == RTAS_UNKNOWN_SERVICE ||
 			qcss_tok == RTAS_UNKNOWN_SERVICE) {
 		printk(KERN_INFO "CPU Hotplug not supported by firmware "
 				"- disabling.\n");
@@ -419,4 +421,4 @@ static int __init pseries_cpu_hotplug_init(void)
 
 	return 0;
 }
-arch_initcall(pseries_cpu_hotplug_init);
+machine_arch_initcall(pseries, pseries_cpu_hotplug_init);
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index 9a432de363b..7995135170a 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -10,15 +10,18 @@
  */
 
 #include <linux/of.h>
+#include <linux/of_address.h>
 #include <linux/memblock.h>
 #include <linux/vmalloc.h>
 #include <linux/memory.h>
+#include <linux/memory_hotplug.h>
 
 #include <asm/firmware.h>
 #include <asm/machdep.h>
+#include <asm/prom.h>
 #include <asm/sparsemem.h>
 
-static unsigned long get_memblock_size(void)
+unsigned long pseries_memory_block_size(void)
 {
 	struct device_node *np;
 	unsigned int memblock_size = MIN_MEMORY_BLOCK_SIZE;
@@ -61,72 +64,53 @@ static unsigned long get_memblock_size(void)
 	return memblock_size;
 }
 
-/* WARNING: This is going to override the generic definition whenever
- * pseries is built-in regardless of what platform is active at boot
- * time. This is fine for now as this is the only "option" and it
- * should work everywhere. If not, we'll have to turn this into a
- * ppc_md. callback
- */
-unsigned long memory_block_size_bytes(void)
+#ifdef CONFIG_MEMORY_HOTREMOVE
+static int pseries_remove_memory(u64 start, u64 size)
 {
-	return get_memblock_size();
+	int ret;
+
+	/* Remove htab bolted mappings for this section of memory */
+	start = (unsigned long)__va(start);
+	ret = remove_section_mapping(start, start + size);
+
+	/* Ensure all vmalloc mappings are flushed in case they also
+	 * hit that section of memory
+	 */
+	vm_unmap_aliases();
+
+	return ret;
 }
 
-#ifdef CONFIG_MEMORY_HOTREMOVE
 static int pseries_remove_memblock(unsigned long base, unsigned int memblock_size)
 {
-	unsigned long start, start_pfn;
-	struct zone *zone;
-	int ret;
-	unsigned long section;
-	unsigned long sections_to_remove;
+	unsigned long block_sz, start_pfn;
+	int sections_per_block;
+	int i, nid;
 
 	start_pfn = base >> PAGE_SHIFT;
 
-	if (!pfn_valid(start_pfn)) {
-		memblock_remove(base, memblock_size);
-		return 0;
-	}
+	lock_device_hotplug();
 
-	zone = page_zone(pfn_to_page(start_pfn));
+	if (!pfn_valid(start_pfn))
+		goto out;
 
-	/*
-	 * Remove section mappings and sysfs entries for the
-	 * section of the memory we are removing.
-	 *
-	 * NOTE: Ideally, this should be done in generic code like
-	 * remove_memory(). But remove_memory() gets called by writing
-	 * to sysfs "state" file and we can't remove sysfs entries
-	 * while writing to it. So we have to defer it to here.
-	 */
-	sections_to_remove = (memblock_size >> PAGE_SHIFT) / PAGES_PER_SECTION;
-	for (section = 0; section < sections_to_remove; section++) {
-		unsigned long pfn = start_pfn + section * PAGES_PER_SECTION;
-		ret = __remove_pages(zone, pfn, PAGES_PER_SECTION);
-		if (ret)
-			return ret;
+	block_sz = pseries_memory_block_size();
+	sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE;
+	nid = memory_add_physaddr_to_nid(base);
+
+	for (i = 0; i < sections_per_block; i++) {
+		remove_memory(nid, base + i * MIN_MEMORY_BLOCK_SIZE,
+			      MIN_MEMORY_BLOCK_SIZE);
 	}
 
-	/*
-	 * Update memory regions for memory remove
-	 */
+out:
+	/* Update memory regions; base must still be the block's start here */
 	memblock_remove(base, memblock_size);
-
-	/*
-	 * Remove htab bolted mappings for this section of memory
-	 */
-	start = (unsigned long)__va(base);
-	ret = remove_section_mapping(start, start + memblock_size);
-
-	/* Ensure all vmalloc mappings are flushed in case they also
-	 * hit that section of memory
-	 */
-	vm_unmap_aliases();
-
-	return ret;
+	unlock_device_hotplug();
+	return 0;
 }
 
-static int pseries_remove_memory(struct device_node *np)
+static int pseries_remove_mem_node(struct device_node *np)
 {
 	const char *type;
 	const unsigned int *regs;
@@ -151,8 +135,8 @@ static int pseries_remove_memory(struct device_node *np)
 	base = *(unsigned long *)regs;
 	lmb_size = regs[3];
 
-	ret = pseries_remove_memblock(base, lmb_size);
-	return ret;
+	pseries_remove_memblock(base, lmb_size);
+	return 0;
 }
 #else
 static inline int pseries_remove_memblock(unsigned long base,
@@ -160,13 +144,13 @@ static inline int pseries_remove_memblock(unsigned long base,
 {
 	return -EOPNOTSUPP;
 }
-static inline int pseries_remove_memory(struct device_node *np)
+static inline int pseries_remove_mem_node(struct device_node *np)
 {
 	return -EOPNOTSUPP;
 }
 #endif /* CONFIG_MEMORY_HOTREMOVE */
 
-static int pseries_add_memory(struct device_node *np)
+static int pseries_add_mem_node(struct device_node *np)
 {
 	const char *type;
 	const unsigned int *regs;
@@ -206,7 +190,7 @@ static int pseries_update_drconf_memory(struct of_prop_reconfig *pr)
 	u32 *p;
 	int i, rc = -EINVAL;
 
-	memblock_size = get_memblock_size();
+	memblock_size = pseries_memory_block_size();
 	if (!memblock_size)
 		return -EINVAL;
 
@@ -252,10 +236,10 @@ static int pseries_memory_notifier(struct notifier_block *nb,
 
 	switch (action) {
 	case OF_RECONFIG_ATTACH_NODE:
-		err = pseries_add_memory(node);
+		err = pseries_add_mem_node(node);
 		break;
 	case OF_RECONFIG_DETACH_NODE:
-		err = pseries_remove_memory(node);
+		err = pseries_remove_mem_node(node);
 		break;
 	case OF_RECONFIG_UPDATE_PROPERTY:
 		pr = (struct of_prop_reconfig *)node;
@@ -275,6 +259,10 @@ static int __init pseries_memory_hotplug_init(void)
 	if (firmware_has_feature(FW_FEATURE_LPAR))
 		of_reconfig_notifier_register(&pseries_mem_nb);
 
+#ifdef CONFIG_MEMORY_HOTREMOVE
+	ppc_md.remove_memory = pseries_remove_memory;
+#endif
+
 	return 0;
 }
 machine_device_initcall(pseries, pseries_memory_hotplug_init);
diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S
index 444fe7759e5..99ecf0a5a92 100644
--- a/arch/powerpc/platforms/pseries/hvCall.S
+++ b/arch/powerpc/platforms/pseries/hvCall.S
@@ -49,7 +49,7 @@ END_FTR_SECTION(0, 1);						\
 	std	r0,16(r1);					\
 	addi	r4,r1,STK_PARAM(FIRST_REG);			\
 	stdu	r1,-STACK_FRAME_OVERHEAD(r1);			\
-	bl	.__trace_hcall_entry;				\
+	bl	__trace_hcall_entry;				\
 	addi	r1,r1,STACK_FRAME_OVERHEAD;			\
 	ld	r0,16(r1);					\
 	ld	r3,STK_PARAM(R3)(r1);				\
@@ -83,7 +83,7 @@ END_FTR_SECTION(0, 1);						\
 	mr	r3,r6;						\
 	std	r0,16(r1);					\
 	stdu	r1,-STACK_FRAME_OVERHEAD(r1);			\
-	bl	.__trace_hcall_exit;				\
+	bl	__trace_hcall_exit;				\
 	addi	r1,r1,STACK_FRAME_OVERHEAD;			\
 	ld	r0,16(r1);					\
 	ld	r3,STK_PARAM(R3)(r1);				\
@@ -106,7 +106,7 @@ END_FTR_SECTION(0, 1);						\
 
 	.text
 
-_GLOBAL(plpar_hcall_norets)
+_GLOBAL_TOC(plpar_hcall_norets)
 	HMT_MEDIUM
 
 	mfcr	r0
@@ -122,7 +122,7 @@ _GLOBAL(plpar_hcall_norets)
 	mtcrf	0xff,r0
 	blr				/* return r3 = status */
 
-_GLOBAL(plpar_hcall)
+_GLOBAL_TOC(plpar_hcall)
 	HMT_MEDIUM
 
 	mfcr	r0
@@ -188,7 +188,7 @@ _GLOBAL(plpar_hcall_raw)
 
 	blr				/* return r3 = status */
 
-_GLOBAL(plpar_hcall9)
+_GLOBAL_TOC(plpar_hcall9)
 	HMT_MEDIUM
 
 	mfcr	r0
diff --git a/arch/powerpc/platforms/pseries/hvconsole.c b/arch/powerpc/platforms/pseries/hvconsole.c
index b344f94b040..849b29b3e9a 100644
--- a/arch/powerpc/platforms/pseries/hvconsole.c
+++ b/arch/powerpc/platforms/pseries/hvconsole.c
@@ -28,7 +28,7 @@
 #include <linux/errno.h>
 #include <asm/hvcall.h>
 #include <asm/hvconsole.h>
-#include "plpar_wrappers.h"
+#include <asm/plpar_wrappers.h>
 
 /**
  * hvc_get_chars - retrieve characters from firmware for denoted vterm adatper
@@ -40,10 +40,16 @@
  */
 int hvc_get_chars(uint32_t vtermno, char *buf, int count)
 {
-	unsigned long got;
+	long ret;
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+	unsigned long *lbuf = (unsigned long *)buf;
+
+	ret = plpar_hcall(H_GET_TERM_CHAR, retbuf, vtermno);
+	lbuf[0] = be64_to_cpu(retbuf[1]);
+	lbuf[1] = be64_to_cpu(retbuf[2]);
 
-	if (plpar_get_term_char(vtermno, &got, buf) == H_SUCCESS)
-		return got;
+	if (ret == H_SUCCESS)
+		return retbuf[0];
 
 	return 0;
 }
@@ -69,8 +75,9 @@ int hvc_put_chars(uint32_t vtermno, const char *buf, int count)
 	if (count > MAX_VIO_PUT_CHARS)
 		count = MAX_VIO_PUT_CHARS;
 
-	ret = plpar_hcall_norets(H_PUT_TERM_CHAR, vtermno, count, lbuf[0],
-				 lbuf[1]);
+	ret = plpar_hcall_norets(H_PUT_TERM_CHAR, vtermno, count,
+				 cpu_to_be64(lbuf[0]),
+				 cpu_to_be64(lbuf[1]));
 	if (ret == H_SUCCESS)
 		return count;
 	if (ret == H_BUSY)
diff --git a/arch/powerpc/platforms/pseries/io_event_irq.c b/arch/powerpc/platforms/pseries/io_event_irq.c
index ef9d9d84c7d..0240c4ff878 100644
--- a/arch/powerpc/platforms/pseries/io_event_irq.c
+++ b/arch/powerpc/platforms/pseries/io_event_irq.c
@@ -82,9 +82,9 @@ static struct pseries_io_event * ioei_find_event(struct rtas_error_log *elog)
 	 * RTAS_TYPE_IO only exists in extended event log version 6 or later.
 	 * No need to check event log version.
 	 */
-	if (unlikely(elog->type != RTAS_TYPE_IO)) {
-		printk_once(KERN_WARNING "io_event_irq: Unexpected event type %d",
-			    elog->type);
+	if (unlikely(rtas_error_type(elog) != RTAS_TYPE_IO)) {
+		printk_once(KERN_WARNING"io_event_irq: Unexpected event type %d",
+			    rtas_error_type(elog));
 		return NULL;
 	}
 
@@ -115,7 +115,7 @@ static struct pseries_io_event * ioei_find_event(struct rtas_error_log *elog)
  *   by scope or event type alone. For example, Torrent ISR route change
  *   event is reported with scope 0x00 (Not Applicatable) rather than
  *   0x3B (Torrent-hub). It is better to let the clients to identify
- *   who owns the the event.
+ *   who owns the event.
  */
 
 static irqreturn_t ioei_interrupt(int irq, void *dev_id)
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 86ae364900d..33b552ffbe5 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -48,12 +48,11 @@
 #include <asm/ppc-pci.h>
 #include <asm/udbg.h>
 #include <asm/mmzone.h>
-
-#include "plpar_wrappers.h"
+#include <asm/plpar_wrappers.h>
 
 
 static void tce_invalidate_pSeries_sw(struct iommu_table *tbl,
-				      u64 *startp, u64 *endp)
+				      __be64 *startp, __be64 *endp)
 {
 	u64 __iomem *invalidate = (u64 __iomem *)tbl->it_index;
 	unsigned long start, end, inc;
@@ -87,7 +86,7 @@ static int tce_build_pSeries(struct iommu_table *tbl, long index,
 			      struct dma_attrs *attrs)
 {
 	u64 proto_tce;
-	u64 *tcep, *tces;
+	__be64 *tcep, *tces;
 	u64 rpn;
 
 	proto_tce = TCE_PCI_READ; // Read allowed
@@ -95,12 +94,12 @@ static int tce_build_pSeries(struct iommu_table *tbl, long index,
 	if (direction != DMA_TO_DEVICE)
 		proto_tce |= TCE_PCI_WRITE;
 
-	tces = tcep = ((u64 *)tbl->it_base) + index;
+	tces = tcep = ((__be64 *)tbl->it_base) + index;
 
 	while (npages--) {
 		/* can't move this out since we might cross MEMBLOCK boundary */
 		rpn = __pa(uaddr) >> TCE_SHIFT;
-		*tcep = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT;
+		*tcep = cpu_to_be64(proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT);
 
 		uaddr += TCE_PAGE_SIZE;
 		tcep++;
@@ -114,9 +113,9 @@ static int tce_build_pSeries(struct iommu_table *tbl, long index,
 
 static void tce_free_pSeries(struct iommu_table *tbl, long index, long npages)
 {
-	u64 *tcep, *tces;
+	__be64 *tcep, *tces;
 
-	tces = tcep = ((u64 *)tbl->it_base) + index;
+	tces = tcep = ((__be64 *)tbl->it_base) + index;
 
 	while (npages--)
 		*(tcep++) = 0;
@@ -127,11 +126,11 @@ static void tce_free_pSeries(struct iommu_table *tbl, long index, long npages)
 
 static unsigned long tce_get_pseries(struct iommu_table *tbl, long index)
 {
-	u64 *tcep;
+	__be64 *tcep;
 
-	tcep = ((u64 *)tbl->it_base) + index;
+	tcep = ((__be64 *)tbl->it_base) + index;
 
-	return *tcep;
+	return be64_to_cpu(*tcep);
 }
 
 static void tce_free_pSeriesLP(struct iommu_table*, long, long);
@@ -178,7 +177,7 @@ static int tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum,
 	return ret;
 }
 
-static DEFINE_PER_CPU(u64 *, tce_page);
+static DEFINE_PER_CPU(__be64 *, tce_page);
 
 static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
 				     long npages, unsigned long uaddr,
@@ -187,7 +186,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
 {
 	u64 rc = 0;
 	u64 proto_tce;
-	u64 *tcep;
+	__be64 *tcep;
 	u64 rpn;
 	long l, limit;
 	long tcenum_start = tcenum, npages_start = npages;
@@ -207,7 +206,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
 	 * from iommu_alloc{,_sg}()
 	 */
 	if (!tcep) {
-		tcep = (u64 *)__get_free_page(GFP_ATOMIC);
+		tcep = (__be64 *)__get_free_page(GFP_ATOMIC);
 		/* If allocation fails, fall back to the loop implementation */
 		if (!tcep) {
 			local_irq_restore(flags);
@@ -231,7 +230,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
 		limit = min_t(long, npages, 4096/TCE_ENTRY_SIZE);
 
 		for (l = 0; l < limit; l++) {
-			tcep[l] = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT;
+			tcep[l] = cpu_to_be64(proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT);
 			rpn++;
 		}
 
@@ -330,16 +329,16 @@ struct direct_window {
 
 /* Dynamic DMA Window support */
 struct ddw_query_response {
-	u32 windows_available;
-	u32 largest_available_block;
-	u32 page_size;
-	u32 migration_capable;
+	__be32 windows_available;
+	__be32 largest_available_block;
+	__be32 page_size;
+	__be32 migration_capable;
 };
 
 struct ddw_create_response {
-	u32 liobn;
-	u32 addr_hi;
-	u32 addr_lo;
+	__be32 liobn;
+	__be32 addr_hi;
+	__be32 addr_lo;
 };
 
 static LIST_HEAD(direct_window_list);
@@ -393,7 +392,8 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn,
 					unsigned long num_pfn, const void *arg)
 {
 	const struct dynamic_dma_window_prop *maprange = arg;
-	u64 *tcep, tce_size, num_tce, dma_offset, next, proto_tce, liobn;
+	u64 tce_size, num_tce, dma_offset, next, proto_tce, liobn;
+	__be64 *tcep;
 	u32 tce_shift;
 	u64 rc = 0;
 	long l, limit;
@@ -402,7 +402,7 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn,
 	tcep = __get_cpu_var(tce_page);
 
 	if (!tcep) {
-		tcep = (u64 *)__get_free_page(GFP_ATOMIC);
+		tcep = (__be64 *)__get_free_page(GFP_ATOMIC);
 		if (!tcep) {
 			local_irq_enable();
 			return -ENOMEM;
@@ -436,7 +436,7 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn,
 		dma_offset = next + be64_to_cpu(maprange->dma_base);
 
 		for (l = 0; l < limit; l++) {
-			tcep[l] = proto_tce | next;
+			tcep[l] = cpu_to_be64(proto_tce | next);
 			next += tce_size;
 		}
 
@@ -486,9 +486,10 @@ static void iommu_table_setparms(struct pci_controller *phb,
 		memset((void *)tbl->it_base, 0, *sizep);
 
 	tbl->it_busno = phb->bus->number;
+	tbl->it_page_shift = IOMMU_PAGE_SHIFT_4K;
 
 	/* Units of tce entries */
-	tbl->it_offset = phb->dma_window_base_cur >> IOMMU_PAGE_SHIFT;
+	tbl->it_offset = phb->dma_window_base_cur >> tbl->it_page_shift;
 
 	/* Test if we are going over 2GB of DMA space */
 	if (phb->dma_window_base_cur + phb->dma_window_size > 0x80000000ul) {
@@ -499,7 +500,7 @@ static void iommu_table_setparms(struct pci_controller *phb,
 	phb->dma_window_base_cur += phb->dma_window_size;
 
 	/* Set the tce table size - measured in entries */
-	tbl->it_size = phb->dma_window_size >> IOMMU_PAGE_SHIFT;
+	tbl->it_size = phb->dma_window_size >> tbl->it_page_shift;
 
 	tbl->it_index = 0;
 	tbl->it_blocksize = 16;
@@ -530,18 +531,19 @@ static void iommu_table_setparms(struct pci_controller *phb,
 static void iommu_table_setparms_lpar(struct pci_controller *phb,
 				      struct device_node *dn,
 				      struct iommu_table *tbl,
-				      const void *dma_window)
+				      const __be32 *dma_window)
 {
 	unsigned long offset, size;
 
 	of_parse_dma_window(dn, dma_window, &tbl->it_index, &offset, &size);
 
 	tbl->it_busno = phb->bus->number;
+	tbl->it_page_shift = IOMMU_PAGE_SHIFT_4K;
 	tbl->it_base   = 0;
 	tbl->it_blocksize  = 16;
 	tbl->it_type = TCE_PCI;
-	tbl->it_offset = offset >> IOMMU_PAGE_SHIFT;
-	tbl->it_size = size >> IOMMU_PAGE_SHIFT;
+	tbl->it_offset = offset >> tbl->it_page_shift;
+	tbl->it_size = size >> tbl->it_page_shift;
 }
 
 static void pci_dma_bus_setup_pSeries(struct pci_bus *bus)
@@ -614,6 +616,7 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus)
 
 	iommu_table_setparms(pci->phb, dn, tbl);
 	pci->iommu_table = iommu_init_table(tbl, pci->phb->node);
+	iommu_register_group(tbl, pci_domain_nr(bus), 0);
 
 	/* Divide the rest (1.75GB) among the children */
 	pci->phb->dma_window_size = 0x80000000ul;
@@ -629,7 +632,7 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
 	struct iommu_table *tbl;
 	struct device_node *dn, *pdn;
 	struct pci_dn *ppci;
-	const void *dma_window = NULL;
+	const __be32 *dma_window = NULL;
 
 	dn = pci_bus_to_OF_node(bus);
 
@@ -658,6 +661,7 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
 				   ppci->phb->node);
 		iommu_table_setparms_lpar(ppci->phb, pdn, tbl, dma_window);
 		ppci->iommu_table = iommu_init_table(tbl, ppci->phb->node);
+		iommu_register_group(tbl, pci_domain_nr(bus), 0);
 		pr_debug("  created table: %p\n", ppci->iommu_table);
 	}
 }
@@ -684,7 +688,9 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev)
 				   phb->node);
 		iommu_table_setparms(phb, dn, tbl);
 		PCI_DN(dn)->iommu_table = iommu_init_table(tbl, phb->node);
-		set_iommu_table_base(&dev->dev, PCI_DN(dn)->iommu_table);
+		iommu_register_group(tbl, pci_domain_nr(phb->bus), 0);
+		set_iommu_table_base_and_group(&dev->dev,
+					       PCI_DN(dn)->iommu_table);
 		return;
 	}
 
@@ -696,7 +702,8 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev)
 		dn = dn->parent;
 
 	if (dn && PCI_DN(dn))
-		set_iommu_table_base(&dev->dev, PCI_DN(dn)->iommu_table);
+		set_iommu_table_base_and_group(&dev->dev,
+					       PCI_DN(dn)->iommu_table);
 	else
 		printk(KERN_WARNING "iommu: Device %s has no iommu table\n",
 		       pci_name(dev));
@@ -714,21 +721,6 @@ static int __init disable_ddw_setup(char *str)
 
 early_param("disable_ddw", disable_ddw_setup);
 
-static inline void __remove_ddw(struct device_node *np, const u32 *ddw_avail, u64 liobn)
-{
-	int ret;
-
-	ret = rtas_call(ddw_avail[2], 1, 1, NULL, liobn);
-	if (ret)
-		pr_warning("%s: failed to remove DMA window: rtas returned "
-			"%d to ibm,remove-pe-dma-window(%x) %llx\n",
-			np->full_name, ret, ddw_avail[2], liobn);
-	else
-		pr_debug("%s: successfully removed DMA window: rtas returned "
-			"%d to ibm,remove-pe-dma-window(%x) %llx\n",
-			np->full_name, ret, ddw_avail[2], liobn);
-}
-
 static void remove_ddw(struct device_node *np)
 {
 	struct dynamic_dma_window_prop *dwp;
@@ -758,7 +750,15 @@ static void remove_ddw(struct device_node *np)
 		pr_debug("%s successfully cleared tces in window.\n",
 			 np->full_name);
 
-	__remove_ddw(np, ddw_avail, liobn);
+	ret = rtas_call(ddw_avail[2], 1, 1, NULL, liobn);
+	if (ret)
+		pr_warning("%s: failed to remove direct window: rtas returned "
+			"%d to ibm,remove-pe-dma-window(%x) %llx\n",
+			np->full_name, ret, ddw_avail[2], liobn);
+	else
+		pr_debug("%s: successfully removed direct window: rtas returned "
+			"%d to ibm,remove-pe-dma-window(%x) %llx\n",
+			np->full_name, ret, ddw_avail[2], liobn);
 
 delprop:
 	ret = of_remove_property(np, win64);
@@ -778,7 +778,7 @@ static u64 find_existing_ddw(struct device_node *pdn)
 	list_for_each_entry(window, &direct_window_list, list) {
 		if (window->device == pdn) {
 			direct64 = window->prop;
-			dma_addr = direct64->dma_base;
+			dma_addr = be64_to_cpu(direct64->dma_base);
 			break;
 		}
 	}
@@ -787,68 +787,33 @@ static u64 find_existing_ddw(struct device_node *pdn)
 	return dma_addr;
 }
 
-static void __restore_default_window(struct eeh_dev *edev,
-						u32 ddw_restore_token)
-{
-	u32 cfg_addr;
-	u64 buid;
-	int ret;
-
-	/*
-	 * Get the config address and phb buid of the PE window.
-	 * Rely on eeh to retrieve this for us.
-	 * Retrieve them from the pci device, not the node with the
-	 * dma-window property
-	 */
-	cfg_addr = edev->config_addr;
-	if (edev->pe_config_addr)
-		cfg_addr = edev->pe_config_addr;
-	buid = edev->phb->buid;
-
-	do {
-		ret = rtas_call(ddw_restore_token, 3, 1, NULL, cfg_addr,
-					BUID_HI(buid), BUID_LO(buid));
-	} while (rtas_busy_delay(ret));
-	pr_info("ibm,reset-pe-dma-windows(%x) %x %x %x returned %d\n",
-		 ddw_restore_token, cfg_addr, BUID_HI(buid), BUID_LO(buid), ret);
-}
-
 static int find_existing_ddw_windows(void)
 {
+	int len;
 	struct device_node *pdn;
+	struct direct_window *window;
 	const struct dynamic_dma_window_prop *direct64;
-	const u32 *ddw_extensions;
 
 	if (!firmware_has_feature(FW_FEATURE_LPAR))
 		return 0;
 
 	for_each_node_with_property(pdn, DIRECT64_PROPNAME) {
-		direct64 = of_get_property(pdn, DIRECT64_PROPNAME, NULL);
+		direct64 = of_get_property(pdn, DIRECT64_PROPNAME, &len);
 		if (!direct64)
 			continue;
 
-		/*
-		 * We need to ensure the IOMMU table is active when we
-		 * return from the IOMMU setup so that the common code
-		 * can clear the table or find the holes. To that end,
-		 * first, remove any existing DDW configuration.
-		 */
-		remove_ddw(pdn);
+		window = kzalloc(sizeof(*window), GFP_KERNEL);
+		if (!window || len < sizeof(struct dynamic_dma_window_prop)) {
+			kfree(window);
+			remove_ddw(pdn);
+			continue;
+		}
 
-		/*
-		 * Second, if we are running on a new enough level of
-		 * firmware where the restore API is present, use it to
-		 * restore the 32-bit window, which was removed in
-		 * create_ddw.
-		 * If the API is not present, then create_ddw couldn't
-		 * have removed the 32-bit window in the first place, so
-		 * removing the DDW configuration should be sufficient.
-		 */
-		ddw_extensions = of_get_property(pdn, "ibm,ddw-extensions",
-									NULL);
-		if (ddw_extensions && ddw_extensions[0] > 0)
-			__restore_default_window(of_node_to_eeh_dev(pdn),
-							ddw_extensions[1]);
+		window->device = pdn;
+		window->prop = direct64;
+		spin_lock(&direct_window_list_lock);
+		list_add(&window->list, &direct_window_list);
+		spin_unlock(&direct_window_list_lock);
 	}
 
 	return 0;
@@ -918,12 +883,6 @@ static int create_ddw(struct pci_dev *dev, const u32 *ddw_avail,
 	return ret;
 }
 
-static void restore_default_window(struct pci_dev *dev,
-					u32 ddw_restore_token)
-{
-	__restore_default_window(pci_dev_to_eeh_dev(dev), ddw_restore_token);
-}
-
 struct failed_ddw_pdn {
 	struct device_node *pdn;
 	struct list_head list;
@@ -951,13 +910,9 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
 	u64 dma_addr, max_addr;
 	struct device_node *dn;
 	const u32 *uninitialized_var(ddw_avail);
-	const u32 *uninitialized_var(ddw_extensions);
-	u32 ddw_restore_token = 0;
 	struct direct_window *window;
 	struct property *win64;
 	struct dynamic_dma_window_prop *ddwprop;
-	const void *dma_window = NULL;
-	unsigned long liobn, offset, size;
 	struct failed_ddw_pdn *fpdn;
 
 	mutex_lock(&direct_window_init_mutex);
@@ -988,42 +943,9 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
 	 */
 	ddw_avail = of_get_property(pdn, "ibm,ddw-applicable", &len);
 	if (!ddw_avail || len < 3 * sizeof(u32))
-		goto out_unlock;
-
-	/*
-	 * the extensions property is only required to exist in certain
-	 * levels of firmware and later
-	 * the ibm,ddw-extensions property is a list with the first
-	 * element containing the number of extensions and each
-	 * subsequent entry is a value corresponding to that extension
-	 */
-	ddw_extensions = of_get_property(pdn, "ibm,ddw-extensions", &len);
-	if (ddw_extensions) {
-		/*
-		 * each new defined extension length should be added to
-		 * the top of the switch so the "earlier" entries also
-		 * get picked up
-		 */
-		switch (ddw_extensions[0]) {
-			/* ibm,reset-pe-dma-windows */
-			case 1:
-				ddw_restore_token = ddw_extensions[1];
-				break;
-		}
-	}
-
-	/*
-	 * Only remove the existing DMA window if we can restore back to
-	 * the default state. Removing the existing window maximizes the
-	 * resources available to firmware for dynamic window creation.
-	 */
-	if (ddw_restore_token) {
-		dma_window = of_get_property(pdn, "ibm,dma-window", NULL);
-		of_parse_dma_window(pdn, dma_window, &liobn, &offset, &size);
-		__remove_ddw(pdn, ddw_avail, liobn);
-	}
+		goto out_failed;
 
-	/*
+       /*
 	 * Query if there is a second window of size to map the
 	 * whole partition.  Query returns number of windows, largest
 	 * block assigned to PE (partition endpoint), and two bitmasks
@@ -1032,7 +954,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
 	dn = pci_device_to_OF_node(dev);
 	ret = query_ddw(dev, ddw_avail, &query);
 	if (ret != 0)
-		goto out_restore_window;
+		goto out_failed;
 
 	if (query.windows_available == 0) {
 		/*
@@ -1041,34 +963,34 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
 		 * trading in for a larger page size.
 		 */
 		dev_dbg(&dev->dev, "no free dynamic windows");
-		goto out_restore_window;
+		goto out_failed;
 	}
-	if (query.page_size & 4) {
+	if (be32_to_cpu(query.page_size) & 4) {
 		page_shift = 24; /* 16MB */
-	} else if (query.page_size & 2) {
+	} else if (be32_to_cpu(query.page_size) & 2) {
 		page_shift = 16; /* 64kB */
-	} else if (query.page_size & 1) {
+	} else if (be32_to_cpu(query.page_size) & 1) {
 		page_shift = 12; /* 4kB */
 	} else {
 		dev_dbg(&dev->dev, "no supported direct page size in mask %x",
 			  query.page_size);
-		goto out_restore_window;
+		goto out_failed;
 	}
 	/* verify the window * number of ptes will map the partition */
 	/* check largest block * page size > max memory hotplug addr */
 	max_addr = memory_hotplug_max();
-	if (query.largest_available_block < (max_addr >> page_shift)) {
+	if (be32_to_cpu(query.largest_available_block) < (max_addr >> page_shift)) {
 		dev_dbg(&dev->dev, "can't map partiton max 0x%llx with %u "
 			  "%llu-sized pages\n", max_addr,  query.largest_available_block,
 			  1ULL << page_shift);
-		goto out_restore_window;
+		goto out_failed;
 	}
 	len = order_base_2(max_addr);
 	win64 = kzalloc(sizeof(struct property), GFP_KERNEL);
 	if (!win64) {
 		dev_info(&dev->dev,
 			"couldn't allocate property for 64bit dma window\n");
-		goto out_restore_window;
+		goto out_failed;
 	}
 	win64->name = kstrdup(DIRECT64_PROPNAME, GFP_KERNEL);
 	win64->value = ddwprop = kmalloc(sizeof(*ddwprop), GFP_KERNEL);
@@ -1083,7 +1005,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
 	if (ret != 0)
 		goto out_free_prop;
 
-	ddwprop->liobn = cpu_to_be32(create.liobn);
+	ddwprop->liobn = create.liobn;
 	ddwprop->dma_base = cpu_to_be64(of_read_number(&create.addr_hi, 2));
 	ddwprop->tce_shift = cpu_to_be32(page_shift);
 	ddwprop->window_shift = cpu_to_be32(len);
@@ -1130,9 +1052,7 @@ out_free_prop:
 	kfree(win64->value);
 	kfree(win64);
 
-out_restore_window:
-	if (ddw_restore_token)
-		restore_default_window(dev, ddw_restore_token);
+out_failed:
 
 	fpdn = kzalloc(sizeof(*fpdn), GFP_KERNEL);
 	if (!fpdn)
@@ -1149,7 +1069,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
 {
 	struct device_node *pdn, *dn;
 	struct iommu_table *tbl;
-	const void *dma_window = NULL;
+	const __be32 *dma_window = NULL;
 	struct pci_dn *pci;
 
 	pr_debug("pci_dma_dev_setup_pSeriesLP: %s\n", pci_name(dev));
@@ -1184,12 +1104,13 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
 				   pci->phb->node);
 		iommu_table_setparms_lpar(pci->phb, pdn, tbl, dma_window);
 		pci->iommu_table = iommu_init_table(tbl, pci->phb->node);
+		iommu_register_group(tbl, pci_domain_nr(pci->phb->bus), 0);
 		pr_debug("  created table: %p\n", pci->iommu_table);
 	} else {
 		pr_debug("  found DMA window, table: %p\n", pci->iommu_table);
 	}
 
-	set_iommu_table_base(&dev->dev, pci->iommu_table);
+	set_iommu_table_base_and_group(&dev->dev, pci->iommu_table);
 }
 
 static int dma_set_mask_pSeriesLP(struct device *dev, u64 dma_mask)
@@ -1197,7 +1118,7 @@ static int dma_set_mask_pSeriesLP(struct device *dev, u64 dma_mask)
 	bool ddw_enabled = false;
 	struct device_node *pdn, *dn;
 	struct pci_dev *pdev;
-	const void *dma_window = NULL;
+	const __be32 *dma_window = NULL;
 	u64 dma_offset;
 
 	if (!dev->dma_mask)
diff --git a/arch/powerpc/platforms/pseries/kexec.c b/arch/powerpc/platforms/pseries/kexec.c
index 7d94bdc63d5..13fa95b3aa8 100644
--- a/arch/powerpc/platforms/pseries/kexec.c
+++ b/arch/powerpc/platforms/pseries/kexec.c
@@ -17,9 +17,9 @@
 #include <asm/mpic.h>
 #include <asm/xics.h>
 #include <asm/smp.h>
+#include <asm/plpar_wrappers.h>
 
 #include "pseries.h"
-#include "plpar_wrappers.h"
 
 static void pseries_kexec_cpu_down(int crash_shutdown, int secondary)
 {
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 6d62072a7d5..b02af9ef3ff 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -41,10 +41,17 @@
 #include <asm/smp.h>
 #include <asm/trace.h>
 #include <asm/firmware.h>
+#include <asm/plpar_wrappers.h>
 
-#include "plpar_wrappers.h"
 #include "pseries.h"
 
+/* Flag bits for H_BULK_REMOVE */
+#define HBR_REQUEST	0x4000000000000000UL
+#define HBR_RESPONSE	0x8000000000000000UL
+#define HBR_END		0xc000000000000000UL
+#define HBR_AVPN	0x0200000000000000UL
+#define HBR_ANDCOND	0x0100000000000000UL
+
 
 /* in hvCall.S */
 EXPORT_SYMBOL(plpar_hcall);
@@ -61,9 +68,18 @@ void vpa_init(int cpu)
 	struct paca_struct *pp;
 	struct dtl_entry *dtl;
 
+	/*
+	 * The spec says it "may be problematic" if CPU x registers the VPA of
+	 * CPU y. We should never do that, but wail if we ever do.
+	 */
+	WARN_ON(cpu != smp_processor_id());
+
 	if (cpu_has_feature(CPU_FTR_ALTIVEC))
 		lppaca_of(cpu).vmxregs_in_use = 1;
 
+	if (cpu_has_feature(CPU_FTR_ARCH_207S))
+		lppaca_of(cpu).ebb_regs_in_use = 1;
+
 	addr = __pa(&lppaca_of(cpu));
 	ret = register_vpa(hwcpu, addr);
 
@@ -76,7 +92,7 @@ void vpa_init(int cpu)
 	 * PAPR says this feature is SLB-Buffer but firmware never
 	 * reports that.  All SPLPAR support SLB shadow buffer.
 	 */
-	addr = __pa(&slb_shadow[cpu]);
+	addr = __pa(paca[cpu].slb_shadow_ptr);
 	if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
 		ret = register_slb_shadow(hwcpu, addr);
 		if (ret)
@@ -96,7 +112,7 @@ void vpa_init(int cpu)
 		lppaca_of(cpu).dtl_idx = 0;
 
 		/* hypervisor reads buffer length from this field */
-		dtl->enqueue_to_dispatch_time = DISPATCH_LOG_BYTES;
+		dtl->enqueue_to_dispatch_time = cpu_to_be32(DISPATCH_LOG_BYTES);
 		ret = register_dtl(hwcpu, __pa(dtl));
 		if (ret)
 			pr_err("WARNING: DTL registration of cpu %d (hw %d) "
@@ -136,8 +152,9 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
 	flags = 0;
 
 	/* Make pHyp happy */
-	if ((rflags & _PAGE_NO_CACHE) & !(rflags & _PAGE_WRITETHRU))
-		hpte_r &= ~_PAGE_COHERENT;
+	if ((rflags & _PAGE_NO_CACHE) && !(rflags & _PAGE_WRITETHRU))
+		hpte_r &= ~HPTE_R_M;
+
 	if (firmware_has_feature(FW_FEATURE_XCMO) && !(hpte_r & HPTE_R_N))
 		flags |= H_COALESCE_CAND;
 
@@ -229,6 +246,23 @@ static void pSeries_lpar_hptab_clear(void)
 					&(ptes[j].pteh), &(ptes[j].ptel));
 		}
 	}
+
+#ifdef __LITTLE_ENDIAN__
+	/* Reset exceptions to big endian */
+	if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
+		long rc;
+
+		rc = pseries_big_endian_exceptions();
+		/*
+		 * At this point it is unlikely panic() will get anything
+		 * out to the user, but at least this will stop us from
+		 * continuing on further and creating an even more
+		 * difficult to debug situation.
+		 */
+		if (rc)
+			panic("Could not enable big endian exceptions");
+	}
+#endif
 }
 
 /*
@@ -240,7 +274,8 @@ static void pSeries_lpar_hptab_clear(void)
 static long pSeries_lpar_hpte_updatepp(unsigned long slot,
 				       unsigned long newpp,
 				       unsigned long vpn,
-				       int psize, int ssize, int local)
+				       int psize, int apsize,
+				       int ssize, int local)
 {
 	unsigned long lpar_rc;
 	unsigned long flags = (newpp & 7) | H_AVPN;
@@ -328,7 +363,8 @@ static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp,
 }
 
 static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn,
-					 int psize, int ssize, int local)
+					 int psize, int apsize,
+					 int ssize, int local)
 {
 	unsigned long want_v;
 	unsigned long lpar_rc;
@@ -345,6 +381,113 @@ static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn,
 	BUG_ON(lpar_rc != H_SUCCESS);
 }
 
+/*
+ * Limit iterations holding pSeries_lpar_tlbie_lock to 3. We also need
+ * to make sure that we avoid bouncing the hypervisor tlbie lock.
+ */
+#define PPC64_HUGE_HPTE_BATCH 12
+
+static void __pSeries_lpar_hugepage_invalidate(unsigned long *slot,
+					     unsigned long *vpn, int count,
+					     int psize, int ssize)
+{
+	unsigned long param[8];
+	int i = 0, pix = 0, rc;
+	unsigned long flags = 0;
+	int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
+
+	if (lock_tlbie)
+		spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);
+
+	for (i = 0; i < count; i++) {
+
+		if (!firmware_has_feature(FW_FEATURE_BULK_REMOVE)) {
+			pSeries_lpar_hpte_invalidate(slot[i], vpn[i], psize, 0,
+						     ssize, 0);
+		} else {
+			param[pix] = HBR_REQUEST | HBR_AVPN | slot[i];
+			param[pix+1] = hpte_encode_avpn(vpn[i], psize, ssize);
+			pix += 2;
+			if (pix == 8) {
+				rc = plpar_hcall9(H_BULK_REMOVE, param,
+						  param[0], param[1], param[2],
+						  param[3], param[4], param[5],
+						  param[6], param[7]);
+				BUG_ON(rc != H_SUCCESS);
+				pix = 0;
+			}
+		}
+	}
+	if (pix) {
+		param[pix] = HBR_END;
+		rc = plpar_hcall9(H_BULK_REMOVE, param, param[0], param[1],
+				  param[2], param[3], param[4], param[5],
+				  param[6], param[7]);
+		BUG_ON(rc != H_SUCCESS);
+	}
+
+	if (lock_tlbie)
+		spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags);
+}
+
+static void pSeries_lpar_hugepage_invalidate(struct mm_struct *mm,
+				       unsigned char *hpte_slot_array,
+				       unsigned long addr, int psize)
+{
+	int ssize = 0, i, index = 0;
+	unsigned long s_addr = addr;
+	unsigned int max_hpte_count, valid;
+	unsigned long vpn_array[PPC64_HUGE_HPTE_BATCH];
+	unsigned long slot_array[PPC64_HUGE_HPTE_BATCH];
+	unsigned long shift, hidx, vpn = 0, vsid, hash, slot;
+
+	shift = mmu_psize_defs[psize].shift;
+	max_hpte_count = 1U << (PMD_SHIFT - shift);
+
+	for (i = 0; i < max_hpte_count; i++) {
+		valid = hpte_valid(hpte_slot_array, i);
+		if (!valid)
+			continue;
+		hidx =  hpte_hash_index(hpte_slot_array, i);
+
+		/* get the vpn */
+		addr = s_addr + (i * (1ul << shift));
+		if (!is_kernel_addr(addr)) {
+			ssize = user_segment_size(addr);
+			vsid = get_vsid(mm->context.id, addr, ssize);
+			WARN_ON(vsid == 0);
+		} else {
+			vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
+			ssize = mmu_kernel_ssize;
+		}
+
+		vpn = hpt_vpn(addr, vsid, ssize);
+		hash = hpt_hash(vpn, shift, ssize);
+		if (hidx & _PTEIDX_SECONDARY)
+			hash = ~hash;
+
+		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+		slot += hidx & _PTEIDX_GROUP_IX;
+
+		slot_array[index] = slot;
+		vpn_array[index] = vpn;
+		if (index == PPC64_HUGE_HPTE_BATCH - 1) {
+			/*
+			 * Now do a bulk invalidate
+			 */
+			__pSeries_lpar_hugepage_invalidate(slot_array,
+							   vpn_array,
+							   PPC64_HUGE_HPTE_BATCH,
+							   psize, ssize);
+			index = 0;
+		} else
+			index++;
+	}
+	if (index)
+		__pSeries_lpar_hugepage_invalidate(slot_array, vpn_array,
+						   index, psize, ssize);
+}
+
 static void pSeries_lpar_hpte_removebolted(unsigned long ea,
 					   int psize, int ssize)
 {
@@ -356,17 +499,12 @@ static void pSeries_lpar_hpte_removebolted(unsigned long ea,
 
 	slot = pSeries_lpar_hpte_find(vpn, psize, ssize);
 	BUG_ON(slot == -1);
-
-	pSeries_lpar_hpte_invalidate(slot, vpn, psize, ssize, 0);
+	/*
+	 * lpar doesn't use the passed actual page size
+	 */
+	pSeries_lpar_hpte_invalidate(slot, vpn, psize, 0, ssize, 0);
 }
 
-/* Flag bits for H_BULK_REMOVE */
-#define HBR_REQUEST	0x4000000000000000UL
-#define HBR_RESPONSE	0x8000000000000000UL
-#define HBR_END		0xc000000000000000UL
-#define HBR_AVPN	0x0200000000000000UL
-#define HBR_ANDCOND	0x0100000000000000UL
-
 /*
  * Take a spinlock around flushes to avoid bouncing the hypervisor tlbie
  * lock.
@@ -400,8 +538,11 @@ static void pSeries_lpar_flush_hash_range(unsigned long number, int local)
 			slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
 			slot += hidx & _PTEIDX_GROUP_IX;
 			if (!firmware_has_feature(FW_FEATURE_BULK_REMOVE)) {
+				/*
+				 * lpar doesn't use the passed actual page size
+				 */
 				pSeries_lpar_hpte_invalidate(slot, vpn, psize,
-							     ssize, local);
+							     0, ssize, local);
 			} else {
 				param[pix] = HBR_REQUEST | HBR_AVPN | slot;
 				param[pix+1] = hpte_encode_avpn(vpn, psize,
@@ -452,6 +593,7 @@ void __init hpte_init_lpar(void)
 	ppc_md.hpte_removebolted = pSeries_lpar_hpte_removebolted;
 	ppc_md.flush_hash_range	= pSeries_lpar_flush_hash_range;
 	ppc_md.hpte_clear_all   = pSeries_lpar_hptab_clear;
+	ppc_md.hugepage_invalidate = pSeries_lpar_hugepage_invalidate;
 }
 
 #ifdef CONFIG_PPC_SMLPAR
@@ -606,7 +748,7 @@ int h_get_mpp(struct hvcall_mpp_data *mpp_data)
 
 	mpp_data->mem_weight = (retbuf[3] >> 7 * 8) & 0xff;
 	mpp_data->unallocated_mem_weight = (retbuf[3] >> 6 * 8) & 0xff;
-	mpp_data->unallocated_entitlement = retbuf[3] & 0xffffffffffff;
+	mpp_data->unallocated_entitlement = retbuf[3] & 0xffffffffffffUL;
 
 	mpp_data->pool_size = retbuf[4];
 	mpp_data->loan_request = retbuf[5];
diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c
new file mode 100644
index 00000000000..c9fecf09b8f
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/lparcfg.c
@@ -0,0 +1,710 @@
+/*
+ * PowerPC64 LPAR Configuration Information Driver
+ *
+ * Dave Engebretsen engebret@us.ibm.com
+ *    Copyright (c) 2003 Dave Engebretsen
+ * Will Schmidt willschm@us.ibm.com
+ *    SPLPAR updates, Copyright (c) 2003 Will Schmidt IBM Corporation.
+ *    seq_file updates, Copyright (c) 2004 Will Schmidt IBM Corporation.
+ * Nathan Lynch nathanl@austin.ibm.com
+ *    Added lparcfg_write, Copyright (C) 2004 Nathan Lynch IBM Corporation.
+ *
+ *      This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ *
+ * This driver creates a proc file at /proc/ppc64/lparcfg which contains
+ * keyword - value pairs that specify the configuration of the partition.
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/proc_fs.h>
+#include <linux/init.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <asm/uaccess.h>
+#include <asm/lppaca.h>
+#include <asm/hvcall.h>
+#include <asm/firmware.h>
+#include <asm/rtas.h>
+#include <asm/time.h>
+#include <asm/prom.h>
+#include <asm/vdso_datapage.h>
+#include <asm/vio.h>
+#include <asm/mmu.h>
+#include <asm/machdep.h>
+
+
+/*
+ * This isn't a module but we expose that to userspace
+ * via /proc so leave the definitions here
+ */
+#define MODULE_VERS "1.9"
+#define MODULE_NAME "lparcfg"
+
+/* #define LPARCFG_DEBUG */
+
+/*
+ * Track sum of all purrs across all processors. This is used to further
+ * calculate usage values by different applications
+ */
+static unsigned long get_purr(void)
+{
+	unsigned long sum_purr = 0;
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		struct cpu_usage *cu;
+
+		cu = &per_cpu(cpu_usage_array, cpu);
+		sum_purr += cu->current_tb;
+	}
+	return sum_purr;
+}
+
+/*
+ * Methods used to fetch LPAR data when running on a pSeries platform.
+ */
+
+struct hvcall_ppp_data {
+	u64	entitlement;
+	u64	unallocated_entitlement;
+	u16	group_num;
+	u16	pool_num;
+	u8	capped;
+	u8	weight;
+	u8	unallocated_weight;
+	u16	active_procs_in_pool;
+	u16	active_system_procs;
+	u16	phys_platform_procs;
+	u32	max_proc_cap_avail;
+	u32	entitled_proc_cap_avail;
+};
+
+/*
+ * H_GET_PPP hcall returns info in 4 parms
+ *  (entitled_capacity, unallocated_capacity,
+ *  aggregation, resource_capability).
+ *
+ *  R4 = Entitled Processor Capacity Percentage.
+ *  R5 = Unallocated Processor Capacity Percentage.
+ *  R6 (AABBCCDDEEFFGGHH).
+ *      XXXX - reserved (0)
+ *          XXXX - reserved (0)
+ *              XXXX - Group Number
+ *                  XXXX - Pool Number.
+ *  R7 (IIJJKKLLMMNNOOPP).
+ *      XX - reserved. (0)
+ *        XX - bit 0-6 reserved (0).   bit 7 is Capped indicator.
+ *          XX - variable processor Capacity Weight
+ *            XX - Unallocated Variable Processor Capacity Weight.
+ *              XXXX - Active processors in Physical Processor Pool.
+ *                  XXXX  - Processors active on platform.
+ *  R8 (QQQQRRRRRRSSSSSS). if ibm,partition-performance-parameters-level >= 1
+ *	XXXX - Physical platform procs allocated to virtualization.
+ *	    XXXXXX - Max procs capacity % available to the partitions pool.
+ *	          XXXXXX - Entitled procs capacity % available to the
+ *			   partitions pool.
+ */
+static unsigned int h_get_ppp(struct hvcall_ppp_data *ppp_data)
+{
+	unsigned long rc;
+	unsigned long retbuf[PLPAR_HCALL9_BUFSIZE];
+
+	rc = plpar_hcall9(H_GET_PPP, retbuf);
+
+	ppp_data->entitlement = retbuf[0];
+	ppp_data->unallocated_entitlement = retbuf[1];
+
+	ppp_data->group_num = (retbuf[2] >> 2 * 8) & 0xffff;
+	ppp_data->pool_num = retbuf[2] & 0xffff;
+
+	ppp_data->capped = (retbuf[3] >> 6 * 8) & 0x01;
+	ppp_data->weight = (retbuf[3] >> 5 * 8) & 0xff;
+	ppp_data->unallocated_weight = (retbuf[3] >> 4 * 8) & 0xff;
+	ppp_data->active_procs_in_pool = (retbuf[3] >> 2 * 8) & 0xffff;
+	ppp_data->active_system_procs = retbuf[3] & 0xffff;
+
+	ppp_data->phys_platform_procs = retbuf[4] >> 6 * 8;
+	ppp_data->max_proc_cap_avail = (retbuf[4] >> 3 * 8) & 0xffffff;
+	ppp_data->entitled_proc_cap_avail = retbuf[4] & 0xffffff;
+
+	return rc;
+}
+
+static unsigned h_pic(unsigned long *pool_idle_time,
+		      unsigned long *num_procs)
+{
+	unsigned long rc;
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+
+	rc = plpar_hcall(H_PIC, retbuf);
+
+	*pool_idle_time = retbuf[0];
+	*num_procs = retbuf[1];
+
+	return rc;
+}
+
+/*
+ * parse_ppp_data
+ * Parse out the data returned from h_get_ppp and h_pic
+ */
+static void parse_ppp_data(struct seq_file *m)
+{
+	struct hvcall_ppp_data ppp_data;
+	struct device_node *root;
+	const __be32 *perf_level;
+	int rc;
+
+	rc = h_get_ppp(&ppp_data);
+	if (rc)
+		return;
+
+	seq_printf(m, "partition_entitled_capacity=%lld\n",
+	           ppp_data.entitlement);
+	seq_printf(m, "group=%d\n", ppp_data.group_num);
+	seq_printf(m, "system_active_processors=%d\n",
+	           ppp_data.active_system_procs);
+
+	/* pool related entries are appropriate for shared configs */
+	if (lppaca_shared_proc(get_lppaca())) {
+		unsigned long pool_idle_time, pool_procs;
+
+		seq_printf(m, "pool=%d\n", ppp_data.pool_num);
+
+		/* report pool_capacity in percentage */
+		seq_printf(m, "pool_capacity=%d\n",
+			   ppp_data.active_procs_in_pool * 100);
+
+		h_pic(&pool_idle_time, &pool_procs);
+		seq_printf(m, "pool_idle_time=%ld\n", pool_idle_time);
+		seq_printf(m, "pool_num_procs=%ld\n", pool_procs);
+	}
+
+	seq_printf(m, "unallocated_capacity_weight=%d\n",
+		   ppp_data.unallocated_weight);
+	seq_printf(m, "capacity_weight=%d\n", ppp_data.weight);
+	seq_printf(m, "capped=%d\n", ppp_data.capped);
+	seq_printf(m, "unallocated_capacity=%lld\n",
+		   ppp_data.unallocated_entitlement);
+
+	/* The last bits of information returned from h_get_ppp are only
+	 * valid if the ibm,partition-performance-parameters-level
+	 * property is >= 1.
+	 */
+	root = of_find_node_by_path("/");
+	if (root) {
+		perf_level = of_get_property(root,
+				"ibm,partition-performance-parameters-level",
+					     NULL);
+		if (perf_level && (be32_to_cpup(perf_level) >= 1)) {
+			seq_printf(m,
+			    "physical_procs_allocated_to_virtualization=%d\n",
+				   ppp_data.phys_platform_procs);
+			seq_printf(m, "max_proc_capacity_available=%d\n",
+				   ppp_data.max_proc_cap_avail);
+			seq_printf(m, "entitled_proc_capacity_available=%d\n",
+				   ppp_data.entitled_proc_cap_avail);
+		}
+
+		of_node_put(root);
+	}
+}
+
+/**
+ * parse_mpp_data
+ * Parse out data returned from h_get_mpp
+ */
+static void parse_mpp_data(struct seq_file *m)
+{
+	struct hvcall_mpp_data mpp_data;
+	int rc;
+
+	rc = h_get_mpp(&mpp_data);
+	if (rc)
+		return;
+
+	seq_printf(m, "entitled_memory=%ld\n", mpp_data.entitled_mem);
+
+	if (mpp_data.mapped_mem != -1)
+		seq_printf(m, "mapped_entitled_memory=%ld\n",
+		           mpp_data.mapped_mem);
+
+	seq_printf(m, "entitled_memory_group_number=%d\n", mpp_data.group_num);
+	seq_printf(m, "entitled_memory_pool_number=%d\n", mpp_data.pool_num);
+
+	seq_printf(m, "entitled_memory_weight=%d\n", mpp_data.mem_weight);
+	seq_printf(m, "unallocated_entitled_memory_weight=%d\n",
+	           mpp_data.unallocated_mem_weight);
+	seq_printf(m, "unallocated_io_mapping_entitlement=%ld\n",
+	           mpp_data.unallocated_entitlement);
+
+	if (mpp_data.pool_size != -1)
+		seq_printf(m, "entitled_memory_pool_size=%ld bytes\n",
+		           mpp_data.pool_size);
+
+	seq_printf(m, "entitled_memory_loan_request=%ld\n",
+	           mpp_data.loan_request);
+
+	seq_printf(m, "backing_memory=%ld bytes\n", mpp_data.backing_mem);
+}
+
+/**
+ * parse_mpp_x_data
+ * Parse out data returned from h_get_mpp_x
+ */
+static void parse_mpp_x_data(struct seq_file *m)
+{
+	struct hvcall_mpp_x_data mpp_x_data;
+
+	if (!firmware_has_feature(FW_FEATURE_XCMO))
+		return;
+	if (h_get_mpp_x(&mpp_x_data))
+		return;
+
+	seq_printf(m, "coalesced_bytes=%ld\n", mpp_x_data.coalesced_bytes);
+
+	if (mpp_x_data.pool_coalesced_bytes)
+		seq_printf(m, "pool_coalesced_bytes=%ld\n",
+			   mpp_x_data.pool_coalesced_bytes);
+	if (mpp_x_data.pool_purr_cycles)
+		seq_printf(m, "coalesce_pool_purr=%ld\n", mpp_x_data.pool_purr_cycles);
+	if (mpp_x_data.pool_spurr_cycles)
+		seq_printf(m, "coalesce_pool_spurr=%ld\n", mpp_x_data.pool_spurr_cycles);
+}
+
+#define SPLPAR_CHARACTERISTICS_TOKEN 20
+#define SPLPAR_MAXLENGTH 1026*(sizeof(char))
+
+/*
+ * parse_system_parameter_string()
+ * Retrieve the potential_processors, max_entitled_capacity and friends
+ * through the get-system-parameter rtas call.  Replace keyword strings as
+ * necessary.
+ */
+static void parse_system_parameter_string(struct seq_file *m)
+{
+	int call_status;
+
+	unsigned char *local_buffer = kmalloc(SPLPAR_MAXLENGTH, GFP_KERNEL);
+	if (!local_buffer) {
+		printk(KERN_ERR "%s %s kmalloc failure at line %d\n",
+		       __FILE__, __func__, __LINE__);
+		return;
+	}
+
+	spin_lock(&rtas_data_buf_lock);
+	memset(rtas_data_buf, 0, SPLPAR_MAXLENGTH);
+	call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1,
+				NULL,
+				SPLPAR_CHARACTERISTICS_TOKEN,
+				__pa(rtas_data_buf),
+				RTAS_DATA_BUF_SIZE);
+	memcpy(local_buffer, rtas_data_buf, SPLPAR_MAXLENGTH);
+	local_buffer[SPLPAR_MAXLENGTH - 1] = '\0';
+	spin_unlock(&rtas_data_buf_lock);
+
+	if (call_status != 0) {
+		printk(KERN_INFO
+		       "%s %s Error calling get-system-parameter (0x%x)\n",
+		       __FILE__, __func__, call_status);
+	} else {
+		int splpar_strlen;
+		int idx, w_idx;
+		char *workbuffer = kzalloc(SPLPAR_MAXLENGTH, GFP_KERNEL);
+		if (!workbuffer) {
+			printk(KERN_ERR "%s %s kmalloc failure at line %d\n",
+			       __FILE__, __func__, __LINE__);
+			kfree(local_buffer);
+			return;
+		}
+#ifdef LPARCFG_DEBUG
+		printk(KERN_INFO "success calling get-system-parameter\n");
+#endif
+		splpar_strlen = local_buffer[0] * 256 + local_buffer[1];
+		local_buffer += 2;	/* step over strlen value */
+
+		w_idx = 0;
+		idx = 0;
+		while ((*local_buffer) && (idx < splpar_strlen)) {
+			workbuffer[w_idx++] = local_buffer[idx++];
+			if ((local_buffer[idx] == ',')
+			    || (local_buffer[idx] == '\0')) {
+				workbuffer[w_idx] = '\0';
+				if (w_idx) {
+					/* avoid the empty string */
+					seq_printf(m, "%s\n", workbuffer);
+				}
+				memset(workbuffer, 0, SPLPAR_MAXLENGTH);
+				idx++;	/* skip the comma */
+				w_idx = 0;
+			} else if (local_buffer[idx] == '=') {
+				/* code here to replace workbuffer contents
+				   with different keyword strings */
+				if (0 == strcmp(workbuffer, "MaxEntCap")) {
+					strcpy(workbuffer,
+					       "partition_max_entitled_capacity");
+					w_idx = strlen(workbuffer);
+				}
+				if (0 == strcmp(workbuffer, "MaxPlatProcs")) {
+					strcpy(workbuffer,
+					       "system_potential_processors");
+					w_idx = strlen(workbuffer);
+				}
+			}
+		}
+		kfree(workbuffer);
+		local_buffer -= 2;	/* back up over strlen value */
+	}
+	kfree(local_buffer);
+}
+
+/* Return the number of processors in the system.
+ * This function reads through the device tree and counts
+ * the virtual processors; this does not include threads.
+ */
+static int lparcfg_count_active_processors(void)
+{
+	struct device_node *cpus_dn = NULL;
+	int count = 0;
+
+	while ((cpus_dn = of_find_node_by_type(cpus_dn, "cpu"))) {
+#ifdef LPARCFG_DEBUG
+		printk(KERN_ERR "cpus_dn %p\n", cpus_dn);
+#endif
+		count++;
+	}
+	return count;
+}
+
+static void pseries_cmo_data(struct seq_file *m)
+{
+	int cpu;
+	unsigned long cmo_faults = 0;
+	unsigned long cmo_fault_time = 0;
+
+	seq_printf(m, "cmo_enabled=%d\n", firmware_has_feature(FW_FEATURE_CMO));
+
+	if (!firmware_has_feature(FW_FEATURE_CMO))
+		return;
+
+	for_each_possible_cpu(cpu) {
+		cmo_faults += be64_to_cpu(lppaca_of(cpu).cmo_faults);
+		cmo_fault_time += be64_to_cpu(lppaca_of(cpu).cmo_fault_time);
+	}
+
+	seq_printf(m, "cmo_faults=%lu\n", cmo_faults);
+	seq_printf(m, "cmo_fault_time_usec=%lu\n",
+		   cmo_fault_time / tb_ticks_per_usec);
+	seq_printf(m, "cmo_primary_psp=%d\n", cmo_get_primary_psp());
+	seq_printf(m, "cmo_secondary_psp=%d\n", cmo_get_secondary_psp());
+	seq_printf(m, "cmo_page_size=%lu\n", cmo_get_page_size());
+}
+
+static void splpar_dispatch_data(struct seq_file *m)
+{
+	int cpu;
+	unsigned long dispatches = 0;
+	unsigned long dispatch_dispersions = 0;
+
+	for_each_possible_cpu(cpu) {
+		dispatches += be32_to_cpu(lppaca_of(cpu).yield_count);
+		dispatch_dispersions +=
+			be32_to_cpu(lppaca_of(cpu).dispersion_count);
+	}
+
+	seq_printf(m, "dispatches=%lu\n", dispatches);
+	seq_printf(m, "dispatch_dispersions=%lu\n", dispatch_dispersions);
+}
+
+static void parse_em_data(struct seq_file *m)
+{
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+
+	if (firmware_has_feature(FW_FEATURE_LPAR) &&
+	    plpar_hcall(H_GET_EM_PARMS, retbuf) == H_SUCCESS)
+		seq_printf(m, "power_mode_data=%016lx\n", retbuf[0]);
+}
+
+static int pseries_lparcfg_data(struct seq_file *m, void *v)
+{
+	int partition_potential_processors;
+	int partition_active_processors;
+	struct device_node *rtas_node;
+	const __be32 *lrdrp = NULL;
+
+	rtas_node = of_find_node_by_path("/rtas");
+	if (rtas_node)
+		lrdrp = of_get_property(rtas_node, "ibm,lrdr-capacity", NULL);
+
+	if (lrdrp == NULL) {
+		partition_potential_processors = vdso_data->processorCount;
+	} else {
+		partition_potential_processors = be32_to_cpup(lrdrp + 4);
+	}
+	of_node_put(rtas_node);
+
+	partition_active_processors = lparcfg_count_active_processors();
+
+	if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
+		/* this call handles the ibm,get-system-parameter contents */
+		parse_system_parameter_string(m);
+		parse_ppp_data(m);
+		parse_mpp_data(m);
+		parse_mpp_x_data(m);
+		pseries_cmo_data(m);
+		splpar_dispatch_data(m);
+
+		seq_printf(m, "purr=%ld\n", get_purr());
+	} else {		/* non SPLPAR case */
+
+		seq_printf(m, "system_active_processors=%d\n",
+			   partition_potential_processors);
+
+		seq_printf(m, "system_potential_processors=%d\n",
+			   partition_potential_processors);
+
+		seq_printf(m, "partition_max_entitled_capacity=%d\n",
+			   partition_potential_processors * 100);
+
+		seq_printf(m, "partition_entitled_capacity=%d\n",
+			   partition_active_processors * 100);
+	}
+
+	seq_printf(m, "partition_active_processors=%d\n",
+		   partition_active_processors);
+
+	seq_printf(m, "partition_potential_processors=%d\n",
+		   partition_potential_processors);
+
+	seq_printf(m, "shared_processor_mode=%d\n",
+		   lppaca_shared_proc(get_lppaca()));
+
+	seq_printf(m, "slb_size=%d\n", mmu_slb_size);
+
+	parse_em_data(m);
+
+	return 0;
+}
+
+static ssize_t update_ppp(u64 *entitlement, u8 *weight)
+{
+	struct hvcall_ppp_data ppp_data;
+	u8 new_weight;
+	u64 new_entitled;
+	ssize_t retval;
+
+	/* Get our current parameters */
+	retval = h_get_ppp(&ppp_data);
+	if (retval)
+		return retval;
+
+	if (entitlement) {
+		new_weight = ppp_data.weight;
+		new_entitled = *entitlement;
+	} else if (weight) {
+		new_weight = *weight;
+		new_entitled = ppp_data.entitlement;
+	} else
+		return -EINVAL;
+
+	pr_debug("%s: current_entitled = %llu, current_weight = %u\n",
+		 __func__, ppp_data.entitlement, ppp_data.weight);
+
+	pr_debug("%s: new_entitled = %llu, new_weight = %u\n",
+		 __func__, new_entitled, new_weight);
+
+	retval = plpar_hcall_norets(H_SET_PPP, new_entitled, new_weight);
+	return retval;
+}
+
+/**
+ * update_mpp
+ *
+ * Update the memory entitlement and weight for the partition.  Caller must
+ * specify either a new entitlement or weight, not both, to be updated
+ * since the h_set_mpp call takes both entitlement and weight as parameters.
+ */
+static ssize_t update_mpp(u64 *entitlement, u8 *weight)
+{
+	struct hvcall_mpp_data mpp_data;
+	u64 new_entitled;
+	u8 new_weight;
+	ssize_t rc;
+
+	if (entitlement) {
+		/* Check with vio to ensure the new memory entitlement
+		 * can be handled.
+		 */
+		rc = vio_cmo_entitlement_update(*entitlement);
+		if (rc)
+			return rc;
+	}
+
+	rc = h_get_mpp(&mpp_data);
+	if (rc)
+		return rc;
+
+	if (entitlement) {
+		new_weight = mpp_data.mem_weight;
+		new_entitled = *entitlement;
+	} else if (weight) {
+		new_weight = *weight;
+		new_entitled = mpp_data.entitled_mem;
+	} else
+		return -EINVAL;
+
+	pr_debug("%s: current_entitled = %lu, current_weight = %u\n",
+	         __func__, mpp_data.entitled_mem, mpp_data.mem_weight);
+
+	pr_debug("%s: new_entitled = %llu, new_weight = %u\n",
+		 __func__, new_entitled, new_weight);
+
+	rc = plpar_hcall_norets(H_SET_MPP, new_entitled, new_weight);
+	return rc;
+}
+
+/*
+ * Interface for changing system parameters (processor and memory
+ * entitlement and weight).  Format of input is "param_name=value";
+ * anything after value is ignored.  Valid parameters at this time are
+ * "partition_entitled_capacity", "capacity_weight" (set via H_SET_PPP),
+ * "entitled_memory" and "entitled_memory_weight" (set via H_SET_MPP).
+ *
+ * This function should be invoked only on systems with
+ * FW_FEATURE_SPLPAR.
+ */
+static ssize_t lparcfg_write(struct file *file, const char __user * buf,
+			     size_t count, loff_t * off)
+{
+	int kbuf_sz = 64;
+	char kbuf[kbuf_sz];
+	char *tmp;
+	u64 new_entitled, *new_entitled_ptr = &new_entitled;
+	u8 new_weight, *new_weight_ptr = &new_weight;
+	ssize_t retval;
+
+	if (!firmware_has_feature(FW_FEATURE_SPLPAR))
+		return -EINVAL;
+	/* An empty write would make kbuf[count - 1] below index kbuf[-1] */
+	if (count == 0 || count > kbuf_sz)
+		return -EINVAL;
+
+	if (copy_from_user(kbuf, buf, count))
+		return -EFAULT;
+
+	kbuf[count - 1] = '\0';
+	tmp = strchr(kbuf, '=');
+	if (!tmp)
+		return -EINVAL;
+
+	*tmp++ = '\0';
+
+	if (!strcmp(kbuf, "partition_entitled_capacity")) {
+		char *endp;
+		*new_entitled_ptr = (u64) simple_strtoul(tmp, &endp, 10);
+		if (endp == tmp)
+			return -EINVAL;
+
+		retval = update_ppp(new_entitled_ptr, NULL);
+	} else if (!strcmp(kbuf, "capacity_weight")) {
+		char *endp;
+		*new_weight_ptr = (u8) simple_strtoul(tmp, &endp, 10);
+		if (endp == tmp)
+			return -EINVAL;
+
+		retval = update_ppp(NULL, new_weight_ptr);
+	} else if (!strcmp(kbuf, "entitled_memory")) {
+		char *endp;
+		*new_entitled_ptr = (u64) simple_strtoul(tmp, &endp, 10);
+		if (endp == tmp)
+			return -EINVAL;
+
+		retval = update_mpp(new_entitled_ptr, NULL);
+	} else if (!strcmp(kbuf, "entitled_memory_weight")) {
+		char *endp;
+		*new_weight_ptr = (u8) simple_strtoul(tmp, &endp, 10);
+		if (endp == tmp)
+			return -EINVAL;
+
+		retval = update_mpp(NULL, new_weight_ptr);
+	} else
+		return -EINVAL;
+
+	if (retval == H_SUCCESS || retval == H_CONSTRAINED) {
+		retval = count;
+	} else if (retval == H_BUSY) {
+		retval = -EBUSY;
+	} else if (retval == H_HARDWARE) {
+		retval = -EIO;
+	} else if (retval == H_PARAMETER) {
+		retval = -EINVAL;
+	}
+
+	return retval;
+}
+
+static int lparcfg_data(struct seq_file *m, void *v)
+{
+	struct device_node *rootdn;
+	const char *model = "";
+	const char *system_id = "";
+	const char *tmp;
+	const __be32 *lp_index_ptr;
+	unsigned int lp_index = 0;
+
+	seq_printf(m, "%s %s\n", MODULE_NAME, MODULE_VERS);
+
+	rootdn = of_find_node_by_path("/");
+	if (rootdn) {
+		tmp = of_get_property(rootdn, "model", NULL);
+		if (tmp)
+			model = tmp;
+		tmp = of_get_property(rootdn, "system-id", NULL);
+		if (tmp)
+			system_id = tmp;
+		lp_index_ptr = of_get_property(rootdn, "ibm,partition-no",
+					NULL);
+		if (lp_index_ptr)
+			lp_index = be32_to_cpup(lp_index_ptr);
+		of_node_put(rootdn);
+	}
+	seq_printf(m, "serial_number=%s\n", system_id);
+	seq_printf(m, "system_type=%s\n", model);
+	seq_printf(m, "partition_id=%d\n", (int)lp_index);
+
+	return pseries_lparcfg_data(m, v);
+}
+
+static int lparcfg_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, lparcfg_data, NULL);
+}
+
+static const struct file_operations lparcfg_fops = {
+	.read		= seq_read,
+	.write		= lparcfg_write,
+	.open		= lparcfg_open,
+	.release	= single_release,
+	.llseek		= seq_lseek,
+};
+
+static int __init lparcfg_init(void)
+{
+	umode_t mode = S_IRUSR | S_IRGRP | S_IROTH;
+
+	/* Allow writing if we have FW_FEATURE_SPLPAR */
+	if (firmware_has_feature(FW_FEATURE_SPLPAR))
+		mode |= S_IWUSR;
+
+	if (!proc_create("powerpc/lparcfg", mode, NULL, &lparcfg_fops)) {
+		printk(KERN_ERR "Failed to create powerpc/lparcfg\n");
+		return -EIO;
+	}
+	return 0;
+}
+machine_device_initcall(pseries, lparcfg_init);
diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
index 3d01eee9ffb..bde7ebad394 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -28,7 +28,7 @@ struct update_props_workarea {
 	u32 state;
 	u64 reserved;
 	u32 nprops;
-};
+} __packed;
 
 #define NODE_ACTION_MASK	0xff000000
 #define NODE_COUNT_MASK		0x00ffffff
@@ -62,6 +62,7 @@ static int delete_dt_node(u32 phandle)
 		return -ENOENT;
 
 	dlpar_detach_node(dn);
+	of_node_put(dn);
 	return 0;
 }
 
@@ -119,7 +120,7 @@ static int update_dt_property(struct device_node *dn, struct property **prop,
 
 	if (!more) {
 		of_update_property(dn, new_prop);
-		new_prop = NULL;
+		*prop = NULL;
 	}
 
 	return 0;
@@ -130,7 +131,7 @@ static int update_dt_node(u32 phandle, s32 scope)
 	struct update_props_workarea *upwa;
 	struct device_node *dn;
 	struct property *prop = NULL;
-	int i, rc;
+	int i, rc, rtas_rc;
 	char *prop_data;
 	char *rtas_buf;
 	int update_properties_token;
@@ -154,25 +155,26 @@ static int update_dt_node(u32 phandle, s32 scope)
 	upwa->phandle = phandle;
 
 	do {
-		rc = mobility_rtas_call(update_properties_token, rtas_buf,
+		rtas_rc = mobility_rtas_call(update_properties_token, rtas_buf,
 					scope);
-		if (rc < 0)
+		if (rtas_rc < 0)
 			break;
 
 		prop_data = rtas_buf + sizeof(*upwa);
 
-		/* The first element of the buffer is the path of the node
-		 * being updated in the form of a 8 byte string length
-		 * followed by the string. Skip past this to get to the
-		 * properties being updated.
+		/* On the first call to ibm,update-properties for a node the
+		 * first property value descriptor contains an empty
+		 * property name, the property value length encoded as u32,
+		 * and the property value is the node path being updated.
 		 */
-		vd = *prop_data++;
-		prop_data += vd;
+		if (*prop_data == 0) {
+			prop_data++;
+			vd = *(u32 *)prop_data;
+			prop_data += vd + sizeof(vd);
+			upwa->nprops--;
+		}
 
-		/* The path we skipped over is counted as one of the elements
-		 * returned so start counting at one.
-		 */
-		for (i = 1; i < upwa->nprops; i++) {
+		for (i = 0; i < upwa->nprops; i++) {
 			char *prop_name;
 
 			prop_name = prop_data;
@@ -202,7 +204,7 @@ static int update_dt_node(u32 phandle, s32 scope)
 				prop_data += vd;
 			}
 		}
-	} while (rc == 1);
+	} while (rtas_rc == 1);
 
 	of_node_put(dn);
 	kfree(rtas_buf);
@@ -215,17 +217,14 @@ static int add_dt_node(u32 parent_phandle, u32 drc_index)
 	struct device_node *parent_dn;
 	int rc;
 
-	dn = dlpar_configure_connector(drc_index);
-	if (!dn)
+	parent_dn = of_find_node_by_phandle(parent_phandle);
+	if (!parent_dn)
 		return -ENOENT;
 
-	parent_dn = of_find_node_by_phandle(parent_phandle);
-	if (!parent_dn) {
-		dlpar_free_cc_nodes(dn);
+	dn = dlpar_configure_connector(drc_index, parent_dn);
+	if (!dn)
 		return -ENOENT;
-	}
 
-	dn->parent = parent_dn;
 	rc = dlpar_attach_node(dn);
 	if (rc)
 		dlpar_free_cc_nodes(dn);
@@ -291,13 +290,6 @@ void post_mobility_fixup(void)
 	int rc;
 	int activate_fw_token;
 
-	rc = pseries_devicetree_update(MIGRATION_SCOPE);
-	if (rc) {
-		printk(KERN_ERR "Initial post-mobility device tree update "
-		       "failed: %d\n", rc);
-		return;
-	}
-
 	activate_fw_token = rtas_token("ibm,activate-firmware");
 	if (activate_fw_token == RTAS_UNKNOWN_SERVICE) {
 		printk(KERN_ERR "Could not make post-mobility "
@@ -305,16 +297,17 @@ void post_mobility_fixup(void)
 		return;
 	}
 
-	rc = rtas_call(activate_fw_token, 0, 1, NULL);
-	if (!rc) {
-		rc = pseries_devicetree_update(MIGRATION_SCOPE);
-		if (rc)
-			printk(KERN_ERR "Secondary post-mobility device tree "
-			       "update failed: %d\n", rc);
-	} else {
+	do {
+		rc = rtas_call(activate_fw_token, 0, 1, NULL);
+	} while (rtas_busy_delay(rc));
+
+	if (rc)
 		printk(KERN_ERR "Post-mobility activate-fw failed: %d\n", rc);
-		return;
-	}
+
+	rc = pseries_devicetree_update(MIGRATION_SCOPE);
+	if (rc)
+		printk(KERN_ERR "Post-mobility device tree update "
+			"failed: %d\n", rc);
 
 	return;
 }
diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c
index 6d2f0abce6f..0c882e83c4c 100644
--- a/arch/powerpc/platforms/pseries/msi.c
+++ b/arch/powerpc/platforms/pseries/msi.c
@@ -130,7 +130,8 @@ static int check_req(struct pci_dev *pdev, int nvec, char *prop_name)
 {
 	struct device_node *dn;
 	struct pci_dn *pdn;
-	const u32 *req_msi;
+	const __be32 *p;
+	u32 req_msi;
 
 	pdn = pci_get_pdn(pdev);
 	if (!pdn)
@@ -138,19 +139,20 @@ static int check_req(struct pci_dev *pdev, int nvec, char *prop_name)
 
 	dn = pdn->node;
 
-	req_msi = of_get_property(dn, prop_name, NULL);
-	if (!req_msi) {
+	p = of_get_property(dn, prop_name, NULL);
+	if (!p) {
 		pr_debug("rtas_msi: No %s on %s\n", prop_name, dn->full_name);
 		return -ENOENT;
 	}
 
-	if (*req_msi < nvec) {
+	req_msi = be32_to_cpup(p);
+	if (req_msi < nvec) {
 		pr_debug("rtas_msi: %s requests < %d MSIs\n", prop_name, nvec);
 
-		if (*req_msi == 0) /* Be paranoid */
+		if (req_msi == 0) /* Be paranoid */
 			return -ENOSPC;
 
-		return *req_msi;
+		return req_msi;
 	}
 
 	return 0;
@@ -171,7 +173,7 @@ static int check_req_msix(struct pci_dev *pdev, int nvec)
 static struct device_node *find_pe_total_msi(struct pci_dev *dev, int *total)
 {
 	struct device_node *dn;
-	const u32 *p;
+	const __be32 *p;
 
 	dn = of_node_get(pci_device_to_OF_node(dev));
 	while (dn) {
@@ -179,7 +181,7 @@ static struct device_node *find_pe_total_msi(struct pci_dev *dev, int *total)
 		if (p) {
 			pr_debug("rtas_msi: found prop on dn %s\n",
 				dn->full_name);
-			*total = *p;
+			*total = be32_to_cpup(p);
 			return dn;
 		}
 
@@ -232,13 +234,13 @@ struct msi_counts {
 static void *count_non_bridge_devices(struct device_node *dn, void *data)
 {
 	struct msi_counts *counts = data;
-	const u32 *p;
+	const __be32 *p;
 	u32 class;
 
 	pr_debug("rtas_msi: counting %s\n", dn->full_name);
 
 	p = of_get_property(dn, "class-code", NULL);
-	class = p ? *p : 0;
+	class = p ? be32_to_cpup(p) : 0;
 
 	if ((class >> 8) != PCI_CLASS_BRIDGE_PCI)
 		counts->num_devices++;
@@ -249,7 +251,7 @@ static void *count_non_bridge_devices(struct device_node *dn, void *data)
 static void *count_spare_msis(struct device_node *dn, void *data)
 {
 	struct msi_counts *counts = data;
-	const u32 *p;
+	const __be32 *p;
 	int req;
 
 	if (dn == counts->requestor)
@@ -260,11 +262,11 @@ static void *count_spare_msis(struct device_node *dn, void *data)
 		req = 0;
 		p = of_get_property(dn, "ibm,req#msi", NULL);
 		if (p)
-			req = *p;
+			req = be32_to_cpup(p);
 
 		p = of_get_property(dn, "ibm,req#msi-x", NULL);
 		if (p)
-			req = max(req, (int)*p);
+			req = max(req, (int)be32_to_cpup(p));
 	}
 
 	if (req < counts->quota)
diff --git a/arch/powerpc/platforms/pseries/nvram.c b/arch/powerpc/platforms/pseries/nvram.c
index 8733a86ad52..0cc240b7f69 100644
--- a/arch/powerpc/platforms/pseries/nvram.c
+++ b/arch/powerpc/platforms/pseries/nvram.c
@@ -18,6 +18,7 @@
 #include <linux/spinlock.h>
 #include <linux/slab.h>
 #include <linux/kmsg_dump.h>
+#include <linux/pstore.h>
 #include <linux/ctype.h>
 #include <linux/zlib.h>
 #include <asm/uaccess.h>
@@ -29,14 +30,21 @@
 /* Max bytes to read/write in one go */
 #define NVRW_CNT 0x20
 
+/*
+ * Set oops header version to distinguish between old and new format header.
+ * lnx,oops-log partition max size is 4000, header version > 4000 will
+ * help in identifying new header.
+ */
+#define OOPS_HDR_VERSION 5000
+
 static unsigned int nvram_size;
 static int nvram_fetch, nvram_store;
 static char nvram_buf[NVRW_CNT];	/* assume this is in the first 4GB */
 static DEFINE_SPINLOCK(nvram_lock);
 
 struct err_log_info {
-	int error_type;
-	unsigned int seq_num;
+	__be32 error_type;
+	__be32 seq_num;
 };
 
 struct nvram_os_partition {
@@ -45,20 +53,23 @@ struct nvram_os_partition {
 	int min_size;	/* minimum acceptable size (0 means req_size) */
 	long size;	/* size of data portion (excluding err_log_info) */
 	long index;	/* offset of data portion of partition */
+	bool os_partition; /* partition initialized by OS, not FW */
 };
 
 static struct nvram_os_partition rtas_log_partition = {
 	.name = "ibm,rtas-log",
 	.req_size = 2079,
 	.min_size = 1055,
-	.index = -1
+	.index = -1,
+	.os_partition = true
 };
 
 static struct nvram_os_partition oops_log_partition = {
 	.name = "lnx,oops-log",
 	.req_size = 4000,
 	.min_size = 2000,
-	.index = -1
+	.index = -1,
+	.os_partition = true
 };
 
 static const char *pseries_nvram_os_partitions[] = {
@@ -67,6 +78,12 @@ static const char *pseries_nvram_os_partitions[] = {
 	NULL
 };
 
+struct oops_log_info {
+	__be16 version;
+	__be16 report_length;
+	__be64 timestamp;
+} __attribute__((packed));
+
 static void oops_to_nvram(struct kmsg_dumper *dumper,
 			  enum kmsg_dump_reason reason);
 
@@ -83,28 +100,28 @@ static unsigned long last_unread_rtas_event;	/* timestamp */
 
  * big_oops_buf[] holds the uncompressed text we're capturing.
  *
- * oops_buf[] holds the compressed text, preceded by a prefix.
- * The prefix is just a u16 holding the length of the compressed* text.
- * (*Or uncompressed, if compression fails.)  oops_buf[] gets written
- * to NVRAM.
+ * oops_buf[] holds the compressed text, preceded by an oops header.
+ * oops header has u16 holding the version of oops header (to differentiate
+ * between old and new format header) followed by u16 holding the length of
+ * the compressed* text (*Or uncompressed, if compression fails.) and u64
+ * holding the timestamp. oops_buf[] gets written to NVRAM.
  *
- * oops_len points to the prefix.  oops_data points to the compressed text.
+ * oops_log_info points to the header. oops_data points to the compressed text.
  *
  * +- oops_buf
- * |		+- oops_data
- * v		v
- * +------------+-----------------------------------------------+
- * | length	| text                                          |
- * | (2 bytes)	| (oops_data_sz bytes)                          |
- * +------------+-----------------------------------------------+
+ * |                                   +- oops_data
+ * v                                   v
+ * +-----------+-----------+-----------+------------------------+
+ * | version   | length    | timestamp | text                   |
+ * | (2 bytes) | (2 bytes) | (8 bytes) | (oops_data_sz bytes)   |
+ * +-----------+-----------+-----------+------------------------+
  * ^
- * +- oops_len
+ * +- oops_log_info
  *
  * We preallocate these buffers during init to avoid kmalloc during oops/panic.
  */
 static size_t big_oops_buf_sz;
 static char *big_oops_buf, *oops_buf;
-static u16 *oops_len;
 static char *oops_data;
 static size_t oops_data_sz;
 
@@ -114,6 +131,30 @@ static size_t oops_data_sz;
 #define MEM_LEVEL 4
 static struct z_stream_s stream;
 
+#ifdef CONFIG_PSTORE
+static struct nvram_os_partition of_config_partition = {
+	.name = "of-config",
+	.index = -1,
+	.os_partition = false
+};
+
+static struct nvram_os_partition common_partition = {
+	.name = "common",
+	.index = -1,
+	.os_partition = false
+};
+
+static enum pstore_type_id nvram_type_ids[] = {
+	PSTORE_TYPE_DMESG,
+	PSTORE_TYPE_PPC_RTAS,
+	PSTORE_TYPE_PPC_OF,
+	PSTORE_TYPE_PPC_COMMON,
+	-1
+};
+static int read_type;
+static unsigned long last_rtas_event;
+#endif
+
 static ssize_t pSeries_nvram_read(char *buf, size_t count, loff_t *index)
 {
 	unsigned int i;
@@ -250,20 +291,20 @@ int nvram_write_os_partition(struct nvram_os_partition *part, char * buff,
 		length = part->size;
 	}
 
-	info.error_type = err_type;
-	info.seq_num = error_log_cnt;
+	info.error_type = cpu_to_be32(err_type);
+	info.seq_num = cpu_to_be32(error_log_cnt);
 
 	tmp_index = part->index;
 
 	rc = ppc_md.nvram_write((char *)&info, sizeof(struct err_log_info), &tmp_index);
 	if (rc <= 0) {
-		pr_err("%s: Failed nvram_write (%d)\n", __FUNCTION__, rc);
+		pr_err("%s: Failed nvram_write (%d)\n", __func__, rc);
 		return rc;
 	}
 
 	rc = ppc_md.nvram_write(buff, length, &tmp_index);
 	if (rc <= 0) {
-		pr_err("%s: Failed nvram_write (%d)\n", __FUNCTION__, rc);
+		pr_err("%s: Failed nvram_write (%d)\n", __func__, rc);
 		return rc;
 	}
 	
@@ -275,48 +316,71 @@ int nvram_write_error_log(char * buff, int length,
 {
 	int rc = nvram_write_os_partition(&rtas_log_partition, buff, length,
 						err_type, error_log_cnt);
-	if (!rc)
+	if (!rc) {
 		last_unread_rtas_event = get_seconds();
+#ifdef CONFIG_PSTORE
+		last_rtas_event = get_seconds();
+#endif
+	}
+
 	return rc;
 }
 
-/* nvram_read_error_log
+/* nvram_read_partition
  *
- * Reads nvram for error log for at most 'length'
+ * Reads at most 'length' bytes of data from the given nvram partition
  */
-int nvram_read_error_log(char * buff, int length,
-                         unsigned int * err_type, unsigned int * error_log_cnt)
+int nvram_read_partition(struct nvram_os_partition *part, char *buff,
+			int length, unsigned int *err_type,
+			unsigned int *error_log_cnt)
 {
 	int rc;
 	loff_t tmp_index;
 	struct err_log_info info;
 	
-	if (rtas_log_partition.index == -1)
+	if (part->index == -1)
 		return -1;
 
-	if (length > rtas_log_partition.size)
-		length = rtas_log_partition.size;
+	if (length > part->size)
+		length = part->size;
 
-	tmp_index = rtas_log_partition.index;
+	tmp_index = part->index;
 
-	rc = ppc_md.nvram_read((char *)&info, sizeof(struct err_log_info), &tmp_index);
-	if (rc <= 0) {
-		printk(KERN_ERR "nvram_read_error_log: Failed nvram_read (%d)\n", rc);
-		return rc;
+	if (part->os_partition) {
+		rc = ppc_md.nvram_read((char *)&info,
+					sizeof(struct err_log_info),
+					&tmp_index);
+		if (rc <= 0) {
+			pr_err("%s: Failed nvram_read (%d)\n", __func__, rc);
+			return rc;
+		}
 	}
 
 	rc = ppc_md.nvram_read(buff, length, &tmp_index);
 	if (rc <= 0) {
-		printk(KERN_ERR "nvram_read_error_log: Failed nvram_read (%d)\n", rc);
+		pr_err("%s: Failed nvram_read (%d)\n", __func__, rc);
 		return rc;
 	}
 
-	*error_log_cnt = info.seq_num;
-	*err_type = info.error_type;
+	if (part->os_partition) {
+		*error_log_cnt = be32_to_cpu(info.seq_num);
+		*err_type = be32_to_cpu(info.error_type);
+	}
 
 	return 0;
 }
 
+/* nvram_read_error_log
+ *
+ * Reads at most 'length' bytes of the error log from nvram
+ */
+int nvram_read_error_log(char *buff, int length,
+			unsigned int *err_type, unsigned int *error_log_cnt)
+{
+	return nvram_read_partition(&rtas_log_partition, buff, length,
+						err_type, error_log_cnt);
+}
+
 /* This doesn't actually zero anything, but it sets the event_logged
  * word to tell that this event is safely in syslog.
  */
@@ -364,9 +428,6 @@ static int __init pseries_nvram_init_os_partition(struct nvram_os_partition
 	loff_t p;
 	int size;
 
-	/* Scan nvram for partitions */
-	nvram_scan_partitions();
-
 	/* Look for ours */
 	p = nvram_find_partition(part->name, NVRAM_SIG_OS, &size);
 
@@ -405,6 +466,267 @@ static int __init pseries_nvram_init_os_partition(struct nvram_os_partition
 	return 0;
 }
 
+/*
+ * Are we using the ibm,rtas-log for oops/panic reports?  And if so,
+ * would logging this oops/panic overwrite an RTAS event that rtas_errd
+ * hasn't had a chance to read and process?  Return 1 if so, else 0.
+ *
+ * We assume that if rtas_errd hasn't read the RTAS event in
+ * NVRAM_RTAS_READ_TIMEOUT seconds, it's probably not going to.
+ */
+static int clobbering_unread_rtas_event(void)
+{
+	return (oops_log_partition.index == rtas_log_partition.index
+		&& last_unread_rtas_event
+		&& get_seconds() - last_unread_rtas_event <=
+						NVRAM_RTAS_READ_TIMEOUT);
+}
+
+/* Derived from logfs_compress() */
+static int nvram_compress(const void *in, void *out, size_t inlen,
+							size_t outlen)
+{
+	int err, ret;
+
+	ret = -EIO;
+	err = zlib_deflateInit2(&stream, COMPR_LEVEL, Z_DEFLATED, WINDOW_BITS,
+						MEM_LEVEL, Z_DEFAULT_STRATEGY);
+	if (err != Z_OK)
+		goto error;
+
+	stream.next_in = in;
+	stream.avail_in = inlen;
+	stream.total_in = 0;
+	stream.next_out = out;
+	stream.avail_out = outlen;
+	stream.total_out = 0;
+
+	err = zlib_deflate(&stream, Z_FINISH);
+	if (err != Z_STREAM_END)
+		goto error;
+
+	err = zlib_deflateEnd(&stream);
+	if (err != Z_OK)
+		goto error;
+
+	if (stream.total_out >= stream.total_in)
+		goto error;
+
+	ret = stream.total_out;
+error:
+	return ret;
+}
+
+/* Compress the text from big_oops_buf into oops_buf. */
+static int zip_oops(size_t text_len)
+{
+	struct oops_log_info *oops_hdr = (struct oops_log_info *)oops_buf;
+	int zipped_len = nvram_compress(big_oops_buf, oops_data, text_len,
+								oops_data_sz);
+	if (zipped_len < 0) {
+		pr_err("nvram: compression failed; returned %d\n", zipped_len);
+		pr_err("nvram: logging uncompressed oops/panic report\n");
+		return -1;
+	}
+	oops_hdr->version = cpu_to_be16(OOPS_HDR_VERSION);
+	oops_hdr->report_length = cpu_to_be16(zipped_len);
+	oops_hdr->timestamp = cpu_to_be64(get_seconds());
+	return 0;
+}
+
+#ifdef CONFIG_PSTORE
+static int nvram_pstore_open(struct pstore_info *psi)
+{
+	/* Reset the iterator to start reading partitions again */
+	read_type = -1;
+	return 0;
+}
+
+/**
+ * nvram_pstore_write - pstore write callback for nvram
+ * @type:               Type of message logged
+ * @reason:             reason behind dump (oops/panic)
+ * @id:                 set to @part after a successful write
+ * @part:               pstore writes data to registered buffer in parts;
+ *                      parts numbered above 1 are not stored.
+ * @count:              Indicates oops count
+ * @compressed:         Flag to indicate the log is compressed
+ * @size:               number of bytes written to the registered buffer
+ * @psi:                registered pstore_info structure
+ *
+ * Called by pstore_dump() when an oops or panic report is logged in the
+ * printk buffer. Returns 0 on successful write, -1 if the record is rejected
+ * by the part/type/RTAS-clobber check, else nvram_write_os_partition()'s rc.
+ */
+static int nvram_pstore_write(enum pstore_type_id type,
+				enum kmsg_dump_reason reason,
+				u64 *id, unsigned int part, int count,
+				bool compressed, size_t size,
+				struct pstore_info *psi)
+{
+	int rc;
+	unsigned int err_type = ERR_TYPE_KERNEL_PANIC;
+	struct oops_log_info *oops_hdr = (struct oops_log_info *) oops_buf;
+
+	/* part 1 has the recent messages from printk buffer */
+	if (part > 1 || type != PSTORE_TYPE_DMESG ||
+				clobbering_unread_rtas_event())
+		return -1;
+
+	oops_hdr->version = cpu_to_be16(OOPS_HDR_VERSION);
+	oops_hdr->report_length = cpu_to_be16(size);
+	oops_hdr->timestamp = cpu_to_be64(get_seconds());
+
+	if (compressed)
+		err_type = ERR_TYPE_KERNEL_PANIC_GZ;
+
+	rc = nvram_write_os_partition(&oops_log_partition, oops_buf,
+		(int) (sizeof(*oops_hdr) + size), err_type, count);
+
+	if (rc != 0)
+		return rc;
+
+	*id = part;
+	return 0;
+}
+
+/*
+ * pstore read callback: each call reads the next nvram_type_ids[] entry
+ * (oops/panic report, rtas, of-config, common partition) into *buf and
+ * returns the length of that data. Returns 0 when no entry is left or the
+ * partition cannot be found or read, and -ENOMEM if allocation fails.
+ */
+static ssize_t nvram_pstore_read(u64 *id, enum pstore_type_id *type,
+				int *count, struct timespec *time, char **buf,
+				bool *compressed, struct pstore_info *psi)
+{
+	struct oops_log_info *oops_hdr;
+	unsigned int err_type, id_no, size = 0;
+	struct nvram_os_partition *part = NULL;
+	char *buff = NULL;
+	int sig = 0;
+	loff_t p;
+
+	read_type++;
+
+	switch (nvram_type_ids[read_type]) {
+	case PSTORE_TYPE_DMESG:
+		part = &oops_log_partition;
+		*type = PSTORE_TYPE_DMESG;
+		break;
+	case PSTORE_TYPE_PPC_RTAS:
+		part = &rtas_log_partition;
+		*type = PSTORE_TYPE_PPC_RTAS;
+		time->tv_sec = last_rtas_event;
+		time->tv_nsec = 0;
+		break;
+	case PSTORE_TYPE_PPC_OF:
+		sig = NVRAM_SIG_OF;
+		part = &of_config_partition;
+		*type = PSTORE_TYPE_PPC_OF;
+		*id = PSTORE_TYPE_PPC_OF;
+		time->tv_sec = 0;
+		time->tv_nsec = 0;
+		break;
+	case PSTORE_TYPE_PPC_COMMON:
+		sig = NVRAM_SIG_SYS;
+		part = &common_partition;
+		*type = PSTORE_TYPE_PPC_COMMON;
+		*id = PSTORE_TYPE_PPC_COMMON;
+		time->tv_sec = 0;
+		time->tv_nsec = 0;
+		break;
+	default:
+		return 0;
+	}
+
+	if (!part->os_partition) {
+		p = nvram_find_partition(part->name, sig, &size);
+		if (p <= 0) {
+			pr_err("nvram: Failed to find partition %s, "
+				"err %d\n", part->name, (int)p);
+			return 0;
+		}
+		part->index = p;
+		part->size = size;
+	}
+
+	buff = kmalloc(part->size, GFP_KERNEL);
+	if (!buff)
+		return -ENOMEM;
+
+	if (nvram_read_partition(part, buff, part->size, &err_type, &id_no)) {
+		kfree(buff);
+		return 0;
+	}
+
+	*count = 0;
+
+	if (part->os_partition)
+		*id = id_no;
+
+	if (nvram_type_ids[read_type] == PSTORE_TYPE_DMESG) {
+		size_t length, hdr_size;
+
+		oops_hdr = (struct oops_log_info *)buff;
+		if (be16_to_cpu(oops_hdr->version) < OOPS_HDR_VERSION) {
+			/* Old format oops header had 2-byte record size */
+			hdr_size = sizeof(u16);
+			length = be16_to_cpu(oops_hdr->version);
+			time->tv_sec = 0;
+			time->tv_nsec = 0;
+		} else {
+			hdr_size = sizeof(*oops_hdr);
+			length = be16_to_cpu(oops_hdr->report_length);
+			time->tv_sec = be64_to_cpu(oops_hdr->timestamp);
+			time->tv_nsec = 0;
+		}
+		*buf = kmalloc(length, GFP_KERNEL);
+		if (*buf != NULL)
+			memcpy(*buf, buff + hdr_size, length);
+		/* The staging buffer is no longer needed on either path. */
+		kfree(buff);
+		if (*buf == NULL)
+			return -ENOMEM;
+
+		/* err_type records whether the report was compressed. */
+		*compressed = (err_type == ERR_TYPE_KERNEL_PANIC_GZ);
+		return length;
+	}
+
+	*buf = buff;
+	return part->size;
+}
+
+static struct pstore_info nvram_pstore_info = {
+	.owner = THIS_MODULE,
+	.name = "nvram",
+	.open = nvram_pstore_open,
+	.read = nvram_pstore_read,
+	.write = nvram_pstore_write,
+};
+
+static int nvram_pstore_init(void)
+{
+	int rc = 0;
+
+	nvram_pstore_info.buf = oops_data;
+	nvram_pstore_info.bufsize = oops_data_sz;
+
+	rc = pstore_register(&nvram_pstore_info);
+	if (rc != 0)
+		pr_err("nvram: pstore_register() failed, defaults to "
+				"kmsg_dump; returned %d\n", rc);
+
+	return rc;
+}
+#else
+static int nvram_pstore_init(void)
+{
+	return -1;
+}
+#endif
+
 static void __init nvram_init_oops_partition(int rtas_partition_exists)
 {
 	int rc;
@@ -425,9 +747,13 @@ static void __init nvram_init_oops_partition(int rtas_partition_exists)
 						oops_log_partition.name);
 		return;
 	}
-	oops_len = (u16*) oops_buf;
-	oops_data = oops_buf + sizeof(u16);
-	oops_data_sz = oops_log_partition.size - sizeof(u16);
+	oops_data = oops_buf + sizeof(struct oops_log_info);
+	oops_data_sz = oops_log_partition.size - sizeof(struct oops_log_info);
+
+	rc = nvram_pstore_init();
+
+	if (!rc)
+		return;
 
 	/*
 	 * Figure compression (preceded by elimination of each line's <n>
@@ -437,8 +763,8 @@ static void __init nvram_init_oops_partition(int rtas_partition_exists)
 	big_oops_buf_sz = (oops_data_sz * 100) / 45;
 	big_oops_buf = kmalloc(big_oops_buf_sz, GFP_KERNEL);
 	if (big_oops_buf) {
-		stream.workspace = kmalloc(zlib_deflate_workspacesize(
-				WINDOW_BITS, MEM_LEVEL), GFP_KERNEL);
+		stream.workspace =  kmalloc(zlib_deflate_workspacesize(
+					WINDOW_BITS, MEM_LEVEL), GFP_KERNEL);
 		if (!stream.workspace) {
 			pr_err("nvram: No memory for compression workspace; "
 				"skipping compression of %s partition data\n",
@@ -465,6 +791,9 @@ static int __init pseries_nvram_init_log_partitions(void)
 {
 	int rc;
 
+	/* Scan nvram for partitions */
+	nvram_scan_partitions();
+
 	rc = pseries_nvram_init_os_partition(&rtas_log_partition);
 	nvram_init_oops_partition(rc == 0);
 	return 0;
@@ -474,7 +803,7 @@ machine_arch_initcall(pseries, pseries_nvram_init_log_partitions);
 int __init pSeries_nvram_init(void)
 {
 	struct device_node *nvram;
-	const unsigned int *nbytes_p;
+	const __be32 *nbytes_p;
 	unsigned int proplen;
 
 	nvram = of_find_node_by_type(NULL, "nvram");
@@ -487,7 +816,7 @@ int __init pSeries_nvram_init(void)
 		return -EIO;
 	}
 
-	nvram_size = *nbytes_p;
+	nvram_size = be32_to_cpup(nbytes_p);
 
 	nvram_fetch = rtas_token("nvram-fetch");
 	nvram_store = rtas_token("nvram-store");
@@ -501,70 +830,6 @@ int __init pSeries_nvram_init(void)
 	return 0;
 }
 
-/*
- * Are we using the ibm,rtas-log for oops/panic reports?  And if so,
- * would logging this oops/panic overwrite an RTAS event that rtas_errd
- * hasn't had a chance to read and process?  Return 1 if so, else 0.
- *
- * We assume that if rtas_errd hasn't read the RTAS event in
- * NVRAM_RTAS_READ_TIMEOUT seconds, it's probably not going to.
- */
-static int clobbering_unread_rtas_event(void)
-{
-	return (oops_log_partition.index == rtas_log_partition.index
-		&& last_unread_rtas_event
-		&& get_seconds() - last_unread_rtas_event <=
-						NVRAM_RTAS_READ_TIMEOUT);
-}
-
-/* Derived from logfs_compress() */
-static int nvram_compress(const void *in, void *out, size_t inlen,
-							size_t outlen)
-{
-	int err, ret;
-
-	ret = -EIO;
-	err = zlib_deflateInit2(&stream, COMPR_LEVEL, Z_DEFLATED, WINDOW_BITS,
-						MEM_LEVEL, Z_DEFAULT_STRATEGY);
-	if (err != Z_OK)
-		goto error;
-
-	stream.next_in = in;
-	stream.avail_in = inlen;
-	stream.total_in = 0;
-	stream.next_out = out;
-	stream.avail_out = outlen;
-	stream.total_out = 0;
-
-	err = zlib_deflate(&stream, Z_FINISH);
-	if (err != Z_STREAM_END)
-		goto error;
-
-	err = zlib_deflateEnd(&stream);
-	if (err != Z_OK)
-		goto error;
-
-	if (stream.total_out >= stream.total_in)
-		goto error;
-
-	ret = stream.total_out;
-error:
-	return ret;
-}
-
-/* Compress the text from big_oops_buf into oops_buf. */
-static int zip_oops(size_t text_len)
-{
-	int zipped_len = nvram_compress(big_oops_buf, oops_data, text_len,
-								oops_data_sz);
-	if (zipped_len < 0) {
-		pr_err("nvram: compression failed; returned %d\n", zipped_len);
-		pr_err("nvram: logging uncompressed oops/panic report\n");
-		return -1;
-	}
-	*oops_len = (u16) zipped_len;
-	return 0;
-}
 
 /*
  * This is our kmsg_dump callback, called after an oops or panic report
@@ -576,6 +841,7 @@ static int zip_oops(size_t text_len)
 static void oops_to_nvram(struct kmsg_dumper *dumper,
 			  enum kmsg_dump_reason reason)
 {
+	struct oops_log_info *oops_hdr = (struct oops_log_info *)oops_buf;
 	static unsigned int oops_count = 0;
 	static bool panicking = false;
 	static DEFINE_SPINLOCK(lock);
@@ -602,7 +868,7 @@ static void oops_to_nvram(struct kmsg_dumper *dumper,
 		break;
 	default:
 		pr_err("%s: ignoring unrecognized KMSG_DUMP_* reason %d\n",
-						__FUNCTION__, (int) reason);
+		       __func__, (int) reason);
 		return;
 	}
 
@@ -619,14 +885,17 @@ static void oops_to_nvram(struct kmsg_dumper *dumper,
 	}
 	if (rc != 0) {
 		kmsg_dump_rewind(dumper);
-		kmsg_dump_get_buffer(dumper, true,
+		kmsg_dump_get_buffer(dumper, false,
 				     oops_data, oops_data_sz, &text_len);
 		err_type = ERR_TYPE_KERNEL_PANIC;
-		*oops_len = (u16) text_len;
+		oops_hdr->version = cpu_to_be16(OOPS_HDR_VERSION);
+		oops_hdr->report_length = cpu_to_be16(text_len);
+		oops_hdr->timestamp = cpu_to_be64(get_seconds());
 	}
 
 	(void) nvram_write_os_partition(&oops_log_partition, oops_buf,
-		(int) (sizeof(*oops_len) + *oops_len), err_type, ++oops_count);
+		(int) (sizeof(*oops_hdr) + text_len), err_type,
+		++oops_count);
 
 	spin_unlock_irqrestore(&lock, flags);
 }
diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c
index 5f93856cdf4..c413ec158ff 100644
--- a/arch/powerpc/platforms/pseries/pci.c
+++ b/arch/powerpc/platforms/pseries/pci.c
@@ -113,7 +113,8 @@ int pseries_root_bridge_prepare(struct pci_host_bridge *bridge)
 {
 	struct device_node *dn, *pdn;
 	struct pci_bus *bus;
-	const uint32_t *pcie_link_speed_stats;
+	u32 pcie_link_speed_stats[2];
+	int rc;
 
 	bus = bridge->bus;
 
@@ -122,15 +123,16 @@ int pseries_root_bridge_prepare(struct pci_host_bridge *bridge)
 		return 0;
 
 	for (pdn = dn; pdn != NULL; pdn = of_get_next_parent(pdn)) {
-		pcie_link_speed_stats = (const uint32_t *) of_get_property(pdn,
-			"ibm,pcie-link-speed-stats", NULL);
-		if (pcie_link_speed_stats)
+		rc = of_property_read_u32_array(pdn,
+				"ibm,pcie-link-speed-stats",
+				&pcie_link_speed_stats[0], 2);
+		if (!rc)
 			break;
 	}
 
 	of_node_put(pdn);
 
-	if (!pcie_link_speed_stats) {
+	if (rc) {
 		pr_err("no ibm,pcie-link-speed-stats property\n");
 		return 0;
 	}
@@ -142,6 +144,9 @@ int pseries_root_bridge_prepare(struct pci_host_bridge *bridge)
 	case 0x02:
 		bus->max_bus_speed = PCIE_SPEED_5_0GT;
 		break;
+	case 0x04:
+		bus->max_bus_speed = PCIE_SPEED_8_0GT;
+		break;
 	default:
 		bus->max_bus_speed = PCI_SPEED_UNKNOWN;
 		break;
@@ -154,6 +159,9 @@ int pseries_root_bridge_prepare(struct pci_host_bridge *bridge)
 	case 0x02:
 		bus->cur_bus_speed = PCIE_SPEED_5_0GT;
 		break;
+	case 0x04:
+		bus->cur_bus_speed = PCIE_SPEED_8_0GT;
+		break;
 	default:
 		bus->cur_bus_speed = PCI_SPEED_UNKNOWN;
 		break;
diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c
index c91b22be928..203cbf0dc10 100644
--- a/arch/powerpc/platforms/pseries/pci_dlpar.c
+++ b/arch/powerpc/platforms/pseries/pci_dlpar.c
@@ -37,15 +37,15 @@ find_bus_among_children(struct pci_bus *bus,
                         struct device_node *dn)
 {
 	struct pci_bus *child = NULL;
-	struct list_head *tmp;
+	struct pci_bus *tmp;
 	struct device_node *busdn;
 
 	busdn = pci_bus_to_OF_node(bus);
 	if (busdn == dn)
 		return bus;
 
-	list_for_each(tmp, &bus->children) {
-		child = find_bus_among_children(pci_bus_b(tmp), dn);
+	list_for_each_entry(tmp, &bus->children, node) {
+		child = find_bus_among_children(tmp, dn);
 		if (child)
 			break;
 	};
@@ -64,91 +64,6 @@ pcibios_find_pci_bus(struct device_node *dn)
 }
 EXPORT_SYMBOL_GPL(pcibios_find_pci_bus);
 
-/**
- * __pcibios_remove_pci_devices - remove all devices under this bus
- * @bus: the indicated PCI bus
- * @purge_pe: destroy the PE on removal of PCI devices
- *
- * Remove all of the PCI devices under this bus both from the
- * linux pci device tree, and from the powerpc EEH address cache.
- * By default, the corresponding PE will be destroied during the
- * normal PCI hotplug path. For PCI hotplug during EEH recovery,
- * the corresponding PE won't be destroied and deallocated.
- */
-void __pcibios_remove_pci_devices(struct pci_bus *bus, int purge_pe)
-{
-	struct pci_dev *dev, *tmp;
-	struct pci_bus *child_bus;
-
-	/* First go down child busses */
-	list_for_each_entry(child_bus, &bus->children, node)
-		__pcibios_remove_pci_devices(child_bus, purge_pe);
-
-	pr_debug("PCI: Removing devices on bus %04x:%02x\n",
-		pci_domain_nr(bus),  bus->number);
-	list_for_each_entry_safe(dev, tmp, &bus->devices, bus_list) {
-		pr_debug("     * Removing %s...\n", pci_name(dev));
-		eeh_remove_bus_device(dev, purge_pe);
-		pci_stop_and_remove_bus_device(dev);
-	}
-}
-
-/**
- * pcibios_remove_pci_devices - remove all devices under this bus
- *
- * Remove all of the PCI devices under this bus both from the
- * linux pci device tree, and from the powerpc EEH address cache.
- */
-void pcibios_remove_pci_devices(struct pci_bus *bus)
-{
-	__pcibios_remove_pci_devices(bus, 1);
-}
-EXPORT_SYMBOL_GPL(pcibios_remove_pci_devices);
-
-/**
- * pcibios_add_pci_devices - adds new pci devices to bus
- *
- * This routine will find and fixup new pci devices under
- * the indicated bus. This routine presumes that there
- * might already be some devices under this bridge, so
- * it carefully tries to add only new devices.  (And that
- * is how this routine differs from other, similar pcibios
- * routines.)
- */
-void pcibios_add_pci_devices(struct pci_bus * bus)
-{
-	int slotno, num, mode, pass, max;
-	struct pci_dev *dev;
-	struct device_node *dn = pci_bus_to_OF_node(bus);
-
-	eeh_add_device_tree_early(dn);
-
-	mode = PCI_PROBE_NORMAL;
-	if (ppc_md.pci_probe_mode)
-		mode = ppc_md.pci_probe_mode(bus);
-
-	if (mode == PCI_PROBE_DEVTREE) {
-		/* use ofdt-based probe */
-		of_rescan_bus(dn, bus);
-	} else if (mode == PCI_PROBE_NORMAL) {
-		/* use legacy probe */
-		slotno = PCI_SLOT(PCI_DN(dn->child)->devfn);
-		num = pci_scan_slot(bus, PCI_DEVFN(slotno, 0));
-		if (!num)
-			return;
-		pcibios_setup_bus_devices(bus);
-		max = bus->busn_res.start;
-		for (pass=0; pass < 2; pass++)
-			list_for_each_entry(dev, &bus->devices, bus_list) {
-			if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE ||
-			    dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)
-				max = pci_scan_bridge(bus, dev, max, pass);
-		}
-	}
-	pcibios_finish_adding_to_bus(bus);
-}
-EXPORT_SYMBOL_GPL(pcibios_add_pci_devices);
-
 struct pci_controller *init_phb_dynamic(struct device_node *dn)
 {
 	struct pci_controller *phb;
diff --git a/arch/powerpc/platforms/pseries/plpar_wrappers.h b/arch/powerpc/platforms/pseries/plpar_wrappers.h
deleted file mode 100644
index f35787b6a5e..00000000000
--- a/arch/powerpc/platforms/pseries/plpar_wrappers.h
+++ /dev/null
@@ -1,324 +0,0 @@
-#ifndef _PSERIES_PLPAR_WRAPPERS_H
-#define _PSERIES_PLPAR_WRAPPERS_H
-
-#include <linux/string.h>
-#include <linux/irqflags.h>
-
-#include <asm/hvcall.h>
-#include <asm/paca.h>
-#include <asm/page.h>
-
-/* Get state of physical CPU from query_cpu_stopped */
-int smp_query_cpu_stopped(unsigned int pcpu);
-#define QCSS_STOPPED 0
-#define QCSS_STOPPING 1
-#define QCSS_NOT_STOPPED 2
-#define QCSS_HARDWARE_ERROR -1
-#define QCSS_HARDWARE_BUSY -2
-
-static inline long poll_pending(void)
-{
-	return plpar_hcall_norets(H_POLL_PENDING);
-}
-
-static inline u8 get_cede_latency_hint(void)
-{
-	return get_lppaca()->cede_latency_hint;
-}
-
-static inline void set_cede_latency_hint(u8 latency_hint)
-{
-	get_lppaca()->cede_latency_hint = latency_hint;
-}
-
-static inline long cede_processor(void)
-{
-	return plpar_hcall_norets(H_CEDE);
-}
-
-static inline long extended_cede_processor(unsigned long latency_hint)
-{
-	long rc;
-	u8 old_latency_hint = get_cede_latency_hint();
-
-	set_cede_latency_hint(latency_hint);
-
-	rc = cede_processor();
-#ifdef CONFIG_TRACE_IRQFLAGS
-		/* Ensure that H_CEDE returns with IRQs on */
-		if (WARN_ON(!(mfmsr() & MSR_EE)))
-			__hard_irq_enable();
-#endif
-
-	set_cede_latency_hint(old_latency_hint);
-
-	return rc;
-}
-
-static inline long vpa_call(unsigned long flags, unsigned long cpu,
-		unsigned long vpa)
-{
-	flags = flags << H_VPA_FUNC_SHIFT;
-
-	return plpar_hcall_norets(H_REGISTER_VPA, flags, cpu, vpa);
-}
-
-static inline long unregister_vpa(unsigned long cpu)
-{
-	return vpa_call(H_VPA_DEREG_VPA, cpu, 0);
-}
-
-static inline long register_vpa(unsigned long cpu, unsigned long vpa)
-{
-	return vpa_call(H_VPA_REG_VPA, cpu, vpa);
-}
-
-static inline long unregister_slb_shadow(unsigned long cpu)
-{
-	return vpa_call(H_VPA_DEREG_SLB, cpu, 0);
-}
-
-static inline long register_slb_shadow(unsigned long cpu, unsigned long vpa)
-{
-	return vpa_call(H_VPA_REG_SLB, cpu, vpa);
-}
-
-static inline long unregister_dtl(unsigned long cpu)
-{
-	return vpa_call(H_VPA_DEREG_DTL, cpu, 0);
-}
-
-static inline long register_dtl(unsigned long cpu, unsigned long vpa)
-{
-	return vpa_call(H_VPA_REG_DTL, cpu, vpa);
-}
-
-static inline long plpar_page_set_loaned(unsigned long vpa)
-{
-	unsigned long cmo_page_sz = cmo_get_page_size();
-	long rc = 0;
-	int i;
-
-	for (i = 0; !rc && i < PAGE_SIZE; i += cmo_page_sz)
-		rc = plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED, vpa + i, 0);
-
-	for (i -= cmo_page_sz; rc && i != 0; i -= cmo_page_sz)
-		plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE,
-				   vpa + i - cmo_page_sz, 0);
-
-	return rc;
-}
-
-static inline long plpar_page_set_active(unsigned long vpa)
-{
-	unsigned long cmo_page_sz = cmo_get_page_size();
-	long rc = 0;
-	int i;
-
-	for (i = 0; !rc && i < PAGE_SIZE; i += cmo_page_sz)
-		rc = plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE, vpa + i, 0);
-
-	for (i -= cmo_page_sz; rc && i != 0; i -= cmo_page_sz)
-		plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED,
-				   vpa + i - cmo_page_sz, 0);
-
-	return rc;
-}
-
-extern void vpa_init(int cpu);
-
-static inline long plpar_pte_enter(unsigned long flags,
-		unsigned long hpte_group, unsigned long hpte_v,
-		unsigned long hpte_r, unsigned long *slot)
-{
-	long rc;
-	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
-
-	rc = plpar_hcall(H_ENTER, retbuf, flags, hpte_group, hpte_v, hpte_r);
-
-	*slot = retbuf[0];
-
-	return rc;
-}
-
-static inline long plpar_pte_remove(unsigned long flags, unsigned long ptex,
-		unsigned long avpn, unsigned long *old_pteh_ret,
-		unsigned long *old_ptel_ret)
-{
-	long rc;
-	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
-
-	rc = plpar_hcall(H_REMOVE, retbuf, flags, ptex, avpn);
-
-	*old_pteh_ret = retbuf[0];
-	*old_ptel_ret = retbuf[1];
-
-	return rc;
-}
-
-/* plpar_pte_remove_raw can be called in real mode. It calls plpar_hcall_raw */
-static inline long plpar_pte_remove_raw(unsigned long flags, unsigned long ptex,
-		unsigned long avpn, unsigned long *old_pteh_ret,
-		unsigned long *old_ptel_ret)
-{
-	long rc;
-	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
-
-	rc = plpar_hcall_raw(H_REMOVE, retbuf, flags, ptex, avpn);
-
-	*old_pteh_ret = retbuf[0];
-	*old_ptel_ret = retbuf[1];
-
-	return rc;
-}
-
-static inline long plpar_pte_read(unsigned long flags, unsigned long ptex,
-		unsigned long *old_pteh_ret, unsigned long *old_ptel_ret)
-{
-	long rc;
-	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
-
-	rc = plpar_hcall(H_READ, retbuf, flags, ptex);
-
-	*old_pteh_ret = retbuf[0];
-	*old_ptel_ret = retbuf[1];
-
-	return rc;
-}
-
-/* plpar_pte_read_raw can be called in real mode. It calls plpar_hcall_raw */
-static inline long plpar_pte_read_raw(unsigned long flags, unsigned long ptex,
-		unsigned long *old_pteh_ret, unsigned long *old_ptel_ret)
-{
-	long rc;
-	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
-
-	rc = plpar_hcall_raw(H_READ, retbuf, flags, ptex);
-
-	*old_pteh_ret = retbuf[0];
-	*old_ptel_ret = retbuf[1];
-
-	return rc;
-}
-
-/*
- * plpar_pte_read_4_raw can be called in real mode.
- * ptes must be 8*sizeof(unsigned long)
- */
-static inline long plpar_pte_read_4_raw(unsigned long flags, unsigned long ptex,
-					unsigned long *ptes)
-
-{
-	long rc;
-	unsigned long retbuf[PLPAR_HCALL9_BUFSIZE];
-
-	rc = plpar_hcall9_raw(H_READ, retbuf, flags | H_READ_4, ptex);
-
-	memcpy(ptes, retbuf, 8*sizeof(unsigned long));
-
-	return rc;
-}
-
-static inline long plpar_pte_protect(unsigned long flags, unsigned long ptex,
-		unsigned long avpn)
-{
-	return plpar_hcall_norets(H_PROTECT, flags, ptex, avpn);
-}
-
-static inline long plpar_tce_get(unsigned long liobn, unsigned long ioba,
-		unsigned long *tce_ret)
-{
-	long rc;
-	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
-
-	rc = plpar_hcall(H_GET_TCE, retbuf, liobn, ioba);
-
-	*tce_ret = retbuf[0];
-
-	return rc;
-}
-
-static inline long plpar_tce_put(unsigned long liobn, unsigned long ioba,
-		unsigned long tceval)
-{
-	return plpar_hcall_norets(H_PUT_TCE, liobn, ioba, tceval);
-}
-
-static inline long plpar_tce_put_indirect(unsigned long liobn,
-		unsigned long ioba, unsigned long page, unsigned long count)
-{
-	return plpar_hcall_norets(H_PUT_TCE_INDIRECT, liobn, ioba, page, count);
-}
-
-static inline long plpar_tce_stuff(unsigned long liobn, unsigned long ioba,
-		unsigned long tceval, unsigned long count)
-{
-	return plpar_hcall_norets(H_STUFF_TCE, liobn, ioba, tceval, count);
-}
-
-static inline long plpar_get_term_char(unsigned long termno,
-		unsigned long *len_ret, char *buf_ret)
-{
-	long rc;
-	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
-	unsigned long *lbuf = (unsigned long *)buf_ret;	/* TODO: alignment? */
-
-	rc = plpar_hcall(H_GET_TERM_CHAR, retbuf, termno);
-
-	*len_ret = retbuf[0];
-	lbuf[0] = retbuf[1];
-	lbuf[1] = retbuf[2];
-
-	return rc;
-}
-
-static inline long plpar_put_term_char(unsigned long termno, unsigned long len,
-		const char *buffer)
-{
-	unsigned long *lbuf = (unsigned long *)buffer;	/* TODO: alignment? */
-	return plpar_hcall_norets(H_PUT_TERM_CHAR, termno, len, lbuf[0],
-			lbuf[1]);
-}
-
-/* Set various resource mode parameters */
-static inline long plpar_set_mode(unsigned long mflags, unsigned long resource,
-		unsigned long value1, unsigned long value2)
-{
-	return plpar_hcall_norets(H_SET_MODE, mflags, resource, value1, value2);
-}
-
-/*
- * Enable relocation on exceptions on this partition
- *
- * Note: this call has a partition wide scope and can take a while to complete.
- * If it returns H_LONG_BUSY_* it should be retried periodically until it
- * returns H_SUCCESS.
- */
-static inline long enable_reloc_on_exceptions(void)
-{
-	/* mflags = 3: Exceptions at 0xC000000000004000 */
-	return plpar_set_mode(3, 3, 0, 0);
-}
-
-/*
- * Disable relocation on exceptions on this partition
- *
- * Note: this call has a partition wide scope and can take a while to complete.
- * If it returns H_LONG_BUSY_* it should be retried periodically until it
- * returns H_SUCCESS.
- */
-static inline long disable_reloc_on_exceptions(void) {
-	return plpar_set_mode(0, 3, 0, 0);
-}
-
-static inline long plapr_set_ciabr(unsigned long ciabr)
-{
-	return plpar_set_mode(0, 1, ciabr, 0);
-}
-
-static inline long plapr_set_watchpoint0(unsigned long dawr0, unsigned long dawrx0)
-{
-	return plpar_set_mode(0, 2, dawr0, dawrx0);
-}
-
-#endif /* _PSERIES_PLPAR_WRAPPERS_H */
diff --git a/arch/powerpc/platforms/pseries/processor_idle.c b/arch/powerpc/platforms/pseries/processor_idle.c
deleted file mode 100644
index 4644efa0694..00000000000
--- a/arch/powerpc/platforms/pseries/processor_idle.c
+++ /dev/null
@@ -1,362 +0,0 @@
-/*
- *  processor_idle - idle state cpuidle driver.
- *  Adapted from drivers/idle/intel_idle.c and
- *  drivers/acpi/processor_idle.c
- *
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/moduleparam.h>
-#include <linux/cpuidle.h>
-#include <linux/cpu.h>
-#include <linux/notifier.h>
-
-#include <asm/paca.h>
-#include <asm/reg.h>
-#include <asm/machdep.h>
-#include <asm/firmware.h>
-#include <asm/runlatch.h>
-
-#include "plpar_wrappers.h"
-#include "pseries.h"
-
-struct cpuidle_driver pseries_idle_driver = {
-	.name             = "pseries_idle",
-	.owner            = THIS_MODULE,
-};
-
-#define MAX_IDLE_STATE_COUNT	2
-
-static int max_idle_state = MAX_IDLE_STATE_COUNT - 1;
-static struct cpuidle_device __percpu *pseries_cpuidle_devices;
-static struct cpuidle_state *cpuidle_state_table;
-
-static inline void idle_loop_prolog(unsigned long *in_purr)
-{
-	*in_purr = mfspr(SPRN_PURR);
-	/*
-	 * Indicate to the HV that we are idle. Now would be
-	 * a good time to find other work to dispatch.
-	 */
-	get_lppaca()->idle = 1;
-}
-
-static inline void idle_loop_epilog(unsigned long in_purr)
-{
-	get_lppaca()->wait_state_cycles += mfspr(SPRN_PURR) - in_purr;
-	get_lppaca()->idle = 0;
-}
-
-static int snooze_loop(struct cpuidle_device *dev,
-			struct cpuidle_driver *drv,
-			int index)
-{
-	unsigned long in_purr;
-	int cpu = dev->cpu;
-
-	idle_loop_prolog(&in_purr);
-	local_irq_enable();
-	set_thread_flag(TIF_POLLING_NRFLAG);
-
-	while ((!need_resched()) && cpu_online(cpu)) {
-		ppc64_runlatch_off();
-		HMT_low();
-		HMT_very_low();
-	}
-
-	HMT_medium();
-	clear_thread_flag(TIF_POLLING_NRFLAG);
-	smp_mb();
-
-	idle_loop_epilog(in_purr);
-
-	return index;
-}
-
-static void check_and_cede_processor(void)
-{
-	/*
-	 * Ensure our interrupt state is properly tracked,
-	 * also checks if no interrupt has occurred while we
-	 * were soft-disabled
-	 */
-	if (prep_irq_for_idle()) {
-		cede_processor();
-#ifdef CONFIG_TRACE_IRQFLAGS
-		/* Ensure that H_CEDE returns with IRQs on */
-		if (WARN_ON(!(mfmsr() & MSR_EE)))
-			__hard_irq_enable();
-#endif
-	}
-}
-
-static int dedicated_cede_loop(struct cpuidle_device *dev,
-				struct cpuidle_driver *drv,
-				int index)
-{
-	unsigned long in_purr;
-
-	idle_loop_prolog(&in_purr);
-	get_lppaca()->donate_dedicated_cpu = 1;
-
-	ppc64_runlatch_off();
-	HMT_medium();
-	check_and_cede_processor();
-
-	get_lppaca()->donate_dedicated_cpu = 0;
-
-	idle_loop_epilog(in_purr);
-
-	return index;
-}
-
-static int shared_cede_loop(struct cpuidle_device *dev,
-			struct cpuidle_driver *drv,
-			int index)
-{
-	unsigned long in_purr;
-
-	idle_loop_prolog(&in_purr);
-
-	/*
-	 * Yield the processor to the hypervisor.  We return if
-	 * an external interrupt occurs (which are driven prior
-	 * to returning here) or if a prod occurs from another
-	 * processor. When returning here, external interrupts
-	 * are enabled.
-	 */
-	check_and_cede_processor();
-
-	idle_loop_epilog(in_purr);
-
-	return index;
-}
-
-/*
- * States for dedicated partition case.
- */
-static struct cpuidle_state dedicated_states[MAX_IDLE_STATE_COUNT] = {
-	{ /* Snooze */
-		.name = "snooze",
-		.desc = "snooze",
-		.flags = CPUIDLE_FLAG_TIME_VALID,
-		.exit_latency = 0,
-		.target_residency = 0,
-		.enter = &snooze_loop },
-	{ /* CEDE */
-		.name = "CEDE",
-		.desc = "CEDE",
-		.flags = CPUIDLE_FLAG_TIME_VALID,
-		.exit_latency = 10,
-		.target_residency = 100,
-		.enter = &dedicated_cede_loop },
-};
-
-/*
- * States for shared partition case.
- */
-static struct cpuidle_state shared_states[MAX_IDLE_STATE_COUNT] = {
-	{ /* Shared Cede */
-		.name = "Shared Cede",
-		.desc = "Shared Cede",
-		.flags = CPUIDLE_FLAG_TIME_VALID,
-		.exit_latency = 0,
-		.target_residency = 0,
-		.enter = &shared_cede_loop },
-};
-
-void update_smt_snooze_delay(int cpu, int residency)
-{
-	struct cpuidle_driver *drv = cpuidle_get_driver();
-	struct cpuidle_device *dev = per_cpu(cpuidle_devices, cpu);
-
-	if (cpuidle_state_table != dedicated_states)
-		return;
-
-	if (residency < 0) {
-		/* Disable the Nap state on that cpu */
-		if (dev)
-			dev->states_usage[1].disable = 1;
-	} else
-		if (drv)
-			drv->states[1].target_residency = residency;
-}
-
-static int pseries_cpuidle_add_cpu_notifier(struct notifier_block *n,
-			unsigned long action, void *hcpu)
-{
-	int hotcpu = (unsigned long)hcpu;
-	struct cpuidle_device *dev =
-			per_cpu_ptr(pseries_cpuidle_devices, hotcpu);
-
-	if (dev && cpuidle_get_driver()) {
-		switch (action) {
-		case CPU_ONLINE:
-		case CPU_ONLINE_FROZEN:
-			cpuidle_pause_and_lock();
-			cpuidle_enable_device(dev);
-			cpuidle_resume_and_unlock();
-			break;
-
-		case CPU_DEAD:
-		case CPU_DEAD_FROZEN:
-			cpuidle_pause_and_lock();
-			cpuidle_disable_device(dev);
-			cpuidle_resume_and_unlock();
-			break;
-
-		default:
-			return NOTIFY_DONE;
-		}
-	}
-	return NOTIFY_OK;
-}
-
-static struct notifier_block setup_hotplug_notifier = {
-	.notifier_call = pseries_cpuidle_add_cpu_notifier,
-};
-
-/*
- * pseries_cpuidle_driver_init()
- */
-static int pseries_cpuidle_driver_init(void)
-{
-	int idle_state;
-	struct cpuidle_driver *drv = &pseries_idle_driver;
-
-	drv->state_count = 0;
-
-	for (idle_state = 0; idle_state < MAX_IDLE_STATE_COUNT; ++idle_state) {
-
-		if (idle_state > max_idle_state)
-			break;
-
-		/* is the state not enabled? */
-		if (cpuidle_state_table[idle_state].enter == NULL)
-			continue;
-
-		drv->states[drv->state_count] =	/* structure copy */
-			cpuidle_state_table[idle_state];
-
-		drv->state_count += 1;
-	}
-
-	return 0;
-}
-
-/* pseries_idle_devices_uninit(void)
- * unregister cpuidle devices and de-allocate memory
- */
-static void pseries_idle_devices_uninit(void)
-{
-	int i;
-	struct cpuidle_device *dev;
-
-	for_each_possible_cpu(i) {
-		dev = per_cpu_ptr(pseries_cpuidle_devices, i);
-		cpuidle_unregister_device(dev);
-	}
-
-	free_percpu(pseries_cpuidle_devices);
-	return;
-}
-
-/* pseries_idle_devices_init()
- * allocate, initialize and register cpuidle device
- */
-static int pseries_idle_devices_init(void)
-{
-	int i;
-	struct cpuidle_driver *drv = &pseries_idle_driver;
-	struct cpuidle_device *dev;
-
-	pseries_cpuidle_devices = alloc_percpu(struct cpuidle_device);
-	if (pseries_cpuidle_devices == NULL)
-		return -ENOMEM;
-
-	for_each_possible_cpu(i) {
-		dev = per_cpu_ptr(pseries_cpuidle_devices, i);
-		dev->state_count = drv->state_count;
-		dev->cpu = i;
-		if (cpuidle_register_device(dev)) {
-			printk(KERN_DEBUG \
-				"cpuidle_register_device %d failed!\n", i);
-			return -EIO;
-		}
-	}
-
-	return 0;
-}
-
-/*
- * pseries_idle_probe()
- * Choose state table for shared versus dedicated partition
- */
-static int pseries_idle_probe(void)
-{
-
-	if (!firmware_has_feature(FW_FEATURE_SPLPAR))
-		return -ENODEV;
-
-	if (cpuidle_disable != IDLE_NO_OVERRIDE)
-		return -ENODEV;
-
-	if (max_idle_state == 0) {
-		printk(KERN_DEBUG "pseries processor idle disabled.\n");
-		return -EPERM;
-	}
-
-	if (get_lppaca()->shared_proc)
-		cpuidle_state_table = shared_states;
-	else
-		cpuidle_state_table = dedicated_states;
-
-	return 0;
-}
-
-static int __init pseries_processor_idle_init(void)
-{
-	int retval;
-
-	retval = pseries_idle_probe();
-	if (retval)
-		return retval;
-
-	pseries_cpuidle_driver_init();
-	retval = cpuidle_register_driver(&pseries_idle_driver);
-	if (retval) {
-		printk(KERN_DEBUG "Registration of pseries driver failed.\n");
-		return retval;
-	}
-
-	retval = pseries_idle_devices_init();
-	if (retval) {
-		pseries_idle_devices_uninit();
-		cpuidle_unregister_driver(&pseries_idle_driver);
-		return retval;
-	}
-
-	register_cpu_notifier(&setup_hotplug_notifier);
-	printk(KERN_DEBUG "pseries_idle_driver registered\n");
-
-	return 0;
-}
-
-static void __exit pseries_processor_idle_exit(void)
-{
-
-	unregister_cpu_notifier(&setup_hotplug_notifier);
-	pseries_idle_devices_uninit();
-	cpuidle_unregister_driver(&pseries_idle_driver);
-
-	return;
-}
-
-module_init(pseries_processor_idle_init);
-module_exit(pseries_processor_idle_exit);
-
-MODULE_AUTHOR("Deepthi Dharwar <deepthi@linux.vnet.ibm.com>");
-MODULE_DESCRIPTION("Cpuidle driver for POWER");
-MODULE_LICENSE("GPL");
diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
index c2a3a258001..361add62abf 100644
--- a/arch/powerpc/platforms/pseries/pseries.h
+++ b/arch/powerpc/platforms/pseries/pseries.h
@@ -56,15 +56,14 @@ extern void hvc_vio_init_early(void);
 /* Dynamic logical Partitioning/Mobility */
 extern void dlpar_free_cc_nodes(struct device_node *);
 extern void dlpar_free_cc_property(struct property *);
-extern struct device_node *dlpar_configure_connector(u32);
+extern struct device_node *dlpar_configure_connector(u32, struct device_node *);
 extern int dlpar_attach_node(struct device_node *);
 extern int dlpar_detach_node(struct device_node *);
 
-/* Snooze Delay, pseries_idle */
-DECLARE_PER_CPU(long, smt_snooze_delay);
-
 /* PCI root bridge prepare function override for pseries */
 struct pci_host_bridge;
 int pseries_root_bridge_prepare(struct pci_host_bridge *bridge);
 
+unsigned long pseries_memory_block_size(void);
+
 #endif /* _PSERIES_PSERIES_H */
diff --git a/arch/powerpc/platforms/pseries/pseries_energy.c b/arch/powerpc/platforms/pseries/pseries_energy.c
index a91e6dadda2..92767791f93 100644
--- a/arch/powerpc/platforms/pseries/pseries_energy.c
+++ b/arch/powerpc/platforms/pseries/pseries_energy.c
@@ -108,8 +108,8 @@ err:
  * energy consumption.
  */
 
-#define FLAGS_MODE1	0x004E200000080E01
-#define FLAGS_MODE2	0x004E200000080401
+#define FLAGS_MODE1	0x004E200000080E01UL
+#define FLAGS_MODE2	0x004E200000080401UL
 #define FLAGS_ACTIVATE  0x100
 
 static ssize_t get_best_energy_list(char *page, int activate)
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index c4dfccd3a3d..9c5778e6ed4 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -83,7 +83,7 @@ static void handle_system_shutdown(char event_modifier)
 	switch (event_modifier) {
 	case EPOW_SHUTDOWN_NORMAL:
 		pr_emerg("Firmware initiated power off");
-		orderly_poweroff(1);
+		orderly_poweroff(true);
 		break;
 
 	case EPOW_SHUTDOWN_ON_UPS:
@@ -95,13 +95,13 @@ static void handle_system_shutdown(char event_modifier)
 		pr_emerg("Loss of system critical functions reported by "
 			"firmware");
 		pr_emerg("Check RTAS error log for details");
-		orderly_poweroff(1);
+		orderly_poweroff(true);
 		break;
 
 	case EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH:
 		pr_emerg("Ambient temperature too high reported by firmware");
 		pr_emerg("Check RTAS error log for details");
-		orderly_poweroff(1);
+		orderly_poweroff(true);
 		break;
 
 	default:
@@ -162,7 +162,7 @@ void rtas_parse_epow_errlog(struct rtas_error_log *log)
 
 	case EPOW_SYSTEM_HALT:
 		pr_emerg("Firmware initiated power off");
-		orderly_poweroff(1);
+		orderly_poweroff(true);
 		break;
 
 	case EPOW_MAIN_ENCLOSURE:
@@ -236,7 +236,8 @@ static irqreturn_t ras_error_interrupt(int irq, void *dev_id)
 
 	rtas_elog = (struct rtas_error_log *)ras_log_buf;
 
-	if ((status == 0) && (rtas_elog->severity >= RTAS_SEVERITY_ERROR_SYNC))
+	if (status == 0 &&
+	    rtas_error_severity(rtas_elog) >= RTAS_SEVERITY_ERROR_SYNC)
 		fatal = 1;
 	else
 		fatal = 0;
@@ -287,6 +288,9 @@ static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs)
 	unsigned long *savep;
 	struct rtas_error_log *h, *errhdr = NULL;
 
+	/* Mask top two bits */
+	regs->gpr[3] &= ~(0x3UL << 62);
+
 	if (!VALID_FWNMI_BUFFER(regs->gpr[3])) {
 		printk(KERN_ERR "FWNMI: corrupt r3 0x%016lx\n", regs->gpr[3]);
 		return NULL;
@@ -297,13 +301,14 @@ static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs)
 
 	/* If it isn't an extended log we can use the per cpu 64bit buffer */
 	h = (struct rtas_error_log *)&savep[1];
-	if (!h->extended) {
+	if (!rtas_error_extended(h)) {
 		memcpy(&__get_cpu_var(mce_data_buf), h, sizeof(__u64));
 		errhdr = (struct rtas_error_log *)&__get_cpu_var(mce_data_buf);
 	} else {
-		int len;
+		int len, error_log_length;
 
-		len = max_t(int, 8+h->extended_log_length, RTAS_ERROR_LOG_MAX);
+		error_log_length = 8 + rtas_error_extended_log_length(h);
+		len = max_t(int, error_log_length, RTAS_ERROR_LOG_MAX);
 		memset(global_mce_data_buf, 0, RTAS_ERROR_LOG_MAX);
 		memcpy(global_mce_data_buf, h, len);
 		errhdr = (struct rtas_error_log *)global_mce_data_buf;
@@ -347,23 +352,24 @@ int pSeries_system_reset_exception(struct pt_regs *regs)
 static int recover_mce(struct pt_regs *regs, struct rtas_error_log *err)
 {
 	int recovered = 0;
+	int disposition = rtas_error_disposition(err);
 
 	if (!(regs->msr & MSR_RI)) {
 		/* If MSR_RI isn't set, we cannot recover */
 		recovered = 0;
 
-	} else if (err->disposition == RTAS_DISP_FULLY_RECOVERED) {
+	} else if (disposition == RTAS_DISP_FULLY_RECOVERED) {
 		/* Platform corrected itself */
 		recovered = 1;
 
-	} else if (err->disposition == RTAS_DISP_LIMITED_RECOVERY) {
+	} else if (disposition == RTAS_DISP_LIMITED_RECOVERY) {
 		/* Platform corrected itself but could be degraded */
 		printk(KERN_ERR "MCE: limited recovery, system may "
 		       "be degraded\n");
 		recovered = 1;
 
 	} else if (user_mode(regs) && !is_global_init(current) &&
-		   err->severity == RTAS_SEVERITY_ERROR_SYNC) {
+		   rtas_error_severity(err) == RTAS_SEVERITY_ERROR_SYNC) {
 
 		/*
 		 * If we received a synchronous error when in userspace
diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c
index f93cdf55628..1c0a60d9886 100644
--- a/arch/powerpc/platforms/pseries/reconfig.c
+++ b/arch/powerpc/platforms/pseries/reconfig.c
@@ -12,7 +12,6 @@
  */
 
 #include <linux/kernel.h>
-#include <linux/kref.h>
 #include <linux/notifier.h>
 #include <linux/proc_fs.h>
 #include <linux/slab.h>
@@ -70,7 +69,7 @@ static int pSeries_reconfig_add_node(const char *path, struct property *proplist
 
 	np->properties = proplist;
 	of_node_set_flag(np, OF_DYNAMIC);
-	kref_init(&np->kref);
+	of_node_init(np);
 
 	np->parent = derive_parent(path);
 	if (IS_ERR(np->parent)) {
diff --git a/arch/powerpc/platforms/pseries/rng.c b/arch/powerpc/platforms/pseries/rng.c
new file mode 100644
index 00000000000..72a102758d4
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/rng.c
@@ -0,0 +1,45 @@
+/*
+ * Copyright 2013, Michael Ellerman, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define pr_fmt(fmt)	"pseries-rng: " fmt
+
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <asm/archrandom.h>
+#include <asm/machdep.h>
+#include <asm/plpar_wrappers.h>
+
+
+static int pseries_get_random_long(unsigned long *v)
+{
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+
+	if (plpar_hcall(H_RANDOM, retbuf) == H_SUCCESS) {
+		*v = retbuf[0];
+		return 1;
+	}
+
+	return 0;
+}
+
+static __init int rng_init(void)
+{
+	struct device_node *dn;
+
+	dn = of_find_compatible_node(NULL, NULL, "ibm,random");
+	if (!dn)
+		return -ENODEV;
+
+	pr_info("Registering arch random hook.\n");
+
+	ppc_md.get_random_long = pseries_get_random_long;
+
+	return 0;
+}
+subsys_initcall(rng_init);
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index c11c8238797..f2f40e64658 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -39,7 +39,6 @@
 #include <linux/irq.h>
 #include <linux/seq_file.h>
 #include <linux/root_dev.h>
-#include <linux/cpuidle.h>
 #include <linux/of.h>
 #include <linux/kexec.h>
 
@@ -66,13 +65,13 @@
 #include <asm/firmware.h>
 #include <asm/eeh.h>
 #include <asm/reg.h>
+#include <asm/plpar_wrappers.h>
 
-#include "plpar_wrappers.h"
 #include "pseries.h"
 
 int CMO_PrPSP = -1;
 int CMO_SecPSP = -1;
-unsigned long CMO_PageSize = (ASM_CONST(1) << IOMMU_PAGE_SHIFT);
+unsigned long CMO_PageSize = (ASM_CONST(1) << IOMMU_PAGE_SHIFT_4K);
 EXPORT_SYMBOL(CMO_PageSize);
 
 int fwnmi_active;  /* TRUE if an FWNMI handler is present */
@@ -183,7 +182,7 @@ static void __init pseries_mpic_init_IRQ(void)
 	np = of_find_node_by_path("/");
 	naddr = of_n_addr_cells(np);
 	opprop = of_get_property(np, "platform-open-pic", &opplen);
-	if (opprop != 0) {
+	if (opprop != NULL) {
 		openpic_addr = of_read_number(opprop, naddr);
 		printk(KERN_DEBUG "OpenPIC addr: %lx\n", openpic_addr);
 	}
@@ -323,7 +322,7 @@ static int alloc_dispatch_logs(void)
 	get_paca()->lppaca_ptr->dtl_idx = 0;
 
 	/* hypervisor reads buffer length from this field */
-	dtl->enqueue_to_dispatch_time = DISPATCH_LOG_BYTES;
+	dtl->enqueue_to_dispatch_time = cpu_to_be32(DISPATCH_LOG_BYTES);
 	ret = register_dtl(hard_smp_processor_id(), __pa(dtl));
 	if (ret)
 		pr_err("WARNING: DTL registration of cpu %d (hw %d) failed "
@@ -354,19 +353,26 @@ static int alloc_dispatch_log_kmem_cache(void)
 }
 early_initcall(alloc_dispatch_log_kmem_cache);
 
-static void pSeries_idle(void)
+static void pseries_lpar_idle(void)
 {
-	/* This would call on the cpuidle framework, and the back-end pseries
-	 * driver to  go to idle states
+	/*
+	 * Default handler to go into low thread priority and possibly
+	 * low power mode by ceding processor to hypervisor
 	 */
-	if (cpuidle_idle_call()) {
-		/* On error, execute default handler
-		 * to go into low thread priority and possibly
-		 * low power mode.
-		 */
-		HMT_low();
-		HMT_very_low();
-	}
+
+	/* Indicate to hypervisor that we are idle. */
+	get_lppaca()->idle = 1;
+
+	/*
+	 * Yield the processor to the hypervisor.  We return if
+	 * an external interrupt occurs (which are driven prior
+	 * to returning here) or if a prod occurs from another
+	 * processor. When returning here, external interrupts
+	 * are enabled.
+	 */
+	cede_processor();
+
+	get_lppaca()->idle = 0;
 }
 
 /*
@@ -418,8 +424,7 @@ static void pSeries_machine_kexec(struct kimage *image)
 {
 	long rc;
 
-	if (firmware_has_feature(FW_FEATURE_SET_MODE) &&
-	    (image->type != KEXEC_TYPE_CRASH)) {
+	if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
 		rc = pSeries_disable_reloc_on_exc();
 		if (rc != H_SUCCESS)
 			pr_warning("Warning: Failed to disable relocation on "
@@ -430,9 +435,35 @@ static void pSeries_machine_kexec(struct kimage *image)
 }
 #endif
 
+#ifdef __LITTLE_ENDIAN__
+long pseries_big_endian_exceptions(void)
+{
+	long rc;
+
+	while (1) {
+		rc = enable_big_endian_exceptions();
+		if (!H_IS_LONG_BUSY(rc))
+			return rc;
+		mdelay(get_longbusy_msecs(rc));
+	}
+}
+
+static long pseries_little_endian_exceptions(void)
+{
+	long rc;
+
+	while (1) {
+		rc = enable_little_endian_exceptions();
+		if (!H_IS_LONG_BUSY(rc))
+			return rc;
+		mdelay(get_longbusy_msecs(rc));
+	}
+}
+#endif
+
 static void __init pSeries_setup_arch(void)
 {
-	panic_timeout = 10;
+	set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);
 
 	/* Discover PIC type and setup ppc_md accordingly */
 	pseries_discover_pic();
@@ -456,15 +487,14 @@ static void __init pSeries_setup_arch(void)
 
 	pSeries_nvram_init();
 
-	if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
+	if (firmware_has_feature(FW_FEATURE_LPAR)) {
 		vpa_init(boot_cpuid);
-		ppc_md.power_save = pSeries_idle;
-	}
-
-	if (firmware_has_feature(FW_FEATURE_LPAR))
+		ppc_md.power_save = pseries_lpar_idle;
 		ppc_md.enable_pmcs = pseries_lpar_enable_pmcs;
-	else
+	} else {
+		/* No special idle routine */
 		ppc_md.enable_pmcs = power4_enable_pmcs;
+	}
 
 	ppc_md.pcibios_root_bridge_prepare = pseries_root_bridge_prepare;
 
@@ -480,7 +510,11 @@ static void __init pSeries_setup_arch(void)
 static int __init pSeries_init_panel(void)
 {
 	/* Manually leave the kernel version on the panel. */
+#ifdef __BIG_ENDIAN__
 	ppc_md.progress("Linux ppc64\n", 0);
+#else
+	ppc_md.progress("Linux ppc64le\n", 0);
+#endif
 	ppc_md.progress(init_utsname()->version, 0);
 
 	return 0;
@@ -532,7 +566,7 @@ void pSeries_cmo_feature_init(void)
 {
 	char *ptr, *key, *value, *end;
 	int call_status;
-	int page_order = IOMMU_PAGE_SHIFT;
+	int page_order = IOMMU_PAGE_SHIFT_4K;
 
 	pr_debug(" -> fw_cmo_feature_init()\n");
 	spin_lock(&rtas_data_buf_lock);
@@ -635,7 +669,7 @@ static int __init pseries_probe_fw_features(unsigned long node,
 					    void *data)
 {
 	const char *prop;
-	unsigned long len;
+	int len;
 	static int hypertas_found;
 	static int vec5_found;
 
@@ -668,7 +702,7 @@ static int __init pseries_probe_fw_features(unsigned long node,
 static int __init pSeries_probe(void)
 {
 	unsigned long root = of_get_flat_dt_root();
- 	char *dtype = of_get_flat_dt_prop(root, "device_type", NULL);
+	const char *dtype = of_get_flat_dt_prop(root, "device_type", NULL);
 
  	if (dtype == NULL)
  		return 0;
@@ -687,6 +721,22 @@ static int __init pSeries_probe(void)
 	/* Now try to figure out if we are running on LPAR */
 	of_scan_flat_dt(pseries_probe_fw_features, NULL);
 
+#ifdef __LITTLE_ENDIAN__
+	if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
+		long rc;
+		/*
+		 * Tell the hypervisor that we want our exceptions to
+		 * be taken in little endian mode. If this fails we don't
+		 * want to use BUG() because it will trigger an exception.
+		 */
+		rc = pseries_little_endian_exceptions();
+		if (rc) {
+			ppc_md.progress("H_SET_MODE LE exception fail", 0);
+			panic("Could not enable little endian exceptions");
+		}
+	}
+#endif
+
 	if (firmware_has_feature(FW_FEATURE_LPAR))
 		hpte_init_lpar();
 	else
@@ -760,4 +810,7 @@ define_machine(pseries) {
 #ifdef CONFIG_KEXEC
 	.machine_kexec          = pSeries_machine_kexec,
 #endif
+#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
+	.memory_block_size	= pseries_memory_block_size,
+#endif
 };
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
index 12bc8c3663a..a3555b10c1a 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -43,8 +43,9 @@
 #include <asm/cputhreads.h>
 #include <asm/xics.h>
 #include <asm/dbell.h>
+#include <asm/plpar_wrappers.h>
+#include <asm/code-patching.h>
 
-#include "plpar_wrappers.h"
 #include "pseries.h"
 #include "offline_states.h"
 
@@ -96,8 +97,8 @@ int smp_query_cpu_stopped(unsigned int pcpu)
 static inline int smp_startup_cpu(unsigned int lcpu)
 {
 	int status;
-	unsigned long start_here = __pa((u32)*((unsigned long *)
-					       generic_secondary_smp_init));
+	unsigned long start_here =
+			__pa(ppc_function_entry(generic_secondary_smp_init));
 	unsigned int pcpu;
 	int start_cpu;
 
@@ -187,22 +188,6 @@ static int smp_pSeries_kick_cpu(int nr)
 	return 0;
 }
 
-static int smp_pSeries_cpu_bootable(unsigned int nr)
-{
-	/* Special case - we inhibit secondary thread startup
-	 * during boot if the user requests it.
-	 */
-	if (system_state < SYSTEM_RUNNING && cpu_has_feature(CPU_FTR_SMT)) {
-		if (!smt_enabled_at_boot && cpu_thread_in_core(nr) != 0)
-			return 0;
-		if (smt_enabled_at_boot
-		    && cpu_thread_in_core(nr) >= smt_enabled_at_boot)
-			return 0;
-	}
-
-	return 1;
-}
-
 /* Only used on systems that support multiple IPI mechanisms */
 static void pSeries_cause_ipi_mux(int cpu, unsigned long data)
 {
@@ -237,7 +222,7 @@ static struct smp_ops_t pSeries_xics_smp_ops = {
 	.probe		= pSeries_smp_probe,
 	.kick_cpu	= smp_pSeries_kick_cpu,
 	.setup_cpu	= smp_xics_setup_cpu,
-	.cpu_bootable	= smp_pSeries_cpu_bootable,
+	.cpu_bootable	= smp_generic_cpu_bootable,
 };
 
 /* This is called very early */
@@ -249,18 +234,24 @@ static void __init smp_init_pseries(void)
 
 	alloc_bootmem_cpumask_var(&of_spin_mask);
 
-	/* Mark threads which are still spinning in hold loops. */
-	if (cpu_has_feature(CPU_FTR_SMT)) {
-		for_each_present_cpu(i) { 
-			if (cpu_thread_in_core(i) == 0)
-				cpumask_set_cpu(i, of_spin_mask);
-		}
-	} else {
-		cpumask_copy(of_spin_mask, cpu_present_mask);
+	/*
+	 * Mark threads which are still spinning in hold loops
+	 *
+	 * We know prom_init will not have started them if RTAS supports
+	 * query-cpu-stopped-state.
+	 */
+	if (rtas_token("query-cpu-stopped-state") == RTAS_UNKNOWN_SERVICE) {
+		if (cpu_has_feature(CPU_FTR_SMT)) {
+			for_each_present_cpu(i) {
+				if (cpu_thread_in_core(i) == 0)
+					cpumask_set_cpu(i, of_spin_mask);
+			}
+		} else
+			cpumask_copy(of_spin_mask, cpu_present_mask);
+
+		cpumask_clear_cpu(boot_cpuid, of_spin_mask);
 	}
 
-	cpumask_clear_cpu(boot_cpuid, of_spin_mask);
-
 	/* Non-lpar has additional take/give timebase */
 	if (rtas_token("freeze-time-base") != RTAS_UNKNOWN_SERVICE) {
 		smp_ops->give_timebase = rtas_give_timebase;
diff --git a/arch/powerpc/platforms/pseries/suspend.c b/arch/powerpc/platforms/pseries/suspend.c
index 5f997e79d57..b87b97849d4 100644
--- a/arch/powerpc/platforms/pseries/suspend.c
+++ b/arch/powerpc/platforms/pseries/suspend.c
@@ -26,6 +26,7 @@
 #include <asm/mmu.h>
 #include <asm/rtas.h>
 #include <asm/topology.h>
+#include "../../kernel/cacheinfo.h"
 
 static u64 stream_id;
 static struct device suspend_dev;
@@ -79,6 +80,23 @@ static int pseries_suspend_cpu(void)
 }
 
 /**
+ * pseries_suspend_enable_irqs
+ *
+ * Post suspend configuration updates
+ *
+ **/
+static void pseries_suspend_enable_irqs(void)
+{
+	/*
+	 * Update configuration which can be modified based on device tree
+	 * changes during resume.
+	 */
+	cacheinfo_cpu_offline(smp_processor_id());
+	post_mobility_fixup();
+	cacheinfo_cpu_online(smp_processor_id());
+}
+
+/**
  * pseries_suspend_enter - Final phase of hibernation
  *
  * Return value:
@@ -106,7 +124,7 @@ static int pseries_prepare_late(void)
 	atomic_set(&suspend_data.done, 0);
 	atomic_set(&suspend_data.error, 0);
 	suspend_data.complete = &suspend_work;
-	INIT_COMPLETION(suspend_work);
+	reinit_completion(&suspend_work);
 	return 0;
 }
 
@@ -174,7 +192,30 @@ out:
 	return rc;
 }
 
-static DEVICE_ATTR(hibernate, S_IWUSR, NULL, store_hibernate);
+#define USER_DT_UPDATE	0
+#define KERN_DT_UPDATE	1
+
+/**
+ * show_hibernate - Report device tree update responsibility
+ * @dev:		subsys root device
+ * @attr:		device attribute struct
+ * @buf:		buffer
+ *
+ * Report whether a device tree update is performed by the kernel after a
+ * resume, or if drmgr must coordinate the update from user space.
+ *
+ * Return value:
+ *	0 if drmgr is to initiate update, and 1 otherwise
+ **/
+static ssize_t show_hibernate(struct device *dev,
+			      struct device_attribute *attr,
+			      char *buf)
+{
+	return sprintf(buf, "%d\n", KERN_DT_UPDATE);
+}
+
+static DEVICE_ATTR(hibernate, S_IWUSR | S_IRUGO,
+		   show_hibernate, store_hibernate);
 
 static struct bus_type suspend_subsys = {
 	.name = "power",
@@ -235,6 +276,7 @@ static int __init pseries_suspend_init(void)
 		return rc;
 
 	ppc_md.suspend_disable_cpu = pseries_suspend_cpu;
+	ppc_md.suspend_enable_irqs = pseries_suspend_enable_irqs;
 	suspend_set_ops(&pseries_suspend_ops);
 	return 0;
 }
diff --git a/arch/powerpc/platforms/wsp/Kconfig b/arch/powerpc/platforms/wsp/Kconfig
deleted file mode 100644
index 422a175b10e..00000000000
--- a/arch/powerpc/platforms/wsp/Kconfig
+++ /dev/null
@@ -1,30 +0,0 @@
-config PPC_WSP
-	bool
-	select PPC_A2
-	select GENERIC_TBSYNC
-	select PPC_ICSWX
-	select PPC_SCOM
-	select PPC_XICS
-	select PPC_ICP_NATIVE
-	select PCI
-	select PPC_IO_WORKAROUNDS if PCI
-	select PPC_INDIRECT_PIO if PCI
-	default n
-
-menu "WSP platform selection"
-	depends on PPC_BOOK3E_64
-
-config PPC_PSR2
-	bool "PowerEN System Reference Platform 2"
-	select EPAPR_BOOT
-	select PPC_WSP
-	default y
-
-config PPC_CHROMA
-	bool "PowerEN PCIe Chroma Card"
-	select EPAPR_BOOT
-	select PPC_WSP
-	select OF_DYNAMIC
-	default y
-
-endmenu
diff --git a/arch/powerpc/platforms/wsp/Makefile b/arch/powerpc/platforms/wsp/Makefile
deleted file mode 100644
index 162fc60125a..00000000000
--- a/arch/powerpc/platforms/wsp/Makefile
+++ /dev/null
@@ -1,10 +0,0 @@
-ccflags-y			+= $(NO_MINIMAL_TOC)
-
-obj-y				+= setup.o ics.o wsp.o
-obj-$(CONFIG_PPC_PSR2)		+= psr2.o
-obj-$(CONFIG_PPC_CHROMA)	+= chroma.o h8.o
-obj-$(CONFIG_PPC_WSP)		+= opb_pic.o
-obj-$(CONFIG_PPC_WSP)		+= scom_wsp.o
-obj-$(CONFIG_SMP)		+= smp.o scom_smp.o
-obj-$(CONFIG_PCI)		+= wsp_pci.o
-obj-$(CONFIG_PCI_MSI)		+= msi.o
diff --git a/arch/powerpc/platforms/wsp/chroma.c b/arch/powerpc/platforms/wsp/chroma.c
deleted file mode 100644
index 8ef53bc2e70..00000000000
--- a/arch/powerpc/platforms/wsp/chroma.c
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Copyright 2008-2011, IBM Corporation
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/delay.h>
-#include <linux/init.h>
-#include <linux/irq.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/of.h>
-#include <linux/smp.h>
-#include <linux/time.h>
-
-#include <asm/machdep.h>
-#include <asm/udbg.h>
-
-#include "ics.h"
-#include "wsp.h"
-
-void __init chroma_setup_arch(void)
-{
-	wsp_setup_arch();
-	wsp_setup_h8();
-
-}
-
-static int __init chroma_probe(void)
-{
-	unsigned long root = of_get_flat_dt_root();
-
-	if (!of_flat_dt_is_compatible(root, "ibm,wsp-chroma"))
-		return 0;
-
-	return 1;
-}
-
-define_machine(chroma_md) {
-	.name			= "Chroma PCIe",
-	.probe			= chroma_probe,
-	.setup_arch		= chroma_setup_arch,
-	.restart		= wsp_h8_restart,
-	.power_off		= wsp_h8_power_off,
-	.halt			= wsp_halt,
-	.calibrate_decr		= generic_calibrate_decr,
-	.init_IRQ		= wsp_setup_irq,
-	.progress		= udbg_progress,
-	.power_save		= book3e_idle,
-};
-
-machine_arch_initcall(chroma_md, wsp_probe_devices);
diff --git a/arch/powerpc/platforms/wsp/h8.c b/arch/powerpc/platforms/wsp/h8.c
deleted file mode 100644
index d18e6cc19df..00000000000
--- a/arch/powerpc/platforms/wsp/h8.c
+++ /dev/null
@@ -1,134 +0,0 @@
-/*
- * Copyright 2008-2011, IBM Corporation
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/kernel.h>
-#include <linux/of.h>
-#include <linux/io.h>
-
-#include "wsp.h"
-
-/*
- * The UART connection to the H8 is over ttyS1 which is just a 16550.
- * We assume that FW has it setup right and no one messes with it.
- */
-
-
-static u8 __iomem *h8;
-
-#define RBR 0		/* Receiver Buffer Register */
-#define THR 0		/* Transmitter Holding Register */
-#define LSR 5		/* Line Status Register */
-#define LSR_DR 0x01	/* LSR value for Data-Ready */
-#define LSR_THRE 0x20	/* LSR value for Transmitter-Holding-Register-Empty */
-static void wsp_h8_putc(int c)
-{
-	u8 lsr;
-
-	do {
-		lsr = readb(h8 + LSR);
-	} while ((lsr & LSR_THRE) != LSR_THRE);
-	writeb(c, h8 + THR);
-}
-
-static int wsp_h8_getc(void)
-{
-	u8 lsr;
-
-	do {
-		lsr = readb(h8 + LSR);
-	} while ((lsr & LSR_DR) != LSR_DR);
-
-	return readb(h8 + RBR);
-}
-
-static void wsp_h8_puts(const char *s, int sz)
-{
-	int i;
-
-	for (i = 0; i < sz; i++) {
-		wsp_h8_putc(s[i]);
-
-		/* no flow control so wait for echo */
-		wsp_h8_getc();
-	}
-	wsp_h8_putc('\r');
-	wsp_h8_putc('\n');
-}
-
-static void wsp_h8_terminal_cmd(const char *cmd, int sz)
-{
-	hard_irq_disable();
-	wsp_h8_puts(cmd, sz);
-	/* should never return, but just in case */
-	for (;;)
-		continue;
-}
-
-
-void wsp_h8_restart(char *cmd)
-{
-	static const char restart[] = "warm-reset";
-
-	(void)cmd;
-	wsp_h8_terminal_cmd(restart, sizeof(restart) - 1);
-}
-
-void wsp_h8_power_off(void)
-{
-	static const char off[] = "power-off";
-
-	wsp_h8_terminal_cmd(off, sizeof(off) - 1);
-}
-
-static void __iomem *wsp_h8_getaddr(void)
-{
-	struct device_node *aliases;
-	struct device_node *uart;
-	struct property *path;
-	void __iomem *va = NULL;
-
-	/*
-	 * there is nothing in the devtree to tell us which is mapped
-	 * to the H8, but se know it is the second serial port.
-	 */
-
-	aliases = of_find_node_by_path("/aliases");
-	if (aliases == NULL)
-		return NULL;
-
-	path = of_find_property(aliases, "serial1", NULL);
-	if (path == NULL)
-		goto out;
-
-	uart = of_find_node_by_path(path->value);
-	if (uart == NULL)
-		goto out;
-
-	va = of_iomap(uart, 0);
-
-	/* remove it so no one messes with it */
-	of_detach_node(uart);
-	of_node_put(uart);
-
-out:
-	of_node_put(aliases);
-
-	return va;
-}
-
-void __init wsp_setup_h8(void)
-{
-	h8 = wsp_h8_getaddr();
-
-	/* Devtree change? lets hard map it anyway */
-	if (h8 == NULL) {
-		pr_warn("UART to H8 could not be found");
-		h8 = ioremap(0xffc0008000ULL, 0x100);
-	}
-}
diff --git a/arch/powerpc/platforms/wsp/ics.c b/arch/powerpc/platforms/wsp/ics.c
deleted file mode 100644
index 2d3b1dd9571..00000000000
--- a/arch/powerpc/platforms/wsp/ics.c
+++ /dev/null
@@ -1,760 +0,0 @@
-/*
- * Copyright 2008-2011 IBM Corporation.
- *
- *  This program is free software; you can redistribute it and/or
- *  modify it under the terms of the GNU General Public License
- *  as published by the Free Software Foundation; either version
- *  2 of the License, or (at your option) any later version.
- */
-
-#include <linux/cpu.h>
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/irq.h>
-#include <linux/kernel.h>
-#include <linux/msi.h>
-#include <linux/of.h>
-#include <linux/slab.h>
-#include <linux/smp.h>
-#include <linux/spinlock.h>
-#include <linux/types.h>
-
-#include <asm/io.h>
-#include <asm/irq.h>
-#include <asm/xics.h>
-
-#include "wsp.h"
-#include "ics.h"
-
-
-/* WSP ICS */
-
-struct wsp_ics {
-	struct ics ics;
-	struct device_node *dn;
-	void __iomem *regs;
-	spinlock_t lock;
-	unsigned long *bitmap;
-	u32 chip_id;
-	u32 lsi_base;
-	u32 lsi_count;
-	u64 hwirq_start;
-	u64 count;
-#ifdef CONFIG_SMP
-	int *hwirq_cpu_map;
-#endif
-};
-
-#define to_wsp_ics(ics)	container_of(ics, struct wsp_ics, ics)
-
-#define INT_SRC_LAYER_BUID_REG(base)	((base) + 0x00)
-#define IODA_TBL_ADDR_REG(base)		((base) + 0x18)
-#define IODA_TBL_DATA_REG(base)		((base) + 0x20)
-#define XIVE_UPDATE_REG(base)		((base) + 0x28)
-#define ICS_INT_CAPS_REG(base)		((base) + 0x30)
-
-#define TBL_AUTO_INCREMENT	((1UL << 63) | (1UL << 15))
-#define TBL_SELECT_XIST		(1UL << 48)
-#define TBL_SELECT_XIVT		(1UL << 49)
-
-#define IODA_IRQ(irq)		((irq) & (0x7FFULL))	/* HRM 5.1.3.4 */
-
-#define XIST_REQUIRED		0x8
-#define XIST_REJECTED		0x4
-#define XIST_PRESENTED		0x2
-#define XIST_PENDING		0x1
-
-#define XIVE_SERVER_SHIFT	42
-#define XIVE_SERVER_MASK	0xFFFFULL
-#define XIVE_PRIORITY_MASK	0xFFULL
-#define XIVE_PRIORITY_SHIFT	32
-#define XIVE_WRITE_ENABLE	(1ULL << 63)
-
-/*
- * The docs refer to a 6 bit field called ChipID, which consists of a
- * 3 bit NodeID and a 3 bit ChipID. On WSP the ChipID is always zero
- * so we ignore it, and every where we use "chip id" in this code we
- * mean the NodeID.
- */
-#define WSP_ICS_CHIP_SHIFT		17
-
-
-static struct wsp_ics *ics_list;
-static int num_ics;
-
-/* ICS Source controller accessors */
-
-static u64 wsp_ics_get_xive(struct wsp_ics *ics, unsigned int irq)
-{
-	unsigned long flags;
-	u64 xive;
-
-	spin_lock_irqsave(&ics->lock, flags);
-	out_be64(IODA_TBL_ADDR_REG(ics->regs), TBL_SELECT_XIVT | IODA_IRQ(irq));
-	xive = in_be64(IODA_TBL_DATA_REG(ics->regs));
-	spin_unlock_irqrestore(&ics->lock, flags);
-
-	return xive;
-}
-
-static void wsp_ics_set_xive(struct wsp_ics *ics, unsigned int irq, u64 xive)
-{
-	xive &= ~XIVE_ADDR_MASK;
-	xive |= (irq & XIVE_ADDR_MASK);
-	xive |= XIVE_WRITE_ENABLE;
-
-	out_be64(XIVE_UPDATE_REG(ics->regs), xive);
-}
-
-static u64 xive_set_server(u64 xive, unsigned int server)
-{
-	u64 mask = ~(XIVE_SERVER_MASK << XIVE_SERVER_SHIFT);
-
-	xive &= mask;
-	xive |= (server & XIVE_SERVER_MASK) << XIVE_SERVER_SHIFT;
-
-	return xive;
-}
-
-static u64 xive_set_priority(u64 xive, unsigned int priority)
-{
-	u64 mask = ~(XIVE_PRIORITY_MASK << XIVE_PRIORITY_SHIFT);
-
-	xive &= mask;
-	xive |= (priority & XIVE_PRIORITY_MASK) << XIVE_PRIORITY_SHIFT;
-
-	return xive;
-}
-
-
-#ifdef CONFIG_SMP
-/* Find logical CPUs within mask on a given chip and store result in ret */
-void cpus_on_chip(int chip_id, cpumask_t *mask, cpumask_t *ret)
-{
-	int cpu, chip;
-	struct device_node *cpu_dn, *dn;
-	const u32 *prop;
-
-	cpumask_clear(ret);
-	for_each_cpu(cpu, mask) {
-		cpu_dn = of_get_cpu_node(cpu, NULL);
-		if (!cpu_dn)
-			continue;
-
-		prop = of_get_property(cpu_dn, "at-node", NULL);
-		if (!prop) {
-			of_node_put(cpu_dn);
-			continue;
-		}
-
-		dn = of_find_node_by_phandle(*prop);
-		of_node_put(cpu_dn);
-
-		chip = wsp_get_chip_id(dn);
-		if (chip == chip_id)
-			cpumask_set_cpu(cpu, ret);
-
-		of_node_put(dn);
-	}
-}
-
-/* Store a suitable CPU to handle a hwirq in the ics->hwirq_cpu_map cache */
-static int cache_hwirq_map(struct wsp_ics *ics, unsigned int hwirq,
-			   const cpumask_t *affinity)
-{
-	cpumask_var_t avail, newmask;
-	int ret = -ENOMEM, cpu, cpu_rover = 0, target;
-	int index = hwirq - ics->hwirq_start;
-	unsigned int nodeid;
-
-	BUG_ON(index < 0 || index >= ics->count);
-
-	if (!ics->hwirq_cpu_map)
-		return -ENOMEM;
-
-	if (!distribute_irqs) {
-		ics->hwirq_cpu_map[hwirq - ics->hwirq_start] = xics_default_server;
-		return 0;
-	}
-
-	/* Allocate needed CPU masks */
-	if (!alloc_cpumask_var(&avail, GFP_KERNEL))
-		goto ret;
-	if (!alloc_cpumask_var(&newmask, GFP_KERNEL))
-		goto freeavail;
-
-	/* Find PBus attached to the source of this IRQ */
-	nodeid = (hwirq >> WSP_ICS_CHIP_SHIFT) & 0x3; /* 12:14 */
-
-	/* Find CPUs that could handle this IRQ */
-	if (affinity)
-		cpumask_and(avail, cpu_online_mask, affinity);
-	else
-		cpumask_copy(avail, cpu_online_mask);
-
-	/* Narrow selection down to logical CPUs on the same chip */
-	cpus_on_chip(nodeid, avail, newmask);
-
-	/* Ensure we haven't narrowed it down to 0 */
-	if (unlikely(cpumask_empty(newmask))) {
-		if (unlikely(cpumask_empty(avail))) {
-			ret = -1;
-			goto out;
-		}
-		cpumask_copy(newmask, avail);
-	}
-
-	/* Choose a CPU out of those we narrowed it down to in round robin */
-	target = hwirq % cpumask_weight(newmask);
-	for_each_cpu(cpu, newmask) {
-		if (cpu_rover++ >= target) {
-			ics->hwirq_cpu_map[index] = get_hard_smp_processor_id(cpu);
-			ret = 0;
-			goto out;
-		}
-	}
-
-	/* Shouldn't happen */
-	WARN_ON(1);
-
-out:
-	free_cpumask_var(newmask);
-freeavail:
-	free_cpumask_var(avail);
-ret:
-	if (ret < 0) {
-		ics->hwirq_cpu_map[index] = cpumask_first(cpu_online_mask);
-		pr_warning("Error, falling hwirq 0x%x routing back to CPU %i\n",
-			   hwirq, ics->hwirq_cpu_map[index]);
-	}
-	return ret;
-}
-
-static void alloc_irq_map(struct wsp_ics *ics)
-{
-	int i;
-
-	ics->hwirq_cpu_map = kmalloc(sizeof(int) * ics->count, GFP_KERNEL);
-	if (!ics->hwirq_cpu_map) {
-		pr_warning("Allocate hwirq_cpu_map failed, "
-			   "IRQ balancing disabled\n");
-		return;
-	}
-
-	for (i=0; i < ics->count; i++)
-		ics->hwirq_cpu_map[i] = xics_default_server;
-}
-
-static int get_irq_server(struct wsp_ics *ics, unsigned int hwirq)
-{
-	int index = hwirq - ics->hwirq_start;
-
-	BUG_ON(index < 0 || index >= ics->count);
-
-	if (!ics->hwirq_cpu_map)
-		return xics_default_server;
-
-	return ics->hwirq_cpu_map[index];
-}
-#else /* !CONFIG_SMP */
-static int cache_hwirq_map(struct wsp_ics *ics, unsigned int hwirq,
-			   const cpumask_t *affinity)
-{
-	return 0;
-}
-
-static int get_irq_server(struct wsp_ics *ics, unsigned int hwirq)
-{
-	return xics_default_server;
-}
-
-static void alloc_irq_map(struct wsp_ics *ics) { }
-#endif
-
-static void wsp_chip_unmask_irq(struct irq_data *d)
-{
-	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
-	struct wsp_ics *ics;
-	int server;
-	u64 xive;
-
-	if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS)
-		return;
-
-	ics = d->chip_data;
-	if (WARN_ON(!ics))
-		return;
-
-	server = get_irq_server(ics, hw_irq);
-
-	xive = wsp_ics_get_xive(ics, hw_irq);
-	xive = xive_set_server(xive, server);
-	xive = xive_set_priority(xive, DEFAULT_PRIORITY);
-	wsp_ics_set_xive(ics, hw_irq, xive);
-}
-
-static unsigned int wsp_chip_startup(struct irq_data *d)
-{
-	/* unmask it */
-	wsp_chip_unmask_irq(d);
-	return 0;
-}
-
-static void wsp_mask_real_irq(unsigned int hw_irq, struct wsp_ics *ics)
-{
-	u64 xive;
-
-	if (hw_irq == XICS_IPI)
-		return;
-
-	if (WARN_ON(!ics))
-		return;
-	xive = wsp_ics_get_xive(ics, hw_irq);
-	xive = xive_set_server(xive, xics_default_server);
-	xive = xive_set_priority(xive, LOWEST_PRIORITY);
-	wsp_ics_set_xive(ics, hw_irq, xive);
-}
-
-static void wsp_chip_mask_irq(struct irq_data *d)
-{
-	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
-	struct wsp_ics *ics = d->chip_data;
-
-	if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS)
-		return;
-
-	wsp_mask_real_irq(hw_irq, ics);
-}
-
-static int wsp_chip_set_affinity(struct irq_data *d,
-				 const struct cpumask *cpumask, bool force)
-{
-	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
-	struct wsp_ics *ics;
-	int ret;
-	u64 xive;
-
-	if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS)
-		return -1;
-
-	ics = d->chip_data;
-	if (WARN_ON(!ics))
-		return -1;
-	xive = wsp_ics_get_xive(ics, hw_irq);
-
-	/*
-	 * For the moment only implement delivery to all cpus or one cpu.
-	 * Get current irq_server for the given irq
-	 */
-	ret = cache_hwirq_map(ics, hw_irq, cpumask);
-	if (ret == -1) {
-		char cpulist[128];
-		cpumask_scnprintf(cpulist, sizeof(cpulist), cpumask);
-		pr_warning("%s: No online cpus in the mask %s for irq %d\n",
-			   __func__, cpulist, d->irq);
-		return -1;
-	} else if (ret == -ENOMEM) {
-		pr_warning("%s: Out of memory\n", __func__);
-		return -1;
-	}
-
-	xive = xive_set_server(xive, get_irq_server(ics, hw_irq));
-	wsp_ics_set_xive(ics, hw_irq, xive);
-
-	return IRQ_SET_MASK_OK;
-}
-
-static struct irq_chip wsp_irq_chip = {
-	.name = "WSP ICS",
-	.irq_startup		= wsp_chip_startup,
-	.irq_mask		= wsp_chip_mask_irq,
-	.irq_unmask		= wsp_chip_unmask_irq,
-	.irq_set_affinity	= wsp_chip_set_affinity
-};
-
-static int wsp_ics_host_match(struct ics *ics, struct device_node *dn)
-{
-	/* All ICSs in the system implement a global irq number space,
-	 * so match against them all. */
-	return of_device_is_compatible(dn, "ibm,ppc-xics");
-}
-
-static int wsp_ics_match_hwirq(struct wsp_ics *wsp_ics, unsigned int hwirq)
-{
-	if (hwirq >= wsp_ics->hwirq_start &&
-	    hwirq <  wsp_ics->hwirq_start + wsp_ics->count)
-		return 1;
-
-	return 0;
-}
-
-static int wsp_ics_map(struct ics *ics, unsigned int virq)
-{
-	struct wsp_ics *wsp_ics = to_wsp_ics(ics);
-	unsigned int hw_irq = virq_to_hw(virq);
-	unsigned long flags;
-
-	if (!wsp_ics_match_hwirq(wsp_ics, hw_irq))
-		return -ENOENT;
-
-	irq_set_chip_and_handler(virq, &wsp_irq_chip, handle_fasteoi_irq);
-
-	irq_set_chip_data(virq, wsp_ics);
-
-	spin_lock_irqsave(&wsp_ics->lock, flags);
-	bitmap_allocate_region(wsp_ics->bitmap, hw_irq - wsp_ics->hwirq_start, 0);
-	spin_unlock_irqrestore(&wsp_ics->lock, flags);
-
-	return 0;
-}
-
-static void wsp_ics_mask_unknown(struct ics *ics, unsigned long hw_irq)
-{
-	struct wsp_ics *wsp_ics = to_wsp_ics(ics);
-
-	if (!wsp_ics_match_hwirq(wsp_ics, hw_irq))
-		return;
-
-	pr_err("%s: IRQ %lu (real) is invalid, disabling it.\n", __func__, hw_irq);
-	wsp_mask_real_irq(hw_irq, wsp_ics);
-}
-
-static long wsp_ics_get_server(struct ics *ics, unsigned long hw_irq)
-{
-	struct wsp_ics *wsp_ics = to_wsp_ics(ics);
-
-	if (!wsp_ics_match_hwirq(wsp_ics, hw_irq))
-		return -ENOENT;
-
-	return get_irq_server(wsp_ics, hw_irq);
-}
-
-/* HW Number allocation API */
-
-static struct wsp_ics *wsp_ics_find_dn_ics(struct device_node *dn)
-{
-	struct device_node *iparent;
-	int i;
-
-	iparent = of_irq_find_parent(dn);
-	if (!iparent) {
-		pr_err("wsp_ics: Failed to find interrupt parent!\n");
-		return NULL;
-	}
-
-	for(i = 0; i < num_ics; i++) {
-		if(ics_list[i].dn == iparent)
-			break;
-	}
-
-	if (i >= num_ics) {
-		pr_err("wsp_ics: Unable to find parent bitmap!\n");
-		return NULL;
-	}
-
-	return &ics_list[i];
-}
-
-int wsp_ics_alloc_irq(struct device_node *dn, int num)
-{
-	struct wsp_ics *ics;
-	int order, offset;
-
-	ics = wsp_ics_find_dn_ics(dn);
-	if (!ics)
-		return -ENODEV;
-
-	/* Fast, but overly strict if num isn't a power of two */
-	order = get_count_order(num);
-
-	spin_lock_irq(&ics->lock);
-	offset = bitmap_find_free_region(ics->bitmap, ics->count, order);
-	spin_unlock_irq(&ics->lock);
-
-	if (offset < 0)
-		return offset;
-
-	return offset + ics->hwirq_start;
-}
-
-void wsp_ics_free_irq(struct device_node *dn, unsigned int irq)
-{
-	struct wsp_ics *ics;
-
-	ics = wsp_ics_find_dn_ics(dn);
-	if (WARN_ON(!ics))
-		return;
-
-	spin_lock_irq(&ics->lock);
-	bitmap_release_region(ics->bitmap, irq, 0);
-	spin_unlock_irq(&ics->lock);
-}
-
-/* Initialisation */
-
-static int __init wsp_ics_bitmap_setup(struct wsp_ics *ics,
-				      struct device_node *dn)
-{
-	int len, i, j, size;
-	u32 start, count;
-	const u32 *p;
-
-	size = BITS_TO_LONGS(ics->count) * sizeof(long);
-	ics->bitmap = kzalloc(size, GFP_KERNEL);
-	if (!ics->bitmap) {
-		pr_err("wsp_ics: ENOMEM allocating IRQ bitmap!\n");
-		return -ENOMEM;
-	}
-
-	spin_lock_init(&ics->lock);
-
-	p = of_get_property(dn, "available-ranges", &len);
-	if (!p || !len) {
-		/* FIXME this should be a WARN() once mambo is updated */
-		pr_err("wsp_ics: No available-ranges defined for %s\n",
-			dn->full_name);
-		return 0;
-	}
-
-	if (len % (2 * sizeof(u32)) != 0) {
-		/* FIXME this should be a WARN() once mambo is updated */
-		pr_err("wsp_ics: Invalid available-ranges for %s\n",
-			dn->full_name);
-		return 0;
-	}
-
-	bitmap_fill(ics->bitmap, ics->count);
-
-	for (i = 0; i < len / sizeof(u32); i += 2) {
-		start = of_read_number(p + i, 1);
-		count = of_read_number(p + i + 1, 1);
-
-		pr_devel("%s: start: %d count: %d\n", __func__, start, count);
-
-		if ((start + count) > (ics->hwirq_start + ics->count) ||
-		     start < ics->hwirq_start) {
-			pr_err("wsp_ics: Invalid range! -> %d to %d\n",
-					start, start + count);
-			break;
-		}
-
-		for (j = 0; j < count; j++)
-			bitmap_release_region(ics->bitmap,
-				(start + j) - ics->hwirq_start, 0);
-	}
-
-	/* Ensure LSIs are not available for allocation */
-	bitmap_allocate_region(ics->bitmap, ics->lsi_base,
-			       get_count_order(ics->lsi_count));
-
-	return 0;
-}
-
-static int __init wsp_ics_setup(struct wsp_ics *ics, struct device_node *dn)
-{
-	u32 lsi_buid, msi_buid, msi_base, msi_count;
-	void __iomem *regs;
-	const u32 *p;
-	int rc, len, i;
-	u64 caps, buid;
-
-	p = of_get_property(dn, "interrupt-ranges", &len);
-	if (!p || len < (2 * sizeof(u32))) {
-		pr_err("wsp_ics: No/bad interrupt-ranges found on %s\n",
-			dn->full_name);
-		return -ENOENT;
-	}
-
-	if (len > (2 * sizeof(u32))) {
-		pr_err("wsp_ics: Multiple ics ranges not supported.\n");
-		return -EINVAL;
-	}
-
-	regs = of_iomap(dn, 0);
-	if (!regs) {
-		pr_err("wsp_ics: of_iomap(%s) failed\n", dn->full_name);
-		return -ENXIO;
-	}
-
-	ics->hwirq_start = of_read_number(p, 1);
-	ics->count = of_read_number(p + 1, 1);
-	ics->regs = regs;
-
-	ics->chip_id = wsp_get_chip_id(dn);
-	if (WARN_ON(ics->chip_id < 0))
-		ics->chip_id = 0;
-
-	/* Get some informations about the critter */
-	caps = in_be64(ICS_INT_CAPS_REG(ics->regs));
-	buid = in_be64(INT_SRC_LAYER_BUID_REG(ics->regs));
-	ics->lsi_count = caps >> 56;
-	msi_count = (caps >> 44) & 0x7ff;
-
-	/* Note: LSI BUID is 9 bits, but really only 3 are BUID and the
-	 * rest is mixed in the interrupt number. We store the whole
-	 * thing though
-	 */
-	lsi_buid = (buid >> 48) & 0x1ff;
-	ics->lsi_base = (ics->chip_id << WSP_ICS_CHIP_SHIFT) | lsi_buid << 5;
-	msi_buid = (buid >> 37) & 0x7;
-	msi_base = (ics->chip_id << WSP_ICS_CHIP_SHIFT) | msi_buid << 11;
-
-	pr_info("wsp_ics: Found %s\n", dn->full_name);
-	pr_info("wsp_ics:    irq range : 0x%06llx..0x%06llx\n",
-		ics->hwirq_start, ics->hwirq_start + ics->count - 1);
-	pr_info("wsp_ics:    %4d LSIs : 0x%06x..0x%06x\n",
-		ics->lsi_count, ics->lsi_base,
-		ics->lsi_base + ics->lsi_count - 1);
-	pr_info("wsp_ics:    %4d MSIs : 0x%06x..0x%06x\n",
-		msi_count, msi_base,
-		msi_base + msi_count - 1);
-
-	/* Let's check the HW config is sane */
-	if (ics->lsi_base < ics->hwirq_start ||
-	    (ics->lsi_base + ics->lsi_count) > (ics->hwirq_start + ics->count))
-		pr_warning("wsp_ics: WARNING ! LSIs out of interrupt-ranges !\n");
-	if (msi_base < ics->hwirq_start ||
-	    (msi_base + msi_count) > (ics->hwirq_start + ics->count))
-		pr_warning("wsp_ics: WARNING ! MSIs out of interrupt-ranges !\n");
-
-	/* We don't check for overlap between LSI and MSI, which will happen
-	 * if we use the same BUID, I'm not sure yet how legit that is.
-	 */
-
-	rc = wsp_ics_bitmap_setup(ics, dn);
-	if (rc) {
-		iounmap(regs);
-		return rc;
-	}
-
-	ics->dn = of_node_get(dn);
-	alloc_irq_map(ics);
-
-	for(i = 0; i < ics->count; i++)
-		wsp_mask_real_irq(ics->hwirq_start + i, ics);
-
-	ics->ics.map = wsp_ics_map;
-	ics->ics.mask_unknown = wsp_ics_mask_unknown;
-	ics->ics.get_server = wsp_ics_get_server;
-	ics->ics.host_match = wsp_ics_host_match;
-
-	xics_register_ics(&ics->ics);
-
-	return 0;
-}
-
-static void __init wsp_ics_set_default_server(void)
-{
-	struct device_node *np;
-	u32 hwid;
-
-	/* Find the server number for the boot cpu. */
-	np = of_get_cpu_node(boot_cpuid, NULL);
-	BUG_ON(!np);
-
-	hwid = get_hard_smp_processor_id(boot_cpuid);
-
-	pr_info("wsp_ics: default server is %#x, CPU %s\n", hwid, np->full_name);
-	xics_default_server = hwid;
-
-	of_node_put(np);
-}
-
-static int __init wsp_ics_init(void)
-{
-	struct device_node *dn;
-	struct wsp_ics *ics;
-	int rc, found;
-
-	wsp_ics_set_default_server();
-
-	found = 0;
-	for_each_compatible_node(dn, NULL, "ibm,ppc-xics")
-		found++;
-
-	if (found == 0) {
-		pr_err("wsp_ics: No ICS's found!\n");
-		return -ENODEV;
-	}
-
-	ics_list = kmalloc(sizeof(*ics) * found, GFP_KERNEL);
-	if (!ics_list) {
-		pr_err("wsp_ics: No memory for structs.\n");
-		return -ENOMEM;
-	}
-
-	num_ics = 0;
-	ics = ics_list;
-	for_each_compatible_node(dn, NULL, "ibm,wsp-xics") {
-		rc = wsp_ics_setup(ics, dn);
-		if (rc == 0) {
-			ics++;
-			num_ics++;
-		}
-	}
-
-	if (found != num_ics) {
-		pr_err("wsp_ics: Failed setting up %d ICS's\n",
-			found - num_ics);
-		return -1;
-	}
-
-	return 0;
-}
-
-void __init wsp_init_irq(void)
-{
-	wsp_ics_init();
-	xics_init();
-
-	/* We need to patch our irq chip's EOI to point to the right ICP */
-	wsp_irq_chip.irq_eoi = icp_ops->eoi;
-}
-
-#ifdef CONFIG_PCI_MSI
-static void wsp_ics_msi_unmask_irq(struct irq_data *d)
-{
-	wsp_chip_unmask_irq(d);
-	unmask_msi_irq(d);
-}
-
-static unsigned int wsp_ics_msi_startup(struct irq_data *d)
-{
-	wsp_ics_msi_unmask_irq(d);
-	return 0;
-}
-
-static void wsp_ics_msi_mask_irq(struct irq_data *d)
-{
-	mask_msi_irq(d);
-	wsp_chip_mask_irq(d);
-}
-
-/*
- * we do it this way because we reassinge default EOI handling in
- * irq_init() above
- */
-static void wsp_ics_eoi(struct irq_data *data)
-{
-	wsp_irq_chip.irq_eoi(data);
-}
-
-static struct irq_chip wsp_ics_msi = {
-	.name = "WSP ICS MSI",
-	.irq_startup = wsp_ics_msi_startup,
-	.irq_mask = wsp_ics_msi_mask_irq,
-	.irq_unmask = wsp_ics_msi_unmask_irq,
-	.irq_eoi = wsp_ics_eoi,
-	.irq_set_affinity = wsp_chip_set_affinity
-};
-
-void wsp_ics_set_msi_chip(unsigned int irq)
-{
-	irq_set_chip(irq, &wsp_ics_msi);
-}
-
-void wsp_ics_set_std_chip(unsigned int irq)
-{
-	irq_set_chip(irq, &wsp_irq_chip);
-}
-#endif /* CONFIG_PCI_MSI */
diff --git a/arch/powerpc/platforms/wsp/ics.h b/arch/powerpc/platforms/wsp/ics.h
deleted file mode 100644
index 07b644e0cf9..00000000000
--- a/arch/powerpc/platforms/wsp/ics.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Copyright 2009 IBM Corporation.
- *
- *  This program is free software; you can redistribute it and/or
- *  modify it under the terms of the GNU General Public License
- *  as published by the Free Software Foundation; either version
- *  2 of the License, or (at your option) any later version.
- */
-
-#ifndef __ICS_H
-#define __ICS_H
-
-#define XIVE_ADDR_MASK		0x7FFULL
-
-extern void wsp_init_irq(void);
-
-extern int wsp_ics_alloc_irq(struct device_node *dn, int num);
-extern void wsp_ics_free_irq(struct device_node *dn, unsigned int irq);
-
-#ifdef CONFIG_PCI_MSI
-extern void wsp_ics_set_msi_chip(unsigned int irq);
-extern void wsp_ics_set_std_chip(unsigned int irq);
-#endif /* CONFIG_PCI_MSI */
-
-#endif /* __ICS_H */
diff --git a/arch/powerpc/platforms/wsp/msi.c b/arch/powerpc/platforms/wsp/msi.c
deleted file mode 100644
index 380882f27ad..00000000000
--- a/arch/powerpc/platforms/wsp/msi.c
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * Copyright 2011 Michael Ellerman, IBM Corp.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/msi.h>
-#include <linux/irq.h>
-#include <linux/interrupt.h>
-
-#include "msi.h"
-#include "ics.h"
-#include "wsp_pci.h"
-
-/* Magic addresses for 32 & 64-bit MSIs with hardcoded MVE 0 */
-#define MSI_ADDR_32		0xFFFF0000ul
-#define MSI_ADDR_64		0x1000000000000000ul
-
-int wsp_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
-{
-	struct pci_controller *phb;
-	struct msi_desc *entry;
-	struct msi_msg msg;
-	unsigned int virq;
-	int hwirq;
-
-	phb = pci_bus_to_host(dev->bus);
-	if (!phb)
-		return -ENOENT;
-
-	entry = list_first_entry(&dev->msi_list, struct msi_desc, list);
-	if (entry->msi_attrib.is_64) {
-		msg.address_lo = 0;
-		msg.address_hi = MSI_ADDR_64 >> 32;
-	} else {
-		msg.address_lo = MSI_ADDR_32;
-		msg.address_hi = 0;
-	}
-
-	list_for_each_entry(entry, &dev->msi_list, list) {
-		hwirq = wsp_ics_alloc_irq(phb->dn, 1);
-		if (hwirq < 0) {
-			dev_warn(&dev->dev, "wsp_msi: hwirq alloc failed!\n");
-			return hwirq;
-		}
-
-		virq = irq_create_mapping(NULL, hwirq);
-		if (virq == NO_IRQ) {
-			dev_warn(&dev->dev, "wsp_msi: virq alloc failed!\n");
-			return -1;
-		}
-
-		dev_dbg(&dev->dev, "wsp_msi: allocated irq %#x/%#x\n",
-			hwirq, virq);
-
-		wsp_ics_set_msi_chip(virq);
-		irq_set_msi_desc(virq, entry);
-		msg.data = hwirq & XIVE_ADDR_MASK;
-		write_msi_msg(virq, &msg);
-	}
-
-	return 0;
-}
-
-void wsp_teardown_msi_irqs(struct pci_dev *dev)
-{
-	struct pci_controller *phb;
-	struct msi_desc *entry;
-	int hwirq;
-
-	phb = pci_bus_to_host(dev->bus);
-
-	dev_dbg(&dev->dev, "wsp_msi: tearing down msi irqs\n");
-
-	list_for_each_entry(entry, &dev->msi_list, list) {
-		if (entry->irq == NO_IRQ)
-			continue;
-
-		irq_set_msi_desc(entry->irq, NULL);
-		wsp_ics_set_std_chip(entry->irq);
-
-		hwirq = virq_to_hw(entry->irq);
-		/* In this order to avoid racing with irq_create_mapping() */
-		irq_dispose_mapping(entry->irq);
-		wsp_ics_free_irq(phb->dn, hwirq);
-	}
-}
-
-void wsp_setup_phb_msi(struct pci_controller *phb)
-{
-	/* Create a single MVE at offset 0 that matches everything */
-	out_be64(phb->cfg_data + PCIE_REG_IODA_ADDR, PCIE_REG_IODA_AD_TBL_MVT);
-	out_be64(phb->cfg_data + PCIE_REG_IODA_DATA0, 1ull << 63);
-
-	ppc_md.setup_msi_irqs = wsp_setup_msi_irqs;
-	ppc_md.teardown_msi_irqs = wsp_teardown_msi_irqs;
-}
diff --git a/arch/powerpc/platforms/wsp/msi.h b/arch/powerpc/platforms/wsp/msi.h
deleted file mode 100644
index 0ab27b71b24..00000000000
--- a/arch/powerpc/platforms/wsp/msi.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/*
- * Copyright 2011 Michael Ellerman, IBM Corp.
- *
- *  This program is free software; you can redistribute it and/or
- *  modify it under the terms of the GNU General Public License
- *  as published by the Free Software Foundation; either version
- *  2 of the License, or (at your option) any later version.
- */
-
-#ifndef __WSP_MSI_H
-#define __WSP_MSI_H
-
-#ifdef CONFIG_PCI_MSI
-extern void wsp_setup_phb_msi(struct pci_controller *phb);
-#else
-static inline void wsp_setup_phb_msi(struct pci_controller *phb) { }
-#endif
-
-#endif /* __WSP_MSI_H */
diff --git a/arch/powerpc/platforms/wsp/opb_pic.c b/arch/powerpc/platforms/wsp/opb_pic.c
deleted file mode 100644
index cb565bf9365..00000000000
--- a/arch/powerpc/platforms/wsp/opb_pic.c
+++ /dev/null
@@ -1,319 +0,0 @@
-/*
- * IBM Onboard Peripheral Bus Interrupt Controller
- *
- * Copyright 2010 Jack Miller, IBM Corporation.
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- */
-
-#include <linux/interrupt.h>
-#include <linux/io.h>
-#include <linux/irq.h>
-#include <linux/of.h>
-#include <linux/slab.h>
-#include <linux/time.h>
-
-#include <asm/reg_a2.h>
-#include <asm/irq.h>
-
-#define OPB_NR_IRQS 32
-
-#define OPB_MLSASIER	0x04    /* MLS Accumulated Status IER */
-#define OPB_MLSIR	0x50	/* MLS Interrupt Register */
-#define OPB_MLSIER	0x54	/* MLS Interrupt Enable Register */
-#define OPB_MLSIPR	0x58	/* MLS Interrupt Polarity Register */
-#define OPB_MLSIIR	0x5c	/* MLS Interrupt Inputs Register */
-
-static int opb_index = 0;
-
-struct opb_pic {
-	struct irq_domain *host;
-	void *regs;
-	int index;
-	spinlock_t lock;
-};
-
-static u32 opb_in(struct opb_pic *opb, int offset)
-{
-	return in_be32(opb->regs + offset);
-}
-
-static void opb_out(struct opb_pic *opb, int offset, u32 val)
-{
-	out_be32(opb->regs + offset, val);
-}
-
-static void opb_unmask_irq(struct irq_data *d)
-{
-	struct opb_pic *opb;
-	unsigned long flags;
-	u32 ier, bitset;
-
-	opb = d->chip_data;
-	bitset = (1 << (31 - irqd_to_hwirq(d)));
-
-	spin_lock_irqsave(&opb->lock, flags);
-
-	ier = opb_in(opb, OPB_MLSIER);
-	opb_out(opb, OPB_MLSIER, ier | bitset);
-	ier = opb_in(opb, OPB_MLSIER);
-
-	spin_unlock_irqrestore(&opb->lock, flags);
-}
-
-static void opb_mask_irq(struct irq_data *d)
-{
-	struct opb_pic *opb;
-	unsigned long flags;
-	u32 ier, mask;
-
-	opb = d->chip_data;
-	mask = ~(1 << (31 - irqd_to_hwirq(d)));
-
-	spin_lock_irqsave(&opb->lock, flags);
-
-	ier = opb_in(opb, OPB_MLSIER);
-	opb_out(opb, OPB_MLSIER, ier & mask);
-	ier = opb_in(opb, OPB_MLSIER); // Flush posted writes
-
-	spin_unlock_irqrestore(&opb->lock, flags);
-}
-
-static void opb_ack_irq(struct irq_data *d)
-{
-	struct opb_pic *opb;
-	unsigned long flags;
-	u32 bitset;
-
-	opb = d->chip_data;
-	bitset = (1 << (31 - irqd_to_hwirq(d)));
-
-	spin_lock_irqsave(&opb->lock, flags);
-
-	opb_out(opb, OPB_MLSIR, bitset);
-	opb_in(opb, OPB_MLSIR); // Flush posted writes
-
-	spin_unlock_irqrestore(&opb->lock, flags);
-}
-
-static void opb_mask_ack_irq(struct irq_data *d)
-{
-	struct opb_pic *opb;
-	unsigned long flags;
-	u32 bitset;
-	u32 ier, ir;
-
-	opb = d->chip_data;
-	bitset = (1 << (31 - irqd_to_hwirq(d)));
-
-	spin_lock_irqsave(&opb->lock, flags);
-
-	ier = opb_in(opb, OPB_MLSIER);
-	opb_out(opb, OPB_MLSIER, ier & ~bitset);
-	ier = opb_in(opb, OPB_MLSIER); // Flush posted writes
-
-	opb_out(opb, OPB_MLSIR, bitset);
-	ir = opb_in(opb, OPB_MLSIR); // Flush posted writes
-
-	spin_unlock_irqrestore(&opb->lock, flags);
-}
-
-static int opb_set_irq_type(struct irq_data *d, unsigned int flow)
-{
-	struct opb_pic *opb;
-	unsigned long flags;
-	int invert, ipr, mask, bit;
-
-	opb = d->chip_data;
-
-	/* The only information we're interested in in the type is whether it's
-	 * a high or low trigger. For high triggered interrupts, the polarity
-	 * set for it in the MLS Interrupt Polarity Register is 0, for low
-	 * interrupts it's 1 so that the proper input in the MLS Interrupt Input
-	 * Register is interrupted as asserting the interrupt. */
-
-	switch (flow) {
-		case IRQ_TYPE_NONE:
-			opb_mask_irq(d);
-			return 0;
-
-		case IRQ_TYPE_LEVEL_HIGH:
-			invert = 0;
-			break;
-
-		case IRQ_TYPE_LEVEL_LOW:
-			invert = 1;
-			break;
-
-		default:
-			return -EINVAL;
-	}
-
-	bit = (1 << (31 - irqd_to_hwirq(d)));
-	mask = ~bit;
-
-	spin_lock_irqsave(&opb->lock, flags);
-
-	ipr = opb_in(opb, OPB_MLSIPR);
-	ipr = (ipr & mask) | (invert ? bit : 0);
-	opb_out(opb, OPB_MLSIPR, ipr);
-	ipr = opb_in(opb, OPB_MLSIPR);  // Flush posted writes
-
-	spin_unlock_irqrestore(&opb->lock, flags);
-
-	/* Record the type in the interrupt descriptor */
-	irqd_set_trigger_type(d, flow);
-
-	return 0;
-}
-
-static struct irq_chip opb_irq_chip = {
-	.name		= "OPB",
-	.irq_mask	= opb_mask_irq,
-	.irq_unmask	= opb_unmask_irq,
-	.irq_mask_ack	= opb_mask_ack_irq,
-	.irq_ack	= opb_ack_irq,
-	.irq_set_type	= opb_set_irq_type
-};
-
-static int opb_host_map(struct irq_domain *host, unsigned int virq,
-		irq_hw_number_t hwirq)
-{
-	struct opb_pic *opb;
-
-	opb = host->host_data;
-
-	/* Most of the important stuff is handled by the generic host code, like
-	 * the lookup, so just attach some info to the virtual irq */
-
-	irq_set_chip_data(virq, opb);
-	irq_set_chip_and_handler(virq, &opb_irq_chip, handle_level_irq);
-	irq_set_irq_type(virq, IRQ_TYPE_NONE);
-
-	return 0;
-}
-
-static const struct irq_domain_ops opb_host_ops = {
-	.map = opb_host_map,
-	.xlate = irq_domain_xlate_twocell,
-};
-
-irqreturn_t opb_irq_handler(int irq, void *private)
-{
-	struct opb_pic *opb;
-	u32 ir, src, subvirq;
-
-	opb = (struct opb_pic *) private;
-
-	/* Read the OPB MLS Interrupt Register for
-	 * asserted interrupts */
-	ir = opb_in(opb, OPB_MLSIR);
-	if (!ir)
-		return IRQ_NONE;
-
-	do {
-		/* Get 1 - 32 source, *NOT* bit */
-		src = 32 - ffs(ir);
-
-		/* Translate from the OPB's conception of interrupt number to
-		 * Linux's virtual IRQ */
-
-		subvirq = irq_linear_revmap(opb->host, src);
-
-		generic_handle_irq(subvirq);
-	} while ((ir = opb_in(opb, OPB_MLSIR)));
-
-	return IRQ_HANDLED;
-}
-
-struct opb_pic *opb_pic_init_one(struct device_node *dn)
-{
-	struct opb_pic *opb;
-	struct resource res;
-
-	if (of_address_to_resource(dn, 0, &res)) {
-		printk(KERN_ERR "opb: Couldn't translate resource\n");
-		return  NULL;
-	}
-
-	opb = kzalloc(sizeof(struct opb_pic), GFP_KERNEL);
-	if (!opb) {
-		printk(KERN_ERR "opb: Failed to allocate opb struct!\n");
-		return NULL;
-	}
-
-	/* Get access to the OPB MMIO registers */
-	opb->regs = ioremap(res.start + 0x10000, 0x1000);
-	if (!opb->regs) {
-		printk(KERN_ERR "opb: Failed to allocate register space!\n");
-		goto free_opb;
-	}
-
-	/* Allocate an irq domain so that Linux knows that despite only
-	 * having one interrupt to issue, we're the controller for multiple
-	 * hardware IRQs, so later we can lookup their virtual IRQs. */
-
-	opb->host = irq_domain_add_linear(dn, OPB_NR_IRQS, &opb_host_ops, opb);
-	if (!opb->host) {
-		printk(KERN_ERR "opb: Failed to allocate IRQ host!\n");
-		goto free_regs;
-	}
-
-	opb->index = opb_index++;
-	spin_lock_init(&opb->lock);
-
-	/* Disable all interrupts by default */
-	opb_out(opb, OPB_MLSASIER, 0);
-	opb_out(opb, OPB_MLSIER, 0);
-
-	/* ACK any interrupts left by FW */
-	opb_out(opb, OPB_MLSIR, 0xFFFFFFFF);
-
-	return opb;
-
-free_regs:
-	iounmap(opb->regs);
-free_opb:
-	kfree(opb);
-	return NULL;
-}
-
-void __init opb_pic_init(void)
-{
-	struct device_node *dn;
-	struct opb_pic *opb;
-	int virq;
-	int rc;
-
-	/* Call init_one for each OPB device */
-	for_each_compatible_node(dn, NULL, "ibm,opb") {
-
-		/* Fill in an OPB struct */
-		opb = opb_pic_init_one(dn);
-		if (!opb) {
-			printk(KERN_WARNING "opb: Failed to init node, skipped!\n");
-			continue;
-		}
-
-		/* Map / get opb's hardware virtual irq */
-		virq = irq_of_parse_and_map(dn, 0);
-		if (virq <= 0) {
-			printk("opb: irq_op_parse_and_map failed!\n");
-			continue;
-		}
-
-		/* Attach opb interrupt handler to new virtual IRQ */
-		rc = request_irq(virq, opb_irq_handler, IRQF_NO_THREAD,
-				 "OPB LS Cascade", opb);
-		if (rc) {
-			printk("opb: request_irq failed: %d\n", rc);
-			continue;
-		}
-
-		printk("OPB%d init with %d IRQs at %p\n", opb->index,
-				OPB_NR_IRQS, opb->regs);
-	}
-}
diff --git a/arch/powerpc/platforms/wsp/psr2.c b/arch/powerpc/platforms/wsp/psr2.c
deleted file mode 100644
index 508ec8282b9..00000000000
--- a/arch/powerpc/platforms/wsp/psr2.c
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Copyright 2008-2011, IBM Corporation
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/delay.h>
-#include <linux/init.h>
-#include <linux/irq.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/of.h>
-#include <linux/smp.h>
-#include <linux/time.h>
-
-#include <asm/machdep.h>
-#include <asm/udbg.h>
-
-#include "ics.h"
-#include "wsp.h"
-
-
-static void psr2_spin(void)
-{
-	hard_irq_disable();
-	for (;;)
-		continue;
-}
-
-static void psr2_restart(char *cmd)
-{
-	psr2_spin();
-}
-
-static int __init psr2_probe(void)
-{
-	unsigned long root = of_get_flat_dt_root();
-
-	if (of_flat_dt_is_compatible(root, "ibm,wsp-chroma")) {
-		/* chroma systems also claim they are psr2s */
-		return 0;
-	}
-
-	if (!of_flat_dt_is_compatible(root, "ibm,psr2"))
-		return 0;
-
-	return 1;
-}
-
-define_machine(psr2_md) {
-	.name			= "PSR2 A2",
-	.probe			= psr2_probe,
-	.setup_arch		= wsp_setup_arch,
-	.restart		= psr2_restart,
-	.power_off		= psr2_spin,
-	.halt			= psr2_spin,
-	.calibrate_decr		= generic_calibrate_decr,
-	.init_IRQ		= wsp_setup_irq,
-	.progress		= udbg_progress,
-	.power_save		= book3e_idle,
-};
-
-machine_arch_initcall(psr2_md, wsp_probe_devices);
diff --git a/arch/powerpc/platforms/wsp/scom_smp.c b/arch/powerpc/platforms/wsp/scom_smp.c
deleted file mode 100644
index b56b70aeb49..00000000000
--- a/arch/powerpc/platforms/wsp/scom_smp.c
+++ /dev/null
@@ -1,426 +0,0 @@
-/*
- * SCOM support for A2 platforms
- *
- * Copyright 2007-2011 Benjamin Herrenschmidt, David Gibson,
- *		       Michael Ellerman, IBM Corp.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/cpumask.h>
-#include <linux/io.h>
-#include <linux/of.h>
-#include <linux/spinlock.h>
-#include <linux/types.h>
-
-#include <asm/cputhreads.h>
-#include <asm/reg_a2.h>
-#include <asm/scom.h>
-#include <asm/udbg.h>
-
-#include "wsp.h"
-
-#define SCOM_RAMC		0x2a		/* Ram Command */
-#define SCOM_RAMC_TGT1_EXT	0x80000000
-#define SCOM_RAMC_SRC1_EXT	0x40000000
-#define SCOM_RAMC_SRC2_EXT	0x20000000
-#define SCOM_RAMC_SRC3_EXT	0x10000000
-#define SCOM_RAMC_ENABLE	0x00080000
-#define SCOM_RAMC_THREADSEL	0x00060000
-#define SCOM_RAMC_EXECUTE	0x00010000
-#define SCOM_RAMC_MSR_OVERRIDE	0x00008000
-#define SCOM_RAMC_MSR_PR	0x00004000
-#define SCOM_RAMC_MSR_GS	0x00002000
-#define SCOM_RAMC_FORCE		0x00001000
-#define SCOM_RAMC_FLUSH		0x00000800
-#define SCOM_RAMC_INTERRUPT	0x00000004
-#define SCOM_RAMC_ERROR		0x00000002
-#define SCOM_RAMC_DONE		0x00000001
-#define SCOM_RAMI		0x29		/* Ram Instruction */
-#define SCOM_RAMIC		0x28		/* Ram Instruction and Command */
-#define SCOM_RAMIC_INSN		0xffffffff00000000
-#define SCOM_RAMD		0x2d		/* Ram Data */
-#define SCOM_RAMDH		0x2e		/* Ram Data High */
-#define SCOM_RAMDL		0x2f		/* Ram Data Low */
-#define SCOM_PCCR0		0x33		/* PC Configuration Register 0 */
-#define SCOM_PCCR0_ENABLE_DEBUG	0x80000000
-#define SCOM_PCCR0_ENABLE_RAM	0x40000000
-#define SCOM_THRCTL		0x30		/* Thread Control and Status */
-#define SCOM_THRCTL_T0_STOP	0x80000000
-#define SCOM_THRCTL_T1_STOP	0x40000000
-#define SCOM_THRCTL_T2_STOP	0x20000000
-#define SCOM_THRCTL_T3_STOP	0x10000000
-#define SCOM_THRCTL_T0_STEP	0x08000000
-#define SCOM_THRCTL_T1_STEP	0x04000000
-#define SCOM_THRCTL_T2_STEP	0x02000000
-#define SCOM_THRCTL_T3_STEP	0x01000000
-#define SCOM_THRCTL_T0_RUN	0x00800000
-#define SCOM_THRCTL_T1_RUN	0x00400000
-#define SCOM_THRCTL_T2_RUN	0x00200000
-#define SCOM_THRCTL_T3_RUN	0x00100000
-#define SCOM_THRCTL_T0_PM	0x00080000
-#define SCOM_THRCTL_T1_PM	0x00040000
-#define SCOM_THRCTL_T2_PM	0x00020000
-#define SCOM_THRCTL_T3_PM	0x00010000
-#define SCOM_THRCTL_T0_UDE	0x00008000
-#define SCOM_THRCTL_T1_UDE	0x00004000
-#define SCOM_THRCTL_T2_UDE	0x00002000
-#define SCOM_THRCTL_T3_UDE	0x00001000
-#define SCOM_THRCTL_ASYNC_DIS	0x00000800
-#define SCOM_THRCTL_TB_DIS	0x00000400
-#define SCOM_THRCTL_DEC_DIS	0x00000200
-#define SCOM_THRCTL_AND		0x31		/* Thread Control and Status */
-#define SCOM_THRCTL_OR		0x32		/* Thread Control and Status */
-
-
-static DEFINE_PER_CPU(scom_map_t, scom_ptrs);
-
-static scom_map_t get_scom(int cpu, struct device_node *np, int *first_thread)
-{
-	scom_map_t scom = per_cpu(scom_ptrs, cpu);
-	int tcpu;
-
-	if (scom_map_ok(scom)) {
-		*first_thread = 0;
-		return scom;
-	}
-
-	*first_thread = 1;
-
-	scom = scom_map_device(np, 0);
-
-	for (tcpu = cpu_first_thread_sibling(cpu);
-	     tcpu <= cpu_last_thread_sibling(cpu); tcpu++)
-		per_cpu(scom_ptrs, tcpu) = scom;
-
-	/* Hack: for the boot core, this will actually get called on
-	 * the second thread up, not the first so our test above will
-	 * set first_thread incorrectly. */
-	if (cpu_first_thread_sibling(cpu) == 0)
-		*first_thread = 0;
-
-	return scom;
-}
-
-static int a2_scom_ram(scom_map_t scom, int thread, u32 insn, int extmask)
-{
-	u64 cmd, mask, val;
-	int n = 0;
-
-	cmd = ((u64)insn << 32) | (((u64)extmask & 0xf) << 28)
-		| ((u64)thread << 17) | SCOM_RAMC_ENABLE | SCOM_RAMC_EXECUTE;
-	mask = SCOM_RAMC_DONE | SCOM_RAMC_INTERRUPT | SCOM_RAMC_ERROR;
-
-	scom_write(scom, SCOM_RAMIC, cmd);
-
-	while (!((val = scom_read(scom, SCOM_RAMC)) & mask)) {
-		pr_devel("Waiting on RAMC = 0x%llx\n", val);
-		if (++n == 3) {
-			pr_err("RAMC timeout on instruction 0x%08x, thread %d\n",
-			       insn, thread);
-			return -1;
-		}
-	}
-
-	if (val & SCOM_RAMC_INTERRUPT) {
-		pr_err("RAMC interrupt on instruction 0x%08x, thread %d\n",
-		       insn, thread);
-		return -SCOM_RAMC_INTERRUPT;
-	}
-
-	if (val & SCOM_RAMC_ERROR) {
-		pr_err("RAMC error on instruction 0x%08x, thread %d\n",
-		       insn, thread);
-		return -SCOM_RAMC_ERROR;
-	}
-
-	return 0;
-}
-
-static int a2_scom_getgpr(scom_map_t scom, int thread, int gpr, int alt,
-			  u64 *out_gpr)
-{
-	int rc;
-
-	/* or rN, rN, rN */
-	u32 insn = 0x7c000378 | (gpr << 21) | (gpr << 16) | (gpr << 11);
-	rc = a2_scom_ram(scom, thread, insn, alt ? 0xf : 0x0);
-	if (rc)
-		return rc;
-
-	*out_gpr = scom_read(scom, SCOM_RAMD);
-
-	return 0;
-}
-
-static int a2_scom_getspr(scom_map_t scom, int thread, int spr, u64 *out_spr)
-{
-	int rc, sprhi, sprlo;
-	u32 insn;
-
-	sprhi = spr >> 5;
-	sprlo = spr & 0x1f;
-	insn = 0x7c2002a6 | (sprlo << 16) | (sprhi << 11); /* mfspr r1,spr */
-
-	if (spr == 0x0ff0)
-		insn = 0x7c2000a6; /* mfmsr r1 */
-
-	rc = a2_scom_ram(scom, thread, insn, 0xf);
-	if (rc)
-		return rc;
-	return a2_scom_getgpr(scom, thread, 1, 1, out_spr);
-}
-
-static int a2_scom_setgpr(scom_map_t scom, int thread, int gpr,
-			  int alt, u64 val)
-{
-	u32 lis = 0x3c000000 | (gpr << 21);
-	u32 li = 0x38000000 | (gpr << 21);
-	u32 oris = 0x64000000 | (gpr << 21) | (gpr << 16);
-	u32 ori = 0x60000000 | (gpr << 21) | (gpr << 16);
-	u32 rldicr32 = 0x780007c6 | (gpr << 21) | (gpr << 16);
-	u32 highest = val >> 48;
-	u32 higher = (val >> 32) & 0xffff;
-	u32 high = (val >> 16) & 0xffff;
-	u32 low = val & 0xffff;
-	int lext = alt ? 0x8 : 0x0;
-	int oext = alt ? 0xf : 0x0;
-	int rc = 0;
-
-	if (highest)
-		rc |= a2_scom_ram(scom, thread, lis | highest, lext);
-
-	if (higher) {
-		if (highest)
-			rc |= a2_scom_ram(scom, thread, oris | higher, oext);
-		else
-			rc |= a2_scom_ram(scom, thread, li | higher, lext);
-	}
-
-	if (highest || higher)
-		rc |= a2_scom_ram(scom, thread, rldicr32, oext);
-
-	if (high) {
-		if (highest || higher)
-			rc |= a2_scom_ram(scom, thread, oris | high, oext);
-		else
-			rc |= a2_scom_ram(scom, thread, lis | high, lext);
-	}
-
-	if (highest || higher || high)
-		rc |= a2_scom_ram(scom, thread, ori | low, oext);
-	else
-		rc |= a2_scom_ram(scom, thread, li | low, lext);
-
-	return rc;
-}
-
-static int a2_scom_setspr(scom_map_t scom, int thread, int spr, u64 val)
-{
-	int sprhi = spr >> 5;
-	int sprlo = spr & 0x1f;
-	/* mtspr spr, r1 */
-	u32 insn = 0x7c2003a6 | (sprlo << 16) | (sprhi << 11);
-
-	if (spr == 0x0ff0)
-		insn = 0x7c200124; /* mtmsr r1 */
-
-	if (a2_scom_setgpr(scom, thread, 1, 1, val))
-		return -1;
-
-	return a2_scom_ram(scom, thread, insn, 0xf);
-}
-
-static int a2_scom_initial_tlb(scom_map_t scom, int thread)
-{
-	extern u32 a2_tlbinit_code_start[], a2_tlbinit_code_end[];
-	extern u32 a2_tlbinit_after_iprot_flush[];
-	extern u32 a2_tlbinit_after_linear_map[];
-	u32 assoc, entries, i;
-	u64 epn, tlbcfg;
-	u32 *p;
-	int rc;
-
-	/* Invalidate all entries (including iprot) */
-
-	rc = a2_scom_getspr(scom, thread, SPRN_TLB0CFG, &tlbcfg);
-	if (rc)
-		goto scom_fail;
-	entries = tlbcfg & TLBnCFG_N_ENTRY;
-	assoc = (tlbcfg & TLBnCFG_ASSOC) >> 24;
-	epn = 0;
-
-	/* Set MMUCR2 to enable 4K, 64K, 1M, 16M and 1G pages */
-	a2_scom_setspr(scom, thread, SPRN_MMUCR2, 0x000a7531);
-	/* Set MMUCR3 to write all thids bit to the TLB */
-	a2_scom_setspr(scom, thread, SPRN_MMUCR3, 0x0000000f);
-
-	/* Set MAS1 for 1G page size, and MAS2 to our initial EPN */
-	a2_scom_setspr(scom, thread, SPRN_MAS1, MAS1_TSIZE(BOOK3E_PAGESZ_1GB));
-	a2_scom_setspr(scom, thread, SPRN_MAS2, epn);
-	for (i = 0; i < entries; i++) {
-
-		a2_scom_setspr(scom, thread, SPRN_MAS0, MAS0_ESEL(i % assoc));
-
-		/* tlbwe */
-		rc = a2_scom_ram(scom, thread, 0x7c0007a4, 0);
-		if (rc)
-			goto scom_fail;
-
-		/* Next entry is new address? */
-		if((i + 1) % assoc == 0) {
-			epn += (1 << 30);
-			a2_scom_setspr(scom, thread, SPRN_MAS2, epn);
-		}
-	}
-
-	/* Setup args for linear mapping */
-	rc = a2_scom_setgpr(scom, thread, 3, 0, MAS0_TLBSEL(0));
-	if (rc)
-		goto scom_fail;
-
-	/* Linear mapping */
-	for (p = a2_tlbinit_code_start; p < a2_tlbinit_after_linear_map; p++) {
-		rc = a2_scom_ram(scom, thread, *p, 0);
-		if (rc)
-			goto scom_fail;
-	}
-
-	/*
-	 * For the boot thread, between the linear mapping and the debug
-	 * mappings there is a loop to flush iprot mappings. Ramming doesn't do
-	 * branches, but the secondary threads don't need to be nearly as smart
-	 * (i.e. we don't need to worry about invalidating the mapping we're
-	 * standing on).
-	 */
-
-	/* Debug mappings. Expects r11 = MAS0 from linear map (set above) */
-	for (p = a2_tlbinit_after_iprot_flush; p < a2_tlbinit_code_end; p++) {
-		rc = a2_scom_ram(scom, thread, *p, 0);
-		if (rc)
-			goto scom_fail;
-	}
-
-scom_fail:
-	if (rc)
-		pr_err("Setting up initial TLB failed, err %d\n", rc);
-
-	if (rc == -SCOM_RAMC_INTERRUPT) {
-		/* Interrupt, dump some status */
-		int rc[10];
-		u64 iar, srr0, srr1, esr, mas0, mas1, mas2, mas7_3, mas8, ccr2;
-		rc[0] = a2_scom_getspr(scom, thread, SPRN_IAR, &iar);
-		rc[1] = a2_scom_getspr(scom, thread, SPRN_SRR0, &srr0);
-		rc[2] = a2_scom_getspr(scom, thread, SPRN_SRR1, &srr1);
-		rc[3] = a2_scom_getspr(scom, thread, SPRN_ESR, &esr);
-		rc[4] = a2_scom_getspr(scom, thread, SPRN_MAS0, &mas0);
-		rc[5] = a2_scom_getspr(scom, thread, SPRN_MAS1, &mas1);
-		rc[6] = a2_scom_getspr(scom, thread, SPRN_MAS2, &mas2);
-		rc[7] = a2_scom_getspr(scom, thread, SPRN_MAS7_MAS3, &mas7_3);
-		rc[8] = a2_scom_getspr(scom, thread, SPRN_MAS8, &mas8);
-		rc[9] = a2_scom_getspr(scom, thread, SPRN_A2_CCR2, &ccr2);
-		pr_err(" -> retreived IAR =0x%llx (err %d)\n", iar, rc[0]);
-		pr_err("    retreived SRR0=0x%llx (err %d)\n", srr0, rc[1]);
-		pr_err("    retreived SRR1=0x%llx (err %d)\n", srr1, rc[2]);
-		pr_err("    retreived ESR =0x%llx (err %d)\n", esr, rc[3]);
-		pr_err("    retreived MAS0=0x%llx (err %d)\n", mas0, rc[4]);
-		pr_err("    retreived MAS1=0x%llx (err %d)\n", mas1, rc[5]);
-		pr_err("    retreived MAS2=0x%llx (err %d)\n", mas2, rc[6]);
-		pr_err("    retreived MS73=0x%llx (err %d)\n", mas7_3, rc[7]);
-		pr_err("    retreived MAS8=0x%llx (err %d)\n", mas8, rc[8]);
-		pr_err("    retreived CCR2=0x%llx (err %d)\n", ccr2, rc[9]);
-	}
-
-	return rc;
-}
-
-int a2_scom_startup_cpu(unsigned int lcpu, int thr_idx, struct device_node *np)
-{
-	u64 init_iar, init_msr, init_ccr2;
-	unsigned long start_here;
-	int rc, core_setup;
-	scom_map_t scom;
-	u64 pccr0;
-
-	scom = get_scom(lcpu, np, &core_setup);
-	if (!scom) {
-		printk(KERN_ERR "Couldn't map SCOM for CPU%d\n", lcpu);
-		return -1;
-	}
-
-	pr_devel("Bringing up CPU%d using SCOM...\n", lcpu);
-
-	pccr0 = scom_read(scom, SCOM_PCCR0);
-	scom_write(scom, SCOM_PCCR0, pccr0 | SCOM_PCCR0_ENABLE_DEBUG |
-				     SCOM_PCCR0_ENABLE_RAM);
-
-	/* Stop the thead with THRCTL. If we are setting up the TLB we stop all
-	 * threads. We also disable asynchronous interrupts while RAMing.
-	 */
-	if (core_setup)
-		scom_write(scom, SCOM_THRCTL_OR,
-			      SCOM_THRCTL_T0_STOP |
-			      SCOM_THRCTL_T1_STOP |
-			      SCOM_THRCTL_T2_STOP |
-			      SCOM_THRCTL_T3_STOP |
-			      SCOM_THRCTL_ASYNC_DIS);
-	else
-		scom_write(scom, SCOM_THRCTL_OR, SCOM_THRCTL_T0_STOP >> thr_idx);
-
-	/* Flush its pipeline just in case */
-	scom_write(scom, SCOM_RAMC, ((u64)thr_idx << 17) |
-		      SCOM_RAMC_FLUSH | SCOM_RAMC_ENABLE);
-
-	a2_scom_getspr(scom, thr_idx, SPRN_IAR, &init_iar);
-	a2_scom_getspr(scom, thr_idx, 0x0ff0, &init_msr);
-	a2_scom_getspr(scom, thr_idx, SPRN_A2_CCR2, &init_ccr2);
-
-	/* Set MSR to MSR_CM (0x0ff0 is magic value for MSR_CM) */
-	rc = a2_scom_setspr(scom, thr_idx, 0x0ff0, MSR_CM);
-	if (rc) {
-		pr_err("Failed to set MSR ! err %d\n", rc);
-		return rc;
-	}
-
-	/* RAM in an sync/isync for the sake of it */
-	a2_scom_ram(scom, thr_idx, 0x7c0004ac, 0);
-	a2_scom_ram(scom, thr_idx, 0x4c00012c, 0);
-
-	if (core_setup) {
-		pr_devel("CPU%d is first thread in core, initializing TLB...\n",
-			 lcpu);
-		rc = a2_scom_initial_tlb(scom, thr_idx);
-		if (rc)
-			goto fail;
-	}
-
-	start_here = *(unsigned long *)(core_setup ? generic_secondary_smp_init
-					: generic_secondary_thread_init);
-	pr_devel("CPU%d entry point at 0x%lx...\n", lcpu, start_here);
-
-	rc |= a2_scom_setspr(scom, thr_idx, SPRN_IAR, start_here);
-	rc |= a2_scom_setgpr(scom, thr_idx, 3, 0,
-			     get_hard_smp_processor_id(lcpu));
-	/*
-	 * Tell book3e_secondary_core_init not to set up the TLB, we've
-	 * already done that.
-	 */
-	rc |= a2_scom_setgpr(scom, thr_idx, 4, 0, 1);
-
-	rc |= a2_scom_setspr(scom, thr_idx, SPRN_TENS, 0x1 << thr_idx);
-
-	scom_write(scom, SCOM_RAMC, 0);
-	scom_write(scom, SCOM_THRCTL_AND, ~(SCOM_THRCTL_T0_STOP >> thr_idx));
-	scom_write(scom, SCOM_PCCR0, pccr0);
-fail:
-	pr_devel("  SCOM initialization %s\n", rc ? "failed" : "succeeded");
-	if (rc) {
-		pr_err("Old IAR=0x%08llx MSR=0x%08llx CCR2=0x%08llx\n",
-		       init_iar, init_msr, init_ccr2);
-	}
-
-	return rc;
-}
diff --git a/arch/powerpc/platforms/wsp/scom_wsp.c b/arch/powerpc/platforms/wsp/scom_wsp.c
deleted file mode 100644
index 4052e2259f3..00000000000
--- a/arch/powerpc/platforms/wsp/scom_wsp.c
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- *  SCOM backend for WSP
- *
- *  Copyright 2010 Benjamin Herrenschmidt, IBM Corp.
- *
- *  This program is free software; you can redistribute it and/or
- *  modify it under the terms of the GNU General Public License
- *  as published by the Free Software Foundation; either version
- *  2 of the License, or (at your option) any later version.
- */
-
-#include <linux/cpumask.h>
-#include <linux/io.h>
-#include <linux/of.h>
-#include <linux/spinlock.h>
-#include <linux/types.h>
-
-#include <asm/cputhreads.h>
-#include <asm/reg_a2.h>
-#include <asm/scom.h>
-#include <asm/udbg.h>
-
-#include "wsp.h"
-
-
-static scom_map_t wsp_scom_map(struct device_node *dev, u64 reg, u64 count)
-{
-	struct resource r;
-	u64 xscom_addr;
-
-	if (!of_get_property(dev, "scom-controller", NULL)) {
-		pr_err("%s: device %s is not a SCOM controller\n",
-			__func__, dev->full_name);
-		return SCOM_MAP_INVALID;
-	}
-
-	if (of_address_to_resource(dev, 0, &r)) {
-		pr_debug("Failed to find SCOM controller address\n");
-		return 0;
-	}
-
-	/* Transform the SCOM address into an XSCOM offset */
-	xscom_addr = ((reg & 0x7f000000) >> 1) | ((reg & 0xfffff) << 3);
-
-	return (scom_map_t)ioremap(r.start + xscom_addr, count << 3);
-}
-
-static void wsp_scom_unmap(scom_map_t map)
-{
-	iounmap((void *)map);
-}
-
-static u64 wsp_scom_read(scom_map_t map, u32 reg)
-{
-	u64 __iomem *addr = (u64 __iomem *)map;
-
-	return in_be64(addr + reg);
-}
-
-static void wsp_scom_write(scom_map_t map, u32 reg, u64 value)
-{
-	u64 __iomem *addr = (u64 __iomem *)map;
-
-	return out_be64(addr + reg, value);
-}
-
-static const struct scom_controller wsp_scom_controller = {
-	.map	= wsp_scom_map,
-	.unmap	= wsp_scom_unmap,
-	.read	= wsp_scom_read,
-	.write	= wsp_scom_write
-};
-
-void scom_init_wsp(void)
-{
-	scom_init(&wsp_scom_controller);
-}
diff --git a/arch/powerpc/platforms/wsp/setup.c b/arch/powerpc/platforms/wsp/setup.c
deleted file mode 100644
index 11ac2f05e01..00000000000
--- a/arch/powerpc/platforms/wsp/setup.c
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright 2010 Michael Ellerman, IBM Corporation
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/kernel.h>
-#include <linux/of_platform.h>
-
-#include "wsp.h"
-
-/*
- * Find chip-id by walking up device tree looking for ibm,wsp-chip-id property.
- * Won't work for nodes that are not a descendant of a wsp node.
- */
-int wsp_get_chip_id(struct device_node *dn)
-{
-	const u32 *p;
-	int rc;
-
-	/* Start looking at the specified node, not its parent */
-	dn = of_node_get(dn);
-	while (dn && !(p = of_get_property(dn, "ibm,wsp-chip-id", NULL)))
-		dn = of_get_next_parent(dn);
-
-	if (!dn)
-		return -1;
-
-	rc = *p;
-	of_node_put(dn);
-
-	return rc;
-}
diff --git a/arch/powerpc/platforms/wsp/smp.c b/arch/powerpc/platforms/wsp/smp.c
deleted file mode 100644
index 332a18b8140..00000000000
--- a/arch/powerpc/platforms/wsp/smp.c
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- *  SMP Support for A2 platforms
- *
- *  Copyright 2007 Benjamin Herrenschmidt, IBM Corp.
- *
- *  This program is free software; you can redistribute it and/or
- *  modify it under the terms of the GNU General Public License
- *  as published by the Free Software Foundation; either version
- *  2 of the License, or (at your option) any later version.
- *
- */
-
-#include <linux/cpumask.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/of.h>
-#include <linux/smp.h>
-
-#include <asm/dbell.h>
-#include <asm/machdep.h>
-#include <asm/xics.h>
-
-#include "ics.h"
-#include "wsp.h"
-
-static void smp_a2_setup_cpu(int cpu)
-{
-	doorbell_setup_this_cpu();
-
-	if (cpu != boot_cpuid)
-		xics_setup_cpu();
-}
-
-int smp_a2_kick_cpu(int nr)
-{
-	const char *enable_method;
-	struct device_node *np;
-	int thr_idx;
-
-	if (nr < 0 || nr >= NR_CPUS)
-		return -ENOENT;
-
-	np = of_get_cpu_node(nr, &thr_idx);
-	if (!np)
-		return -ENODEV;
-
-	enable_method = of_get_property(np, "enable-method", NULL);
-	pr_devel("CPU%d has enable-method: \"%s\"\n", nr, enable_method);
-
-	if (!enable_method) {
-                printk(KERN_ERR "CPU%d has no enable-method\n", nr);
-		return -ENOENT;
-	} else if (strcmp(enable_method, "ibm,a2-scom") == 0) {
-		if (a2_scom_startup_cpu(nr, thr_idx, np))
-			return -1;
-	} else {
-		printk(KERN_ERR "CPU%d: Don't understand enable-method \"%s\"\n",
-                       nr, enable_method);
-		return -EINVAL;
-	}
-
-	/*
-	 * The processor is currently spinning, waiting for the
-	 * cpu_start field to become non-zero After we set cpu_start,
-	 * the processor will continue on to secondary_start
-	 */
-	paca[nr].cpu_start = 1;
-
-	return 0;
-}
-
-static int __init smp_a2_probe(void)
-{
-	return num_possible_cpus();
-}
-
-static struct smp_ops_t a2_smp_ops = {
-	.message_pass	= NULL,	/* Use smp_muxed_ipi_message_pass */
-	.cause_ipi	= doorbell_cause_ipi,
-	.probe		= smp_a2_probe,
-	.kick_cpu	= smp_a2_kick_cpu,
-	.setup_cpu	= smp_a2_setup_cpu,
-};
-
-void __init a2_setup_smp(void)
-{
-	smp_ops = &a2_smp_ops;
-}
diff --git a/arch/powerpc/platforms/wsp/wsp.c b/arch/powerpc/platforms/wsp/wsp.c
deleted file mode 100644
index d25cc96c21b..00000000000
--- a/arch/powerpc/platforms/wsp/wsp.c
+++ /dev/null
@@ -1,115 +0,0 @@
-/*
- * Copyright 2008-2011, IBM Corporation
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/kernel.h>
-#include <linux/of.h>
-#include <linux/of_device.h>
-#include <linux/smp.h>
-#include <linux/delay.h>
-#include <linux/time.h>
-
-#include <asm/scom.h>
-
-#include "wsp.h"
-#include "ics.h"
-
-#define WSP_SOC_COMPATIBLE	"ibm,wsp-soc"
-#define PBIC_COMPATIBLE		"ibm,wsp-pbic"
-#define COPRO_COMPATIBLE	"ibm,wsp-coprocessor"
-
-static int __init wsp_probe_buses(void)
-{
-	static __initdata struct of_device_id bus_ids[] = {
-		/*
-		 * every node in between needs to be here or you won't
-		 * find it
-		 */
-		{ .compatible = WSP_SOC_COMPATIBLE, },
-		{ .compatible = PBIC_COMPATIBLE, },
-		{ .compatible = COPRO_COMPATIBLE, },
-		{},
-	};
-	of_platform_bus_probe(NULL, bus_ids, NULL);
-
-	return 0;
-}
-
-void __init wsp_setup_arch(void)
-{
-	/* init to some ~sane value until calibrate_delay() runs */
-	loops_per_jiffy = 50000000;
-
-	scom_init_wsp();
-
-	/* Setup SMP callback */
-#ifdef CONFIG_SMP
-	a2_setup_smp();
-#endif
-#ifdef CONFIG_PCI
-	wsp_setup_pci();
-#endif
-}
-
-void __init wsp_setup_irq(void)
-{
-	wsp_init_irq();
-	opb_pic_init();
-}
-
-
-int __init wsp_probe_devices(void)
-{
-	struct device_node *np;
-
-	/* Our RTC is a ds1500. It seems to be programatically compatible
-	 * with the ds1511 for which we have a driver so let's use that
-	 */
-	np = of_find_compatible_node(NULL, NULL, "dallas,ds1500");
-	if (np != NULL) {
-		struct resource res;
-		if (of_address_to_resource(np, 0, &res) == 0)
-			platform_device_register_simple("ds1511", 0, &res, 1);
-	}
-
-	wsp_probe_buses();
-
-	return 0;
-}
-
-void wsp_halt(void)
-{
-	u64 val;
-	scom_map_t m;
-	struct device_node *dn;
-	struct device_node *mine;
-	struct device_node *me;
-
-	me = of_get_cpu_node(smp_processor_id(), NULL);
-	mine = scom_find_parent(me);
-
-	/* This will halt all the A2s but not power off the chip */
-	for_each_node_with_property(dn, "scom-controller") {
-		if (dn == mine)
-			continue;
-		m = scom_map(dn, 0, 1);
-
-		/* read-modify-write it so the HW probe does not get
-		 * confused */
-		val = scom_read(m, 0);
-		val |= 1;
-		scom_write(m, 0, val);
-		scom_unmap(m);
-	}
-	m = scom_map(mine, 0, 1);
-	val = scom_read(m, 0);
-	val |= 1;
-	scom_write(m, 0, val);
-	/* should never return */
-	scom_unmap(m);
-}
diff --git a/arch/powerpc/platforms/wsp/wsp.h b/arch/powerpc/platforms/wsp/wsp.h
deleted file mode 100644
index 62ef21afb89..00000000000
--- a/arch/powerpc/platforms/wsp/wsp.h
+++ /dev/null
@@ -1,30 +0,0 @@
-#ifndef __WSP_H
-#define __WSP_H
-
-#include <asm/wsp.h>
-
-/* Devtree compatible strings for major devices */
-#define PCIE_COMPATIBLE     "ibm,wsp-pciex"
-
-extern void wsp_setup_arch(void);
-extern void wsp_setup_irq(void);
-extern int wsp_probe_devices(void);
-extern void wsp_halt(void);
-
-extern void wsp_setup_pci(void);
-extern void scom_init_wsp(void);
-
-extern void a2_setup_smp(void);
-extern int a2_scom_startup_cpu(unsigned int lcpu, int thr_idx,
-			       struct device_node *np);
-extern int smp_a2_cpu_bootable(unsigned int nr);
-extern int smp_a2_kick_cpu(int nr);
-
-extern void opb_pic_init(void);
-
-/* chroma specific managment */
-extern void wsp_h8_restart(char *cmd);
-extern void wsp_h8_power_off(void);
-extern void __init wsp_setup_h8(void);
-
-#endif /*  __WSP_H */
diff --git a/arch/powerpc/platforms/wsp/wsp_pci.c b/arch/powerpc/platforms/wsp/wsp_pci.c
deleted file mode 100644
index 62cb527493e..00000000000
--- a/arch/powerpc/platforms/wsp/wsp_pci.c
+++ /dev/null
@@ -1,1133 +0,0 @@
-/*
- * Copyright 2010 Ben Herrenschmidt, IBM Corporation
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#define DEBUG
-
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/delay.h>
-#include <linux/string.h>
-#include <linux/init.h>
-#include <linux/bootmem.h>
-#include <linux/irq.h>
-#include <linux/interrupt.h>
-#include <linux/debugfs.h>
-
-#include <asm/sections.h>
-#include <asm/io.h>
-#include <asm/prom.h>
-#include <asm/pci-bridge.h>
-#include <asm/machdep.h>
-#include <asm/ppc-pci.h>
-#include <asm/iommu.h>
-#include <asm/io-workarounds.h>
-#include <asm/debug.h>
-
-#include "wsp.h"
-#include "wsp_pci.h"
-#include "msi.h"
-
-
-/* Max number of TVTs for one table. Only 32-bit tables can use
- * multiple TVTs and so the max currently supported is thus 8
- * since only 2G of DMA space is supported
- */
-#define MAX_TABLE_TVT_COUNT		8
-
-struct wsp_dma_table {
-	struct list_head	link;
-	struct iommu_table	table;
-	struct wsp_phb	*phb;
-	struct page		*tces[MAX_TABLE_TVT_COUNT];
-};
-
-/* We support DMA regions from 0...2G in 32bit space (no support for
- * 64-bit DMA just yet). Each device gets a separate TCE table (TVT
- * entry) with validation enabled (though not supported by SimiCS
- * just yet).
- *
- * To simplify things, we divide this 2G space into N regions based
- * on the constant below which could be turned into a tunable eventually
- *
- * We then assign dynamically those regions to devices as they show up.
- *
- * We use a bitmap as an allocator for these.
- *
- * Tables are allocated/created dynamically as devices are discovered,
- * multiple TVT entries are used if needed
- *
- * When 64-bit DMA support is added we should simply use a separate set
- * of larger regions (the HW supports 64 TVT entries). We can
- * additionally create a bypass region in 64-bit space for performances
- * though that would have a cost in term of security.
- *
- * If you set NUM_DMA32_REGIONS to 1, then a single table is shared
- * for all devices and bus/dev/fn validation is disabled
- *
- * Note that a DMA32 region cannot be smaller than 256M so the max
- * supported here for now is 8. We don't yet support sharing regions
- * between multiple devices so the max number of devices supported
- * is MAX_TABLE_TVT_COUNT.
- */
-#define NUM_DMA32_REGIONS	1
-
-struct wsp_phb {
-	struct pci_controller	*hose;
-
-	/* Lock controlling access to the list of dma tables.
-	 * It does -not- protect against dma_* operations on
-	 * those tables, those should be stopped before an entry
-	 * is removed from the list.
-	 *
-	 * The lock is also used for error handling operations
-	 */
-	spinlock_t		lock;
-	struct list_head	dma_tables;
-	unsigned long		dma32_map;
-	unsigned long		dma32_base;
-	unsigned int		dma32_num_regions;
-	unsigned long		dma32_region_size;
-
-	/* Debugfs stuff */
-	struct dentry		*ddir;
-
-	struct list_head	all;
-};
-static LIST_HEAD(wsp_phbs);
-
-//#define cfg_debug(fmt...)	pr_debug(fmt)
-#define cfg_debug(fmt...)
-
-
-static int wsp_pcie_read_config(struct pci_bus *bus, unsigned int devfn,
-				  int offset, int len, u32 *val)
-{
-	struct pci_controller *hose;
-	int suboff;
-	u64 addr;
-
-	hose = pci_bus_to_host(bus);
-	if (hose == NULL)
-		return PCIBIOS_DEVICE_NOT_FOUND;
-	if (offset >= 0x1000)
-		return  PCIBIOS_BAD_REGISTER_NUMBER;
-	addr = PCIE_REG_CA_ENABLE |
-		((u64)bus->number) << PCIE_REG_CA_BUS_SHIFT |
-		((u64)devfn) << PCIE_REG_CA_FUNC_SHIFT |
-		((u64)offset & ~3) << PCIE_REG_CA_REG_SHIFT;
-	suboff = offset & 3;
-
-	/*
-	 * Note: the caller has already checked that offset is
-	 * suitably aligned and that len is 1, 2 or 4.
-	 */
-
-	switch (len) {
-	case 1:
-		addr |= (0x8ul >> suboff) << PCIE_REG_CA_BE_SHIFT;
-		out_be64(hose->cfg_data + PCIE_REG_CONFIG_ADDRESS, addr);
-		*val = (in_le32(hose->cfg_data + PCIE_REG_CONFIG_DATA)
-			>> (suboff << 3)) & 0xff;
-		cfg_debug("read 1 %02x:%02x:%02x + %02x/%x addr=0x%llx val=%02x\n",
-			  bus->number, devfn >> 3, devfn & 7,
-			  offset, suboff, addr, *val);
-		break;
-	case 2:
-		addr |= (0xcul >> suboff) << PCIE_REG_CA_BE_SHIFT;
-		out_be64(hose->cfg_data + PCIE_REG_CONFIG_ADDRESS, addr);
-		*val = (in_le32(hose->cfg_data + PCIE_REG_CONFIG_DATA)
-			>> (suboff << 3)) & 0xffff;
-		cfg_debug("read 2 %02x:%02x:%02x + %02x/%x addr=0x%llx val=%04x\n",
-			  bus->number, devfn >> 3, devfn & 7,
-			  offset, suboff, addr, *val);
-		break;
-	default:
-		addr |= 0xful << PCIE_REG_CA_BE_SHIFT;
-		out_be64(hose->cfg_data + PCIE_REG_CONFIG_ADDRESS, addr);
-		*val = in_le32(hose->cfg_data + PCIE_REG_CONFIG_DATA);
-		cfg_debug("read 4 %02x:%02x:%02x + %02x/%x addr=0x%llx val=%08x\n",
-			  bus->number, devfn >> 3, devfn & 7,
-			  offset, suboff, addr, *val);
-		break;
-	}
-	return PCIBIOS_SUCCESSFUL;
-}
-
-static int wsp_pcie_write_config(struct pci_bus *bus, unsigned int devfn,
-				   int offset, int len, u32 val)
-{
-	struct pci_controller *hose;
-	int suboff;
-	u64 addr;
-
-	hose = pci_bus_to_host(bus);
-	if (hose == NULL)
-		return PCIBIOS_DEVICE_NOT_FOUND;
-	if (offset >= 0x1000)
-		return  PCIBIOS_BAD_REGISTER_NUMBER;
-	addr = PCIE_REG_CA_ENABLE |
-		((u64)bus->number) << PCIE_REG_CA_BUS_SHIFT |
-		((u64)devfn) << PCIE_REG_CA_FUNC_SHIFT |
-		((u64)offset & ~3) << PCIE_REG_CA_REG_SHIFT;
-	suboff = offset & 3;
-
-	/*
-	 * Note: the caller has already checked that offset is
-	 * suitably aligned and that len is 1, 2 or 4.
-	 */
-	switch (len) {
-	case 1:
-		addr |= (0x8ul >> suboff) << PCIE_REG_CA_BE_SHIFT;
-		val <<= suboff << 3;
-		out_be64(hose->cfg_data + PCIE_REG_CONFIG_ADDRESS, addr);
-		out_le32(hose->cfg_data + PCIE_REG_CONFIG_DATA, val);
-		cfg_debug("write 1 %02x:%02x:%02x + %02x/%x addr=0x%llx val=%02x\n",
-			  bus->number, devfn >> 3, devfn & 7,
-			  offset, suboff, addr, val);
-		break;
-	case 2:
-		addr |= (0xcul >> suboff) << PCIE_REG_CA_BE_SHIFT;
-		val <<= suboff << 3;
-		out_be64(hose->cfg_data + PCIE_REG_CONFIG_ADDRESS, addr);
-		out_le32(hose->cfg_data + PCIE_REG_CONFIG_DATA, val);
-		cfg_debug("write 2 %02x:%02x:%02x + %02x/%x addr=0x%llx val=%04x\n",
-			  bus->number, devfn >> 3, devfn & 7,
-			  offset, suboff, addr, val);
-		break;
-	default:
-		addr |= 0xful << PCIE_REG_CA_BE_SHIFT;
-		out_be64(hose->cfg_data + PCIE_REG_CONFIG_ADDRESS, addr);
-		out_le32(hose->cfg_data + PCIE_REG_CONFIG_DATA, val);
-		cfg_debug("write 4 %02x:%02x:%02x + %02x/%x addr=0x%llx val=%08x\n",
-			  bus->number, devfn >> 3, devfn & 7,
-			  offset, suboff, addr, val);
-		break;
-	}
-	return PCIBIOS_SUCCESSFUL;
-}
-
-static struct pci_ops wsp_pcie_pci_ops =
-{
-	.read = wsp_pcie_read_config,
-	.write = wsp_pcie_write_config,
-};
-
-#define TCE_SHIFT		12
-#define TCE_PAGE_SIZE		(1 << TCE_SHIFT)
-#define TCE_PCI_WRITE		0x2		 /* write from PCI allowed */
-#define TCE_PCI_READ		0x1	 	 /* read from PCI allowed */
-#define TCE_RPN_MASK		0x3fffffffffful  /* 42-bit RPN (4K pages) */
-#define TCE_RPN_SHIFT		12
-
-//#define dma_debug(fmt...)	pr_debug(fmt)
-#define dma_debug(fmt...)
-
-static int tce_build_wsp(struct iommu_table *tbl, long index, long npages,
-			   unsigned long uaddr, enum dma_data_direction direction,
-			   struct dma_attrs *attrs)
-{
-	struct wsp_dma_table *ptbl = container_of(tbl,
-						    struct wsp_dma_table,
-						    table);
-	u64 proto_tce;
-	u64 *tcep;
-	u64 rpn;
-
-	proto_tce = TCE_PCI_READ;
-#ifdef CONFIG_WSP_DD1_WORKAROUND_DD1_TCE_BUGS
-	proto_tce |= TCE_PCI_WRITE;
-#else
-	if (direction != DMA_TO_DEVICE)
-		proto_tce |= TCE_PCI_WRITE;
-#endif
-
-	/* XXX Make this faster by factoring out the page address for
-	 * within a TCE table
-	 */
-	while (npages--) {
-		/* We don't use it->base as the table can be scattered */
-		tcep = (u64 *)page_address(ptbl->tces[index >> 16]);
-		tcep += (index & 0xffff);
-
-		/* can't move this out since we might cross LMB boundary */
-		rpn = __pa(uaddr) >> TCE_SHIFT;
-		*tcep = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT;
-
-		dma_debug("[DMA] TCE %p set to 0x%016llx (dma addr: 0x%lx)\n",
-			  tcep, *tcep, (tbl->it_offset + index) << IOMMU_PAGE_SHIFT);
-
-		uaddr += TCE_PAGE_SIZE;
-		index++;
-	}
-	return 0;
-}
-
-static void tce_free_wsp(struct iommu_table *tbl, long index, long npages)
-{
-	struct wsp_dma_table *ptbl = container_of(tbl,
-						    struct wsp_dma_table,
-						    table);
-#ifndef CONFIG_WSP_DD1_WORKAROUND_DD1_TCE_BUGS
-	struct pci_controller *hose = ptbl->phb->hose;
-#endif
-	u64 *tcep;
-
-	/* XXX Make this faster by factoring out the page address for
-	 * within a TCE table. Also use line-kill option to kill multiple
-	 * TCEs at once
-	 */
-	while (npages--) {
-		/* We don't use it->base as the table can be scattered */
-		tcep = (u64 *)page_address(ptbl->tces[index >> 16]);
-		tcep += (index & 0xffff);
-		dma_debug("[DMA] TCE %p cleared\n", tcep);
-		*tcep = 0;
-#ifndef CONFIG_WSP_DD1_WORKAROUND_DD1_TCE_BUGS
-		/* Don't write there since it would pollute other MMIO accesses */
-		out_be64(hose->cfg_data + PCIE_REG_TCE_KILL,
-			 PCIE_REG_TCEKILL_SINGLE | PCIE_REG_TCEKILL_PS_4K |
-			 (__pa(tcep) & PCIE_REG_TCEKILL_ADDR_MASK));
-#endif
-		index++;
-	}
-}
-
-static struct wsp_dma_table *wsp_pci_create_dma32_table(struct wsp_phb *phb,
-							    unsigned int region,
-							    struct pci_dev *validate)
-{
-	struct pci_controller *hose = phb->hose;
-	unsigned long size = phb->dma32_region_size;
-	unsigned long addr = phb->dma32_region_size * region + phb->dma32_base;
-	struct wsp_dma_table *tbl;
-	int tvts_per_table, i, tvt, nid;
-	unsigned long flags;
-
-	nid = of_node_to_nid(phb->hose->dn);
-
-	/* Calculate how many TVTs are needed */
-	tvts_per_table = size / 0x10000000;
-	if (tvts_per_table == 0)
-		tvts_per_table = 1;
-
-	/* Calculate the base TVT index. We know all tables have the same
-	 * size so we just do a simple multiply here
-	 */
-	tvt = region * tvts_per_table;
-
-	pr_debug("         Region : %d\n", region);
-	pr_debug("      DMA range : 0x%08lx..0x%08lx\n", addr, addr + size - 1);
-	pr_debug(" Number of TVTs : %d\n", tvts_per_table);
-	pr_debug("       Base TVT : %d\n", tvt);
-	pr_debug("         Node   : %d\n", nid);
-
-	tbl = kzalloc_node(sizeof(struct wsp_dma_table), GFP_KERNEL, nid);
-	if (!tbl)
-		return ERR_PTR(-ENOMEM);
-	tbl->phb = phb;
-
-	/* Create as many TVTs as needed, each represents 256M at most */
-	for (i = 0; i < tvts_per_table; i++) {
-		u64 tvt_data1, tvt_data0;
-
-		/* Allocate table. We use a 4K TCE size for now always so
-		 * one table is always 8 * (258M / 4K) == 512K
-		 */
-		tbl->tces[i] = alloc_pages_node(nid, GFP_KERNEL, get_order(0x80000));
-		if (tbl->tces[i] == NULL)
-			goto fail;
-		memset(page_address(tbl->tces[i]), 0, 0x80000);
-
-		pr_debug(" TCE table %d at : %p\n", i, page_address(tbl->tces[i]));
-
-		/* Table size. We currently set it to be the whole 256M region */
-		tvt_data0 = 2ull << IODA_TVT0_TCE_TABLE_SIZE_SHIFT;
-		/* IO page size set to 4K */
-		tvt_data1 = 1ull << IODA_TVT1_IO_PAGE_SIZE_SHIFT;
-		/* Shift in the address */
-		tvt_data0 |= __pa(page_address(tbl->tces[i])) << IODA_TVT0_TTA_SHIFT;
-
-		/* Validation stuff. We only validate fully bus/dev/fn for now
-		 * one day maybe we can group devices but that isn't the case
-		 * at the moment
-		 */
-		if (validate) {
-			tvt_data0 |= IODA_TVT0_BUSNUM_VALID_MASK;
-			tvt_data0 |= validate->bus->number;
-			tvt_data1 |= IODA_TVT1_DEVNUM_VALID;
-			tvt_data1 |= ((u64)PCI_SLOT(validate->devfn))
-				<< IODA_TVT1_DEVNUM_VALUE_SHIFT;
-			tvt_data1 |= IODA_TVT1_FUNCNUM_VALID;
-			tvt_data1 |= ((u64)PCI_FUNC(validate->devfn))
-				<< IODA_TVT1_FUNCNUM_VALUE_SHIFT;
-		}
-
-		/* XX PE number is always 0 for now */
-
-		/* Program the values using the PHB lock */
-		spin_lock_irqsave(&phb->lock, flags);
-		out_be64(hose->cfg_data + PCIE_REG_IODA_ADDR,
-			 (tvt + i) | PCIE_REG_IODA_AD_TBL_TVT);
-		out_be64(hose->cfg_data + PCIE_REG_IODA_DATA1, tvt_data1);
-		out_be64(hose->cfg_data + PCIE_REG_IODA_DATA0, tvt_data0);
-		spin_unlock_irqrestore(&phb->lock, flags);
-	}
-
-	/* Init bits and pieces */
-	tbl->table.it_blocksize = 16;
-	tbl->table.it_offset = addr >> IOMMU_PAGE_SHIFT;
-	tbl->table.it_size = size >> IOMMU_PAGE_SHIFT;
-
-	/*
-	 * It's already blank but we clear it anyway.
-	 * Consider an aditiona interface that makes cleaing optional
-	 */
-	iommu_init_table(&tbl->table, nid);
-
-	list_add(&tbl->link, &phb->dma_tables);
-	return tbl;
-
- fail:
-	pr_debug("  Failed to allocate a 256M TCE table !\n");
-	for (i = 0; i < tvts_per_table; i++)
-		if (tbl->tces[i])
-			__free_pages(tbl->tces[i], get_order(0x80000));
-	kfree(tbl);
-	return ERR_PTR(-ENOMEM);
-}
-
-static void wsp_pci_dma_dev_setup(struct pci_dev *pdev)
-{
-	struct dev_archdata *archdata = &pdev->dev.archdata;
-	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
-	struct wsp_phb *phb = hose->private_data;
-	struct wsp_dma_table *table = NULL;
-	unsigned long flags;
-	int i;
-
-	/* Don't assign an iommu table to a bridge */
-	if (pdev->hdr_type == PCI_HEADER_TYPE_BRIDGE)
-		return;
-
-	pr_debug("%s: Setting up DMA...\n", pci_name(pdev));
-
-	spin_lock_irqsave(&phb->lock, flags);
-
-	/* If only one region, check if it already exist */
-	if (phb->dma32_num_regions == 1) {
-		spin_unlock_irqrestore(&phb->lock, flags);
-		if (list_empty(&phb->dma_tables))
-			table = wsp_pci_create_dma32_table(phb, 0, NULL);
-		else
-			table = list_first_entry(&phb->dma_tables,
-						 struct wsp_dma_table,
-						 link);
-	} else {
-		/* else find a free region */
-		for (i = 0; i < phb->dma32_num_regions && !table; i++) {
-			if (__test_and_set_bit(i, &phb->dma32_map))
-				continue;
-			spin_unlock_irqrestore(&phb->lock, flags);
-			table = wsp_pci_create_dma32_table(phb, i, pdev);
-		}
-	}
-
-	/* Check if we got an error */
-	if (IS_ERR(table)) {
-		pr_err("%s: Failed to create DMA table, err %ld !\n",
-		       pci_name(pdev), PTR_ERR(table));
-		return;
-	}
-
-	/* Or a valid table */
-	if (table) {
-		pr_info("%s: Setup iommu: 32-bit DMA region 0x%08lx..0x%08lx\n",
-			pci_name(pdev),
-			table->table.it_offset << IOMMU_PAGE_SHIFT,
-			(table->table.it_offset << IOMMU_PAGE_SHIFT)
-			+ phb->dma32_region_size - 1);
-		archdata->dma_data.iommu_table_base = &table->table;
-		return;
-	}
-
-	/* Or no room */
-	spin_unlock_irqrestore(&phb->lock, flags);
-	pr_err("%s: Out of DMA space !\n", pci_name(pdev));
-}
-
-static void __init wsp_pcie_configure_hw(struct pci_controller *hose)
-{
-	u64 val;
-	int i;
-
-#define DUMP_REG(x) \
-	pr_debug("%-30s : 0x%016llx\n", #x, in_be64(hose->cfg_data + x))
-
-	/*
-	 * Some WSP variants  has a bogus class code by default in the PCI-E
-	 * root complex's built-in P2P bridge
-	 */
-	val = in_be64(hose->cfg_data + PCIE_REG_SYS_CFG1);
-	pr_debug("PCI-E SYS_CFG1 : 0x%llx\n", val);
-	out_be64(hose->cfg_data + PCIE_REG_SYS_CFG1,
-		 (val & ~PCIE_REG_SYS_CFG1_CLASS_CODE) | (PCI_CLASS_BRIDGE_PCI << 8));
-	pr_debug("PCI-E SYS_CFG1 : 0x%llx\n", in_be64(hose->cfg_data + PCIE_REG_SYS_CFG1));
-
-#ifdef CONFIG_WSP_DD1_WORKAROUND_DD1_TCE_BUGS
-	/* XXX Disable TCE caching, it doesn't work on DD1 */
-	out_be64(hose->cfg_data + 0xe50,
-		 in_be64(hose->cfg_data + 0xe50) | (3ull << 62));
-	printk("PCI-E DEBUG CONTROL 5 = 0x%llx\n", in_be64(hose->cfg_data + 0xe50));
-#endif
-
-	/* Configure M32A and IO. IO is hard wired to be 1M for now */
-	out_be64(hose->cfg_data + PCIE_REG_IO_BASE_ADDR, hose->io_base_phys);
-	out_be64(hose->cfg_data + PCIE_REG_IO_BASE_MASK,
-		 (~(hose->io_resource.end - hose->io_resource.start)) &
-		 0x3fffffff000ul);
-	out_be64(hose->cfg_data + PCIE_REG_IO_START_ADDR, 0 | 1);
-
-	out_be64(hose->cfg_data + PCIE_REG_M32A_BASE_ADDR,
-		 hose->mem_resources[0].start);
-	printk("Want to write to M32A_BASE_MASK : 0x%llx\n",
-		 (~(hose->mem_resources[0].end -
-		    hose->mem_resources[0].start)) & 0x3ffffff0000ul);
-	out_be64(hose->cfg_data + PCIE_REG_M32A_BASE_MASK,
-		 (~(hose->mem_resources[0].end -
-		    hose->mem_resources[0].start)) & 0x3ffffff0000ul);
-	out_be64(hose->cfg_data + PCIE_REG_M32A_START_ADDR,
-		 (hose->mem_resources[0].start - hose->mem_offset[0]) | 1);
-
-	/* Clear all TVT entries
-	 *
-	 * XX Might get TVT count from device-tree
-	 */
-	for (i = 0; i < IODA_TVT_COUNT; i++) {
-		out_be64(hose->cfg_data + PCIE_REG_IODA_ADDR,
-			 PCIE_REG_IODA_AD_TBL_TVT | i);
-		out_be64(hose->cfg_data + PCIE_REG_IODA_DATA1, 0);
-		out_be64(hose->cfg_data + PCIE_REG_IODA_DATA0, 0);
-	}
-
-	/* Kill the TCE cache */
-	out_be64(hose->cfg_data + PCIE_REG_PHB_CONFIG,
-		 in_be64(hose->cfg_data + PCIE_REG_PHB_CONFIG) |
-		 PCIE_REG_PHBC_64B_TCE_EN);
-
-	/* Enable 32 & 64-bit MSIs, IO space and M32A */
-	val = PCIE_REG_PHBC_32BIT_MSI_EN |
-	      PCIE_REG_PHBC_IO_EN |
-	      PCIE_REG_PHBC_64BIT_MSI_EN |
-	      PCIE_REG_PHBC_M32A_EN;
-	if (iommu_is_off)
-		val |= PCIE_REG_PHBC_DMA_XLATE_BYPASS;
-	pr_debug("Will write config: 0x%llx\n", val);
-	out_be64(hose->cfg_data + PCIE_REG_PHB_CONFIG, val);
-
-	/* Enable error reporting */
-	out_be64(hose->cfg_data + 0xe00,
-		 in_be64(hose->cfg_data + 0xe00) | 0x0008000000000000ull);
-
-	/* Mask an error that's generated when doing config space probe
-	 *
-	 * XXX Maybe we should only mask it around config space cycles... that or
-	 * ignore it when we know we had a config space cycle recently ?
-	 */
-	out_be64(hose->cfg_data + PCIE_REG_DMA_ERR_STATUS_MASK, 0x8000000000000000ull);
-	out_be64(hose->cfg_data + PCIE_REG_DMA_ERR1_STATUS_MASK, 0x8000000000000000ull);
-
-	/* Enable UTL errors, for now, all of them got to UTL irq 1
-	 *
-	 * We similarily mask one UTL error caused apparently during normal
-	 * probing. We also mask the link up error
-	 */
-	out_be64(hose->cfg_data + PCIE_UTL_SYS_BUS_AGENT_ERR_SEV, 0);
-	out_be64(hose->cfg_data + PCIE_UTL_RC_ERR_SEVERITY, 0);
-	out_be64(hose->cfg_data + PCIE_UTL_PCIE_PORT_ERROR_SEV, 0);
-	out_be64(hose->cfg_data + PCIE_UTL_SYS_BUS_AGENT_IRQ_EN, 0xffffffff00000000ull);
-	out_be64(hose->cfg_data + PCIE_UTL_PCIE_PORT_IRQ_EN, 0xff5fffff00000000ull);
-	out_be64(hose->cfg_data + PCIE_UTL_EP_ERR_IRQ_EN, 0xffffffff00000000ull);
-
-	DUMP_REG(PCIE_REG_IO_BASE_ADDR);
-	DUMP_REG(PCIE_REG_IO_BASE_MASK);
-	DUMP_REG(PCIE_REG_IO_START_ADDR);
-	DUMP_REG(PCIE_REG_M32A_BASE_ADDR);
-	DUMP_REG(PCIE_REG_M32A_BASE_MASK);
-	DUMP_REG(PCIE_REG_M32A_START_ADDR);
-	DUMP_REG(PCIE_REG_M32B_BASE_ADDR);
-	DUMP_REG(PCIE_REG_M32B_BASE_MASK);
-	DUMP_REG(PCIE_REG_M32B_START_ADDR);
-	DUMP_REG(PCIE_REG_M64_BASE_ADDR);
-	DUMP_REG(PCIE_REG_M64_BASE_MASK);
-	DUMP_REG(PCIE_REG_M64_START_ADDR);
-	DUMP_REG(PCIE_REG_PHB_CONFIG);
-}
-
-static void wsp_pci_wait_io_idle(struct wsp_phb *phb, unsigned long port)
-{
-	u64 val;
-	int i;
-
-	for (i = 0; i < 10000; i++) {
-		val = in_be64(phb->hose->cfg_data + 0xe08);
-		if ((val & 0x1900000000000000ull) == 0x0100000000000000ull)
-			return;
-		udelay(1);
-	}
-	pr_warning("PCI IO timeout on domain %d port 0x%lx\n",
-		   phb->hose->global_number, port);
-}
-
-#define DEF_PCI_AC_RET_pio(name, ret, at, al, aa)		\
-static ret wsp_pci_##name at					\
-{								\
-	struct iowa_bus *bus;					\
-	struct wsp_phb *phb;					\
-	unsigned long flags;					\
-	ret rval;						\
-	bus = iowa_pio_find_bus(aa);				\
-	WARN_ON(!bus);						\
-	phb = bus->private;					\
-	spin_lock_irqsave(&phb->lock, flags);			\
-	wsp_pci_wait_io_idle(phb, aa);				\
-	rval = __do_##name al;					\
-	spin_unlock_irqrestore(&phb->lock, flags);		\
-	return rval;						\
-}
-
-#define DEF_PCI_AC_NORET_pio(name, at, al, aa)			\
-static void wsp_pci_##name at					\
-{								\
-	struct iowa_bus *bus;					\
-	struct wsp_phb *phb;					\
-	unsigned long flags;					\
-	bus = iowa_pio_find_bus(aa);				\
-	WARN_ON(!bus);						\
-	phb = bus->private;					\
-	spin_lock_irqsave(&phb->lock, flags);			\
-	wsp_pci_wait_io_idle(phb, aa);				\
-	__do_##name al;						\
-	spin_unlock_irqrestore(&phb->lock, flags);		\
-}
-
-#define DEF_PCI_AC_RET_mem(name, ret, at, al, aa)
-#define DEF_PCI_AC_NORET_mem(name, at, al, aa)
-
-#define DEF_PCI_AC_RET(name, ret, at, al, space, aa)		\
-	DEF_PCI_AC_RET_##space(name, ret, at, al, aa)
-
-#define DEF_PCI_AC_NORET(name, at, al, space, aa)		\
-	DEF_PCI_AC_NORET_##space(name, at, al, aa)		\
-
-
-#include <asm/io-defs.h>
-
-#undef DEF_PCI_AC_RET
-#undef DEF_PCI_AC_NORET
-
-static struct ppc_pci_io wsp_pci_iops = {
-	.inb = wsp_pci_inb,
-	.inw = wsp_pci_inw,
-	.inl = wsp_pci_inl,
-	.outb = wsp_pci_outb,
-	.outw = wsp_pci_outw,
-	.outl = wsp_pci_outl,
-	.insb = wsp_pci_insb,
-	.insw = wsp_pci_insw,
-	.insl = wsp_pci_insl,
-	.outsb = wsp_pci_outsb,
-	.outsw = wsp_pci_outsw,
-	.outsl = wsp_pci_outsl,
-};
-
-static int __init wsp_setup_one_phb(struct device_node *np)
-{
-	struct pci_controller *hose;
-	struct wsp_phb *phb;
-
-	pr_info("PCI: Setting up PCIe host bridge 0x%s\n", np->full_name);
-
-	phb = zalloc_maybe_bootmem(sizeof(struct wsp_phb), GFP_KERNEL);
-	if (!phb)
-		return -ENOMEM;
-	hose = pcibios_alloc_controller(np);
-	if (!hose) {
-		/* Can't really free the phb */
-		return -ENOMEM;
-	}
-	hose->private_data = phb;
-	phb->hose = hose;
-
-	INIT_LIST_HEAD(&phb->dma_tables);
-	spin_lock_init(&phb->lock);
-
-	/* XXX Use bus-range property ? */
-	hose->first_busno = 0;
-	hose->last_busno = 0xff;
-
-	/* We use cfg_data as the address for the whole bridge MMIO space
-	 */
-	hose->cfg_data = of_iomap(hose->dn, 0);
-
-	pr_debug("PCIe registers mapped at 0x%p\n", hose->cfg_data);
-
-	/* Get the ranges of the device-tree */
-	pci_process_bridge_OF_ranges(hose, np, 0);
-
-	/* XXX Force re-assigning of everything for now */
-	pci_add_flags(PCI_REASSIGN_ALL_BUS | PCI_REASSIGN_ALL_RSRC |
-		      PCI_ENABLE_PROC_DOMAINS);
-
-	/* Calculate how the TCE space is divided */
-	phb->dma32_base		= 0;
-	phb->dma32_num_regions	= NUM_DMA32_REGIONS;
-	if (phb->dma32_num_regions > MAX_TABLE_TVT_COUNT) {
-		pr_warning("IOMMU: Clamped to %d DMA32 regions\n",
-			   MAX_TABLE_TVT_COUNT);
-		phb->dma32_num_regions = MAX_TABLE_TVT_COUNT;
-	}
-	phb->dma32_region_size	= 0x80000000 / phb->dma32_num_regions;
-
-	BUG_ON(!is_power_of_2(phb->dma32_region_size));
-
-	/* Setup config ops */
-	hose->ops = &wsp_pcie_pci_ops;
-
-	/* Configure the HW */
-	wsp_pcie_configure_hw(hose);
-
-	/* Instanciate IO workarounds */
-	iowa_register_bus(hose, &wsp_pci_iops, NULL, phb);
-#ifdef CONFIG_PCI_MSI
-	wsp_setup_phb_msi(hose);
-#endif
-
-	/* Add to global list */
-	list_add(&phb->all, &wsp_phbs);
-
-	return 0;
-}
-
-void __init wsp_setup_pci(void)
-{
-	struct device_node *np;
-	int rc;
-
-	/* Find host bridges */
-	for_each_compatible_node(np, "pciex", PCIE_COMPATIBLE) {
-		rc = wsp_setup_one_phb(np);
-		if (rc)
-			pr_err("Failed to setup PCIe bridge %s, rc=%d\n",
-			       np->full_name, rc);
-	}
-
-	/* Establish device-tree linkage */
-	pci_devs_phb_init();
-
-	/* Set DMA ops to use TCEs */
-	if (iommu_is_off) {
-		pr_info("PCI-E: Disabled TCEs, using direct DMA\n");
-		set_pci_dma_ops(&dma_direct_ops);
-	} else {
-		ppc_md.pci_dma_dev_setup = wsp_pci_dma_dev_setup;
-		ppc_md.tce_build = tce_build_wsp;
-		ppc_md.tce_free = tce_free_wsp;
-		set_pci_dma_ops(&dma_iommu_ops);
-	}
-}
-
-#define err_debug(fmt...)	pr_debug(fmt)
-//#define err_debug(fmt...)
-
-static int __init wsp_pci_get_err_irq_no_dt(struct device_node *np)
-{
-	const u32 *prop;
-	int hw_irq;
-
-	/* Ok, no interrupts property, let's try to find our child P2P */
-	np = of_get_next_child(np, NULL);
-	if (np == NULL)
-		return 0;
-
-	/* Grab it's interrupt map */
-	prop = of_get_property(np, "interrupt-map", NULL);
-	if (prop == NULL)
-		return 0;
-
-	/* Grab one of the interrupts in there, keep the low 4 bits */
-	hw_irq = prop[5] & 0xf;
-
-	/* 0..4 for PHB 0 and 5..9 for PHB 1 */
-	if (hw_irq < 5)
-		hw_irq = 4;
-	else
-		hw_irq = 9;
-	hw_irq |= prop[5] & ~0xf;
-
-	err_debug("PCI: Using 0x%x as error IRQ for %s\n",
-		  hw_irq, np->parent->full_name);
-	return irq_create_mapping(NULL, hw_irq);
-}
-
-static const struct {
-	u32 offset;
-	const char *name;
-} wsp_pci_regs[] = {
-#define DREG(x) { PCIE_REG_##x, #x }
-#define DUTL(x) { PCIE_UTL_##x, "UTL_" #x }
-	/* Architected registers except CONFIG_ and IODA
-         * to avoid side effects
-	 */
-	DREG(DMA_CHAN_STATUS),
-	DREG(CPU_LOADSTORE_STATUS),
-	DREG(LOCK0),
-	DREG(LOCK1),
-	DREG(PHB_CONFIG),
-	DREG(IO_BASE_ADDR),
-	DREG(IO_BASE_MASK),
-	DREG(IO_START_ADDR),
-	DREG(M32A_BASE_ADDR),
-	DREG(M32A_BASE_MASK),
-	DREG(M32A_START_ADDR),
-	DREG(M32B_BASE_ADDR),
-	DREG(M32B_BASE_MASK),
-	DREG(M32B_START_ADDR),
-	DREG(M64_BASE_ADDR),
-	DREG(M64_BASE_MASK),
-	DREG(M64_START_ADDR),
-	DREG(TCE_KILL),
-	DREG(LOCK2),
-	DREG(PHB_GEN_CAP),
-	DREG(PHB_TCE_CAP),
-	DREG(PHB_IRQ_CAP),
-	DREG(PHB_EEH_CAP),
-	DREG(PAPR_ERR_INJ_CONTROL),
-	DREG(PAPR_ERR_INJ_ADDR),
-	DREG(PAPR_ERR_INJ_MASK),
-
-	/* UTL core regs */
-	DUTL(SYS_BUS_CONTROL),
-	DUTL(STATUS),
-	DUTL(SYS_BUS_AGENT_STATUS),
-	DUTL(SYS_BUS_AGENT_ERR_SEV),
-	DUTL(SYS_BUS_AGENT_IRQ_EN),
-	DUTL(SYS_BUS_BURST_SZ_CONF),
-	DUTL(REVISION_ID),
-	DUTL(OUT_POST_HDR_BUF_ALLOC),
-	DUTL(OUT_POST_DAT_BUF_ALLOC),
-	DUTL(IN_POST_HDR_BUF_ALLOC),
-	DUTL(IN_POST_DAT_BUF_ALLOC),
-	DUTL(OUT_NP_BUF_ALLOC),
-	DUTL(IN_NP_BUF_ALLOC),
-	DUTL(PCIE_TAGS_ALLOC),
-	DUTL(GBIF_READ_TAGS_ALLOC),
-
-	DUTL(PCIE_PORT_CONTROL),
-	DUTL(PCIE_PORT_STATUS),
-	DUTL(PCIE_PORT_ERROR_SEV),
-	DUTL(PCIE_PORT_IRQ_EN),
-	DUTL(RC_STATUS),
-	DUTL(RC_ERR_SEVERITY),
-	DUTL(RC_IRQ_EN),
-	DUTL(EP_STATUS),
-	DUTL(EP_ERR_SEVERITY),
-	DUTL(EP_ERR_IRQ_EN),
-	DUTL(PCI_PM_CTRL1),
-	DUTL(PCI_PM_CTRL2),
-
-	/* PCIe stack regs */
-	DREG(SYSTEM_CONFIG1),
-	DREG(SYSTEM_CONFIG2),
-	DREG(EP_SYSTEM_CONFIG),
-	DREG(EP_FLR),
-	DREG(EP_BAR_CONFIG),
-	DREG(LINK_CONFIG),
-	DREG(PM_CONFIG),
-	DREG(DLP_CONTROL),
-	DREG(DLP_STATUS),
-	DREG(ERR_REPORT_CONTROL),
-	DREG(SLOT_CONTROL1),
-	DREG(SLOT_CONTROL2),
-	DREG(UTL_CONFIG),
-	DREG(BUFFERS_CONFIG),
-	DREG(ERROR_INJECT),
-	DREG(SRIOV_CONFIG),
-	DREG(PF0_SRIOV_STATUS),
-	DREG(PF1_SRIOV_STATUS),
-	DREG(PORT_NUMBER),
-	DREG(POR_SYSTEM_CONFIG),
-
-	/* Internal logic regs */
-	DREG(PHB_VERSION),
-	DREG(RESET),
-	DREG(PHB_CONTROL),
-	DREG(PHB_TIMEOUT_CONTROL1),
-	DREG(PHB_QUIESCE_DMA),
-	DREG(PHB_DMA_READ_TAG_ACTV),
-	DREG(PHB_TCE_READ_TAG_ACTV),
-
-	/* FIR registers */
-	DREG(LEM_FIR_ACCUM),
-	DREG(LEM_FIR_AND_MASK),
-	DREG(LEM_FIR_OR_MASK),
-	DREG(LEM_ACTION0),
-	DREG(LEM_ACTION1),
-	DREG(LEM_ERROR_MASK),
-	DREG(LEM_ERROR_AND_MASK),
-	DREG(LEM_ERROR_OR_MASK),
-
-	/* Error traps registers */
-	DREG(PHB_ERR_STATUS),
-	DREG(PHB_ERR_STATUS),
-	DREG(PHB_ERR1_STATUS),
-	DREG(PHB_ERR_INJECT),
-	DREG(PHB_ERR_LEM_ENABLE),
-	DREG(PHB_ERR_IRQ_ENABLE),
-	DREG(PHB_ERR_FREEZE_ENABLE),
-	DREG(PHB_ERR_SIDE_ENABLE),
-	DREG(PHB_ERR_LOG_0),
-	DREG(PHB_ERR_LOG_1),
-	DREG(PHB_ERR_STATUS_MASK),
-	DREG(PHB_ERR1_STATUS_MASK),
-	DREG(MMIO_ERR_STATUS),
-	DREG(MMIO_ERR1_STATUS),
-	DREG(MMIO_ERR_INJECT),
-	DREG(MMIO_ERR_LEM_ENABLE),
-	DREG(MMIO_ERR_IRQ_ENABLE),
-	DREG(MMIO_ERR_FREEZE_ENABLE),
-	DREG(MMIO_ERR_SIDE_ENABLE),
-	DREG(MMIO_ERR_LOG_0),
-	DREG(MMIO_ERR_LOG_1),
-	DREG(MMIO_ERR_STATUS_MASK),
-	DREG(MMIO_ERR1_STATUS_MASK),
-	DREG(DMA_ERR_STATUS),
-	DREG(DMA_ERR1_STATUS),
-	DREG(DMA_ERR_INJECT),
-	DREG(DMA_ERR_LEM_ENABLE),
-	DREG(DMA_ERR_IRQ_ENABLE),
-	DREG(DMA_ERR_FREEZE_ENABLE),
-	DREG(DMA_ERR_SIDE_ENABLE),
-	DREG(DMA_ERR_LOG_0),
-	DREG(DMA_ERR_LOG_1),
-	DREG(DMA_ERR_STATUS_MASK),
-	DREG(DMA_ERR1_STATUS_MASK),
-
-	/* Debug and Trace registers */
-	DREG(PHB_DEBUG_CONTROL0),
-	DREG(PHB_DEBUG_STATUS0),
-	DREG(PHB_DEBUG_CONTROL1),
-	DREG(PHB_DEBUG_STATUS1),
-	DREG(PHB_DEBUG_CONTROL2),
-	DREG(PHB_DEBUG_STATUS2),
-	DREG(PHB_DEBUG_CONTROL3),
-	DREG(PHB_DEBUG_STATUS3),
-	DREG(PHB_DEBUG_CONTROL4),
-	DREG(PHB_DEBUG_STATUS4),
-	DREG(PHB_DEBUG_CONTROL5),
-	DREG(PHB_DEBUG_STATUS5),
-
-	/* Don't seem to exist ...
-	DREG(PHB_DEBUG_CONTROL6),
-	DREG(PHB_DEBUG_STATUS6),
-	*/
-};
-
-static int wsp_pci_regs_show(struct seq_file *m, void *private)
-{
-	struct wsp_phb *phb = m->private;
-	struct pci_controller *hose = phb->hose;
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(wsp_pci_regs); i++) {
-		/* Skip write-only regs */
-		if (wsp_pci_regs[i].offset == 0xc08 ||
-		    wsp_pci_regs[i].offset == 0xc10 ||
-		    wsp_pci_regs[i].offset == 0xc38 ||
-		    wsp_pci_regs[i].offset == 0xc40)
-			continue;
-		seq_printf(m, "0x%03x: 0x%016llx %s\n",
-			   wsp_pci_regs[i].offset,
-			   in_be64(hose->cfg_data + wsp_pci_regs[i].offset),
-			   wsp_pci_regs[i].name);
-	}
-	return 0;
-}
-
-static int wsp_pci_regs_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, wsp_pci_regs_show, inode->i_private);
-}
-
-static const struct file_operations wsp_pci_regs_fops = {
-	.open = wsp_pci_regs_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = single_release,
-};
-
-static int wsp_pci_reg_set(void *data, u64 val)
-{
-	out_be64((void __iomem *)data, val);
-	return 0;
-}
-
-static int wsp_pci_reg_get(void *data, u64 *val)
-{
-	*val = in_be64((void __iomem *)data);
-	return 0;
-}
-
-DEFINE_SIMPLE_ATTRIBUTE(wsp_pci_reg_fops, wsp_pci_reg_get, wsp_pci_reg_set, "0x%llx\n");
-
-static irqreturn_t wsp_pci_err_irq(int irq, void *dev_id)
-{
-	struct wsp_phb *phb = dev_id;
-	struct pci_controller *hose = phb->hose;
-	irqreturn_t handled = IRQ_NONE;
-	struct wsp_pcie_err_log_data ed;
-
-	pr_err("PCI: Error interrupt on %s (PHB %d)\n",
-	       hose->dn->full_name, hose->global_number);
- again:
-	memset(&ed, 0, sizeof(ed));
-
-	/* Read and clear UTL errors */
-	ed.utl_sys_err = in_be64(hose->cfg_data + PCIE_UTL_SYS_BUS_AGENT_STATUS);
-	if (ed.utl_sys_err)
-		out_be64(hose->cfg_data + PCIE_UTL_SYS_BUS_AGENT_STATUS, ed.utl_sys_err);
-	ed.utl_port_err = in_be64(hose->cfg_data + PCIE_UTL_PCIE_PORT_STATUS);
-	if (ed.utl_port_err)
-		out_be64(hose->cfg_data + PCIE_UTL_PCIE_PORT_STATUS, ed.utl_port_err);
-	ed.utl_rc_err = in_be64(hose->cfg_data + PCIE_UTL_RC_STATUS);
-	if (ed.utl_rc_err)
-		out_be64(hose->cfg_data + PCIE_UTL_RC_STATUS, ed.utl_rc_err);
-
-	/* Read and clear main trap errors */
-	ed.phb_err = in_be64(hose->cfg_data + PCIE_REG_PHB_ERR_STATUS);
-	if (ed.phb_err) {
-		ed.phb_err1 = in_be64(hose->cfg_data + PCIE_REG_PHB_ERR1_STATUS);
-		ed.phb_log0 = in_be64(hose->cfg_data + PCIE_REG_PHB_ERR_LOG_0);
-		ed.phb_log1 = in_be64(hose->cfg_data + PCIE_REG_PHB_ERR_LOG_1);
-		out_be64(hose->cfg_data + PCIE_REG_PHB_ERR1_STATUS, 0);
-		out_be64(hose->cfg_data + PCIE_REG_PHB_ERR_STATUS, 0);
-	}
-	ed.mmio_err = in_be64(hose->cfg_data + PCIE_REG_MMIO_ERR_STATUS);
-	if (ed.mmio_err) {
-		ed.mmio_err1 = in_be64(hose->cfg_data + PCIE_REG_MMIO_ERR1_STATUS);
-		ed.mmio_log0 = in_be64(hose->cfg_data + PCIE_REG_MMIO_ERR_LOG_0);
-		ed.mmio_log1 = in_be64(hose->cfg_data + PCIE_REG_MMIO_ERR_LOG_1);
-		out_be64(hose->cfg_data + PCIE_REG_MMIO_ERR1_STATUS, 0);
-		out_be64(hose->cfg_data + PCIE_REG_MMIO_ERR_STATUS, 0);
-	}
-	ed.dma_err = in_be64(hose->cfg_data + PCIE_REG_DMA_ERR_STATUS);
-	if (ed.dma_err) {
-		ed.dma_err1 = in_be64(hose->cfg_data + PCIE_REG_DMA_ERR1_STATUS);
-		ed.dma_log0 = in_be64(hose->cfg_data + PCIE_REG_DMA_ERR_LOG_0);
-		ed.dma_log1 = in_be64(hose->cfg_data + PCIE_REG_DMA_ERR_LOG_1);
-		out_be64(hose->cfg_data + PCIE_REG_DMA_ERR1_STATUS, 0);
-		out_be64(hose->cfg_data + PCIE_REG_DMA_ERR_STATUS, 0);
-	}
-
-	/* Now print things out */
-	if (ed.phb_err) {
-		pr_err("   PHB Error Status      : 0x%016llx\n", ed.phb_err);
-		pr_err("   PHB First Error Status: 0x%016llx\n", ed.phb_err1);
-		pr_err("   PHB Error Log 0       : 0x%016llx\n", ed.phb_log0);
-		pr_err("   PHB Error Log 1       : 0x%016llx\n", ed.phb_log1);
-	}
-	if (ed.mmio_err) {
-		pr_err("  MMIO Error Status      : 0x%016llx\n", ed.mmio_err);
-		pr_err("  MMIO First Error Status: 0x%016llx\n", ed.mmio_err1);
-		pr_err("  MMIO Error Log 0       : 0x%016llx\n", ed.mmio_log0);
-		pr_err("  MMIO Error Log 1       : 0x%016llx\n", ed.mmio_log1);
-	}
-	if (ed.dma_err) {
-		pr_err("   DMA Error Status      : 0x%016llx\n", ed.dma_err);
-		pr_err("   DMA First Error Status: 0x%016llx\n", ed.dma_err1);
-		pr_err("   DMA Error Log 0       : 0x%016llx\n", ed.dma_log0);
-		pr_err("   DMA Error Log 1       : 0x%016llx\n", ed.dma_log1);
-	}
-	if (ed.utl_sys_err)
-		pr_err("   UTL Sys Error Status  : 0x%016llx\n", ed.utl_sys_err);
-	if (ed.utl_port_err)
-		pr_err("   UTL Port Error Status : 0x%016llx\n", ed.utl_port_err);
-	if (ed.utl_rc_err)
-		pr_err("   UTL RC Error Status   : 0x%016llx\n", ed.utl_rc_err);
-
-	/* Interrupts are caused by the error traps. If we had any error there
-	 * we loop again in case the UTL buffered some new stuff between
-	 * going there and going to the traps
-	 */
-	if (ed.dma_err || ed.mmio_err || ed.phb_err) {
-		handled = IRQ_HANDLED;
-		goto again;
-	}
-	return handled;
-}
-
-static void __init wsp_setup_pci_err_reporting(struct wsp_phb *phb)
-{
-	struct pci_controller *hose = phb->hose;
-	int err_irq, i, rc;
-	char fname[16];
-
-	/* Create a debugfs file for that PHB */
-	sprintf(fname, "phb%d", phb->hose->global_number);
-	phb->ddir = debugfs_create_dir(fname, powerpc_debugfs_root);
-
-	/* Some useful debug output */
-	if (phb->ddir) {
-		struct dentry *d = debugfs_create_dir("regs", phb->ddir);
-		char tmp[64];
-
-		for (i = 0; i < ARRAY_SIZE(wsp_pci_regs); i++) {
-			sprintf(tmp, "%03x_%s", wsp_pci_regs[i].offset,
-				wsp_pci_regs[i].name);
-			debugfs_create_file(tmp, 0600, d,
-					    hose->cfg_data + wsp_pci_regs[i].offset,
-					    &wsp_pci_reg_fops);
-		}
-		debugfs_create_file("all_regs", 0600, phb->ddir, phb, &wsp_pci_regs_fops);
-	}
-
-	/* Find the IRQ number for that PHB */
-	err_irq = irq_of_parse_and_map(hose->dn, 0);
-	if (err_irq == 0)
-		/* XXX Error IRQ lacking from device-tree */
-		err_irq = wsp_pci_get_err_irq_no_dt(hose->dn);
-	if (err_irq == 0) {
-		pr_err("PCI: Failed to fetch error interrupt for %s\n",
-		       hose->dn->full_name);
-		return;
-	}
-	/* Request it */
-	rc = request_irq(err_irq, wsp_pci_err_irq, 0, "wsp_pci error", phb);
-	if (rc) {
-		pr_err("PCI: Failed to request interrupt for %s\n",
-		       hose->dn->full_name);
-	}
-	/* Enable interrupts for all errors for now */
-	out_be64(hose->cfg_data + PCIE_REG_PHB_ERR_IRQ_ENABLE, 0xffffffffffffffffull);
-	out_be64(hose->cfg_data + PCIE_REG_MMIO_ERR_IRQ_ENABLE, 0xffffffffffffffffull);
-	out_be64(hose->cfg_data + PCIE_REG_DMA_ERR_IRQ_ENABLE, 0xffffffffffffffffull);
-}
-
-/*
- * This is called later to hookup with the error interrupt
- */
-static int __init wsp_setup_pci_late(void)
-{
-	struct wsp_phb *phb;
-
-	list_for_each_entry(phb, &wsp_phbs, all)
-		wsp_setup_pci_err_reporting(phb);
-
-	return 0;
-}
-arch_initcall(wsp_setup_pci_late);
diff --git a/arch/powerpc/platforms/wsp/wsp_pci.h b/arch/powerpc/platforms/wsp/wsp_pci.h
deleted file mode 100644
index 52e9bd95250..00000000000
--- a/arch/powerpc/platforms/wsp/wsp_pci.h
+++ /dev/null
@@ -1,268 +0,0 @@
-/*
- * Copyright 2010 Ben Herrenschmidt, IBM Corporation
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#ifndef __WSP_PCI_H
-#define __WSP_PCI_H
-
-/* Architected registers */
-#define PCIE_REG_DMA_CHAN_STATUS	0x110
-#define PCIE_REG_CPU_LOADSTORE_STATUS	0x120
-
-#define PCIE_REG_CONFIG_DATA		0x130
-#define PCIE_REG_LOCK0			0x138
-#define PCIE_REG_CONFIG_ADDRESS		0x140
-#define   PCIE_REG_CA_ENABLE			0x8000000000000000ull
-#define	  PCIE_REG_CA_BUS_MASK			0x0ff0000000000000ull
-#define   PCIE_REG_CA_BUS_SHIFT			(20+32)
-#define   PCIE_REG_CA_DEV_MASK			0x000f800000000000ull
-#define   PCIE_REG_CA_DEV_SHIFT			(15+32)
-#define   PCIE_REG_CA_FUNC_MASK			0x0000700000000000ull
-#define   PCIE_REG_CA_FUNC_SHIFT		(12+32)
-#define   PCIE_REG_CA_REG_MASK			0x00000fff00000000ull
-#define   PCIE_REG_CA_REG_SHIFT			( 0+32)
-#define   PCIE_REG_CA_BE_MASK			0x00000000f0000000ull
-#define   PCIE_REG_CA_BE_SHIFT			(   28)
-#define PCIE_REG_LOCK1			0x148
-
-#define PCIE_REG_PHB_CONFIG		0x160
-#define   PCIE_REG_PHBC_64B_TCE_EN		0x2000000000000000ull
-#define   PCIE_REG_PHBC_MMIO_DMA_FREEZE_EN	0x1000000000000000ull
-#define   PCIE_REG_PHBC_32BIT_MSI_EN		0x0080000000000000ull
-#define   PCIE_REG_PHBC_M64_EN			0x0040000000000000ull
-#define   PCIE_REG_PHBC_IO_EN			0x0008000000000000ull
-#define   PCIE_REG_PHBC_64BIT_MSI_EN		0x0002000000000000ull
-#define   PCIE_REG_PHBC_M32A_EN			0x0000800000000000ull
-#define   PCIE_REG_PHBC_M32B_EN			0x0000400000000000ull
-#define   PCIE_REG_PHBC_MSI_PE_VALIDATE		0x0000200000000000ull
-#define   PCIE_REG_PHBC_DMA_XLATE_BYPASS	0x0000100000000000ull
-
-#define PCIE_REG_IO_BASE_ADDR		0x170
-#define PCIE_REG_IO_BASE_MASK		0x178
-#define PCIE_REG_IO_START_ADDR		0x180
-
-#define PCIE_REG_M32A_BASE_ADDR		0x190
-#define PCIE_REG_M32A_BASE_MASK		0x198
-#define PCIE_REG_M32A_START_ADDR	0x1a0
-
-#define PCIE_REG_M32B_BASE_ADDR		0x1b0
-#define PCIE_REG_M32B_BASE_MASK		0x1b8
-#define PCIE_REG_M32B_START_ADDR	0x1c0
-
-#define PCIE_REG_M64_BASE_ADDR		0x1e0
-#define PCIE_REG_M64_BASE_MASK		0x1e8
-#define PCIE_REG_M64_START_ADDR		0x1f0
-
-#define PCIE_REG_TCE_KILL		0x210
-#define   PCIE_REG_TCEKILL_SINGLE	0x8000000000000000ull
-#define   PCIE_REG_TCEKILL_ADDR_MASK	0x000003fffffffff8ull
-#define   PCIE_REG_TCEKILL_PS_4K	0
-#define   PCIE_REG_TCEKILL_PS_64K	1
-#define   PCIE_REG_TCEKILL_PS_16M	2
-#define   PCIE_REG_TCEKILL_PS_16G	3
-
-#define PCIE_REG_IODA_ADDR		0x220
-#define   PCIE_REG_IODA_AD_AUTOINC	0x8000000000000000ull
-#define   PCIE_REG_IODA_AD_TBL_MVT	0x0005000000000000ull
-#define   PCIE_REG_IODA_AD_TBL_PELT	0x0006000000000000ull
-#define   PCIE_REG_IODA_AD_TBL_PESTA	0x0007000000000000ull
-#define   PCIE_REG_IODA_AD_TBL_PESTB	0x0008000000000000ull
-#define   PCIE_REG_IODA_AD_TBL_TVT	0x0009000000000000ull
-#define   PCIE_REG_IODA_AD_TBL_TCE	0x000a000000000000ull
-#define PCIE_REG_IODA_DATA0		0x228
-#define PCIE_REG_IODA_DATA1		0x230
-
-#define PCIE_REG_LOCK2			0x240
-
-#define PCIE_REG_PHB_GEN_CAP		0x250
-#define PCIE_REG_PHB_TCE_CAP		0x258
-#define PCIE_REG_PHB_IRQ_CAP		0x260
-#define PCIE_REG_PHB_EEH_CAP		0x268
-
-#define PCIE_REG_PAPR_ERR_INJ_CONTROL	0x2b0
-#define PCIE_REG_PAPR_ERR_INJ_ADDR	0x2b8
-#define PCIE_REG_PAPR_ERR_INJ_MASK	0x2c0
-
-
-#define PCIE_REG_SYS_CFG1		0x600
-#define   PCIE_REG_SYS_CFG1_CLASS_CODE	0x0000000000ffffffull
-
-#define IODA_TVT0_TTA_MASK		0x000fffffffff0000ull
-#define IODA_TVT0_TTA_SHIFT		4
-#define IODA_TVT0_BUSNUM_VALID_MASK	0x000000000000e000ull
-#define IODA_TVT0_TCE_TABLE_SIZE_MASK	0x0000000000001f00ull
-#define IODA_TVT0_TCE_TABLE_SIZE_SHIFT	8
-#define IODA_TVT0_BUSNUM_VALUE_MASK	0x00000000000000ffull
-#define IODA_TVT0_BUSNUM_VALID_SHIFT	0
-#define IODA_TVT1_DEVNUM_VALID		0x2000000000000000ull
-#define IODA_TVT1_DEVNUM_VALUE_MASK	0x1f00000000000000ull
-#define IODA_TVT1_DEVNUM_VALUE_SHIFT	56
-#define IODA_TVT1_FUNCNUM_VALID		0x0008000000000000ull
-#define IODA_TVT1_FUNCNUM_VALUE_MASK	0x0007000000000000ull
-#define IODA_TVT1_FUNCNUM_VALUE_SHIFT	48
-#define IODA_TVT1_IO_PAGE_SIZE_MASK	0x00001f0000000000ull
-#define IODA_TVT1_IO_PAGE_SIZE_SHIFT	40
-#define IODA_TVT1_PE_NUMBER_MASK	0x000000000000003full
-#define IODA_TVT1_PE_NUMBER_SHIFT	0
-
-#define IODA_TVT_COUNT			64
-
-/* UTL Core registers */
-#define PCIE_UTL_SYS_BUS_CONTROL	0x400
-#define PCIE_UTL_STATUS			0x408
-#define PCIE_UTL_SYS_BUS_AGENT_STATUS	0x410
-#define PCIE_UTL_SYS_BUS_AGENT_ERR_SEV	0x418
-#define PCIE_UTL_SYS_BUS_AGENT_IRQ_EN	0x420
-#define PCIE_UTL_SYS_BUS_BURST_SZ_CONF	0x440
-#define PCIE_UTL_REVISION_ID		0x448
-
-#define PCIE_UTL_OUT_POST_HDR_BUF_ALLOC	0x4c0
-#define PCIE_UTL_OUT_POST_DAT_BUF_ALLOC	0x4d0
-#define PCIE_UTL_IN_POST_HDR_BUF_ALLOC	0x4e0
-#define PCIE_UTL_IN_POST_DAT_BUF_ALLOC	0x4f0
-#define PCIE_UTL_OUT_NP_BUF_ALLOC	0x500
-#define PCIE_UTL_IN_NP_BUF_ALLOC	0x510
-#define PCIE_UTL_PCIE_TAGS_ALLOC	0x520
-#define PCIE_UTL_GBIF_READ_TAGS_ALLOC	0x530
-
-#define PCIE_UTL_PCIE_PORT_CONTROL	0x540
-#define PCIE_UTL_PCIE_PORT_STATUS	0x548
-#define PCIE_UTL_PCIE_PORT_ERROR_SEV	0x550
-#define PCIE_UTL_PCIE_PORT_IRQ_EN	0x558
-#define PCIE_UTL_RC_STATUS		0x560
-#define PCIE_UTL_RC_ERR_SEVERITY	0x568
-#define PCIE_UTL_RC_IRQ_EN		0x570
-#define PCIE_UTL_EP_STATUS		0x578
-#define PCIE_UTL_EP_ERR_SEVERITY	0x580
-#define PCIE_UTL_EP_ERR_IRQ_EN		0x588
-
-#define PCIE_UTL_PCI_PM_CTRL1		0x590
-#define PCIE_UTL_PCI_PM_CTRL2		0x598
-
-/* PCIe stack registers */
-#define PCIE_REG_SYSTEM_CONFIG1		0x600
-#define PCIE_REG_SYSTEM_CONFIG2		0x608
-#define PCIE_REG_EP_SYSTEM_CONFIG	0x618
-#define PCIE_REG_EP_FLR			0x620
-#define PCIE_REG_EP_BAR_CONFIG		0x628
-#define PCIE_REG_LINK_CONFIG		0x630
-#define PCIE_REG_PM_CONFIG		0x640
-#define PCIE_REG_DLP_CONTROL		0x650
-#define PCIE_REG_DLP_STATUS		0x658
-#define PCIE_REG_ERR_REPORT_CONTROL	0x660
-#define PCIE_REG_SLOT_CONTROL1		0x670
-#define PCIE_REG_SLOT_CONTROL2		0x678
-#define PCIE_REG_UTL_CONFIG		0x680
-#define PCIE_REG_BUFFERS_CONFIG		0x690
-#define PCIE_REG_ERROR_INJECT		0x698
-#define PCIE_REG_SRIOV_CONFIG		0x6a0
-#define PCIE_REG_PF0_SRIOV_STATUS	0x6a8
-#define PCIE_REG_PF1_SRIOV_STATUS	0x6b0
-#define PCIE_REG_PORT_NUMBER		0x700
-#define PCIE_REG_POR_SYSTEM_CONFIG	0x708
-
-/* PHB internal logic registers */
-#define PCIE_REG_PHB_VERSION		0x800
-#define PCIE_REG_RESET			0x808
-#define PCIE_REG_PHB_CONTROL		0x810
-#define PCIE_REG_PHB_TIMEOUT_CONTROL1	0x878
-#define PCIE_REG_PHB_QUIESCE_DMA	0x888
-#define PCIE_REG_PHB_DMA_READ_TAG_ACTV	0x900
-#define PCIE_REG_PHB_TCE_READ_TAG_ACTV	0x908
-
-/* FIR registers */
-#define PCIE_REG_LEM_FIR_ACCUM		0xc00
-#define PCIE_REG_LEM_FIR_AND_MASK	0xc08
-#define PCIE_REG_LEM_FIR_OR_MASK	0xc10
-#define PCIE_REG_LEM_ACTION0		0xc18
-#define PCIE_REG_LEM_ACTION1		0xc20
-#define PCIE_REG_LEM_ERROR_MASK		0xc30
-#define PCIE_REG_LEM_ERROR_AND_MASK	0xc38
-#define PCIE_REG_LEM_ERROR_OR_MASK	0xc40
-
-/* PHB Error registers */
-#define PCIE_REG_PHB_ERR_STATUS		0xc80
-#define PCIE_REG_PHB_ERR1_STATUS	0xc88
-#define PCIE_REG_PHB_ERR_INJECT		0xc90
-#define PCIE_REG_PHB_ERR_LEM_ENABLE	0xc98
-#define PCIE_REG_PHB_ERR_IRQ_ENABLE	0xca0
-#define PCIE_REG_PHB_ERR_FREEZE_ENABLE	0xca8
-#define PCIE_REG_PHB_ERR_SIDE_ENABLE	0xcb8
-#define PCIE_REG_PHB_ERR_LOG_0		0xcc0
-#define PCIE_REG_PHB_ERR_LOG_1		0xcc8
-#define PCIE_REG_PHB_ERR_STATUS_MASK	0xcd0
-#define PCIE_REG_PHB_ERR1_STATUS_MASK	0xcd8
-
-#define PCIE_REG_MMIO_ERR_STATUS	0xd00
-#define PCIE_REG_MMIO_ERR1_STATUS	0xd08
-#define PCIE_REG_MMIO_ERR_INJECT	0xd10
-#define PCIE_REG_MMIO_ERR_LEM_ENABLE	0xd18
-#define PCIE_REG_MMIO_ERR_IRQ_ENABLE	0xd20
-#define PCIE_REG_MMIO_ERR_FREEZE_ENABLE	0xd28
-#define PCIE_REG_MMIO_ERR_SIDE_ENABLE	0xd38
-#define PCIE_REG_MMIO_ERR_LOG_0		0xd40
-#define PCIE_REG_MMIO_ERR_LOG_1		0xd48
-#define PCIE_REG_MMIO_ERR_STATUS_MASK	0xd50
-#define PCIE_REG_MMIO_ERR1_STATUS_MASK	0xd58
-
-#define PCIE_REG_DMA_ERR_STATUS		0xd80
-#define PCIE_REG_DMA_ERR1_STATUS	0xd88
-#define PCIE_REG_DMA_ERR_INJECT		0xd90
-#define PCIE_REG_DMA_ERR_LEM_ENABLE	0xd98
-#define PCIE_REG_DMA_ERR_IRQ_ENABLE	0xda0
-#define PCIE_REG_DMA_ERR_FREEZE_ENABLE	0xda8
-#define PCIE_REG_DMA_ERR_SIDE_ENABLE	0xdb8
-#define PCIE_REG_DMA_ERR_LOG_0		0xdc0
-#define PCIE_REG_DMA_ERR_LOG_1		0xdc8
-#define PCIE_REG_DMA_ERR_STATUS_MASK	0xdd0
-#define PCIE_REG_DMA_ERR1_STATUS_MASK	0xdd8
-
-/* Shortcuts for access to the above using the PHB definitions
- * with an offset
- */
-#define PCIE_REG_ERR_PHB_OFFSET		0x0
-#define PCIE_REG_ERR_MMIO_OFFSET	0x80
-#define PCIE_REG_ERR_DMA_OFFSET		0x100
-
-/* Debug and Trace registers */
-#define PCIE_REG_PHB_DEBUG_CONTROL0	0xe00
-#define PCIE_REG_PHB_DEBUG_STATUS0	0xe08
-#define PCIE_REG_PHB_DEBUG_CONTROL1	0xe10
-#define PCIE_REG_PHB_DEBUG_STATUS1	0xe18
-#define PCIE_REG_PHB_DEBUG_CONTROL2	0xe20
-#define PCIE_REG_PHB_DEBUG_STATUS2	0xe28
-#define PCIE_REG_PHB_DEBUG_CONTROL3	0xe30
-#define PCIE_REG_PHB_DEBUG_STATUS3	0xe38
-#define PCIE_REG_PHB_DEBUG_CONTROL4	0xe40
-#define PCIE_REG_PHB_DEBUG_STATUS4	0xe48
-#define PCIE_REG_PHB_DEBUG_CONTROL5	0xe50
-#define PCIE_REG_PHB_DEBUG_STATUS5	0xe58
-#define PCIE_REG_PHB_DEBUG_CONTROL6	0xe60
-#define PCIE_REG_PHB_DEBUG_STATUS6	0xe68
-
-/* Definition for PCIe errors */
-struct wsp_pcie_err_log_data {
-	__u64	phb_err;
-	__u64	phb_err1;
-	__u64	phb_log0;
-	__u64	phb_log1;
-	__u64	mmio_err;
-	__u64	mmio_err1;
-	__u64	mmio_log0;
-	__u64	mmio_log1;
-	__u64	dma_err;
-	__u64	dma_err1;
-	__u64	dma_log0;
-	__u64	dma_log1;
-	__u64	utl_sys_err;
-	__u64	utl_port_err;
-	__u64	utl_rc_err;
-	__u64	unused;
-};
-
-#endif /* __WSP_PCI_H */