ALSA: hda/realtek: remove redundant semicolon

Having just one semicolon after a break statement is enough. Signed-off-by: Jesper Juhl <jj@chaosbits.net> Signed-off-by: Takashi Iwai <tiwai@suse.de>
author: Jesper Juhl <jj@chaosbits.net> 2011-11-13 23:11:50 +0100
committer: Takashi Iwai <tiwai@suse.de> 2011-11-14 10:41:46 +0100
commit: e53de8f00c80fd1a312c95bc5157fdb98d46e070 (patch)
tree: 98c61d5bf0fd658de9e6c97cd621cfcdba9ecb0c /fs/jbd/commit.c
parent: afef2cfa0ecf45dec7e88db9fa312ee82d347111 (diff)
0 files changed, 0 insertions, 0 deletions
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index 8dacd4c9ee8..ee9402324a2 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -54,6 +54,7 @@ config AD525X_DPOT_SPI
 config ATMEL_PWM
 	tristate "Atmel AT32/AT91 PWM support"
 	depends on HAVE_CLK
+	depends on AVR32 || ARCH_AT91SAM9263 || ARCH_AT91SAM9RL || ARCH_AT91SAM9G45
 	help
 	  This option enables device driver support for the PWM channels
 	  on certain Atmel processors.  Pulse Width Modulation is used for
@@ -200,7 +201,7 @@ config ICS932S401
 
 config ATMEL_SSC
 	tristate "Device driver for Atmel SSC peripheral"
-	depends on HAS_IOMEM
+	depends on HAS_IOMEM && (AVR32 || ARCH_AT91 || COMPILE_TEST)
 	---help---
 	  This option enables device driver support for Atmel Synchronized
 	  Serial Communication peripheral (SSC).
@@ -235,7 +236,7 @@ config SGI_XP
 
 config CS5535_MFGPT
 	tristate "CS5535/CS5536 Geode Multi-Function General Purpose Timer (MFGPT) support"
-	depends on PCI && X86 && MFD_CS5535
+	depends on MFD_CS5535
 	default n
 	help
 	  This driver provides access to MFGPT functionality for other
@@ -300,8 +301,8 @@ config SGI_GRU_DEBUG
 	depends on SGI_GRU
 	default n
 	---help---
-	This option enables addition debugging code for the SGI GRU driver. If
-	you are unsure, say N.
+	This option enables additional debugging code for the SGI GRU driver.
+	If you are unsure, say N.
 
 config APDS9802ALS
 	tristate "Medfield Avago APDS9802 ALS Sensor module"
@@ -381,19 +382,6 @@ config HMC6352
 	  This driver provides support for the Honeywell HMC6352 compass,
 	  providing configuration and heading data via sysfs.
 
-config EP93XX_PWM
-	tristate "EP93xx PWM support"
-	depends on ARCH_EP93XX
-	help
-	  This option enables device driver support for the PWM channels
-	  on the Cirrus EP93xx processors.  The EP9307 chip only has one
-	  PWM channel all the others have two, the second channel is an
-	  alternate function of the EGPIO14 pin.  A sysfs interface is
-	  provided to control the PWM channels.
-
-	  To compile this driver as a module, choose M here: the module will
-	  be called ep93xx_pwm.
-
 config DS1682
 	tristate "Dallas DS1682 Total Elapsed Time Recorder with Alarm"
 	depends on I2C
@@ -481,7 +469,7 @@ config BMP085_SPI
 config PCH_PHUB
 	tristate "Intel EG20T PCH/LAPIS Semicon IOH(ML7213/ML7223/ML7831) PHUB"
 	select GENERIC_NET_UTILS
-	depends on PCI
+	depends on PCI && (X86_32 || COMPILE_TEST)
 	help
 	  This driver is for PCH(Platform controller Hub) PHUB(Packet Hub) of
 	  Intel Topcliff which is an IOH(Input/Output Hub) for x86 embedded
@@ -528,6 +516,15 @@ config SRAM
 	  the genalloc API. It is supposed to be used for small on-chip SRAM
 	  areas found on many SoCs.
 
+config VEXPRESS_SYSCFG
+	bool "Versatile Express System Configuration driver"
+	depends on VEXPRESS_CONFIG
+	default y
+	help
+	  ARM Ltd. Versatile Express uses specialised platform configuration
+	  bus. System Configuration interface is one of the possible means
+	  of generating transactions on this bus.
+
 source "drivers/misc/c2port/Kconfig"
 source "drivers/misc/eeprom/Kconfig"
 source "drivers/misc/cb710/Kconfig"
@@ -537,4 +534,7 @@ source "drivers/misc/carma/Kconfig"
 source "drivers/misc/altera-stapl/Kconfig"
 source "drivers/misc/mei/Kconfig"
 source "drivers/misc/vmw_vmci/Kconfig"
+source "drivers/misc/mic/Kconfig"
+source "drivers/misc/genwqe/Kconfig"
+source "drivers/misc/echo/Kconfig"
 endmenu
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index c235d5b6831..d59ce1261b3 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -33,7 +33,6 @@ obj-$(CONFIG_APDS9802ALS)	+= apds9802als.o
 obj-$(CONFIG_ISL29003)		+= isl29003.o
 obj-$(CONFIG_ISL29020)		+= isl29020.o
 obj-$(CONFIG_SENSORS_TSL2550)	+= tsl2550.o
-obj-$(CONFIG_EP93XX_PWM)	+= ep93xx_pwm.o
 obj-$(CONFIG_DS1682)		+= ds1682.o
 obj-$(CONFIG_TI_DAC7512)	+= ti_dac7512.o
 obj-$(CONFIG_C2PORT)		+= c2port/
@@ -53,3 +52,7 @@ obj-$(CONFIG_INTEL_MEI)		+= mei/
 obj-$(CONFIG_VMWARE_VMCI)	+= vmw_vmci/
 obj-$(CONFIG_LATTICE_ECP3_CONFIG)	+= lattice-ecp3-config.o
 obj-$(CONFIG_SRAM)		+= sram.o
+obj-y				+= mic/
+obj-$(CONFIG_GENWQE)		+= genwqe/
+obj-$(CONFIG_ECHO)		+= echo/
+obj-$(CONFIG_VEXPRESS_SYSCFG)	+= vexpress-syscfg.o
diff --git a/drivers/misc/ad525x_dpot.c b/drivers/misc/ad525x_dpot.c
index 0daadcf1ed7..a43053daad0 100644
--- a/drivers/misc/ad525x_dpot.c
+++ b/drivers/misc/ad525x_dpot.c
@@ -72,7 +72,6 @@
 #include <linux/module.h>
 #include <linux/device.h>
 #include <linux/kernel.h>
-#include <linux/init.h>
 #include <linux/delay.h>
 #include <linux/slab.h>
 
@@ -641,7 +640,7 @@ static const struct attribute_group ad525x_group_commands = {
 	.attrs = ad525x_attributes_commands,
 };
 
-int ad_dpot_add_files(struct device *dev,
+static int ad_dpot_add_files(struct device *dev,
 		unsigned features, unsigned rdac)
 {
 	int err = sysfs_create_file(&dev->kobj,
@@ -666,7 +665,7 @@ int ad_dpot_add_files(struct device *dev,
 	return err;
 }
 
-inline void ad_dpot_remove_files(struct device *dev,
+static inline void ad_dpot_remove_files(struct device *dev,
 		unsigned features, unsigned rdac)
 {
 	sysfs_remove_file(&dev->kobj,
diff --git a/drivers/misc/apds9802als.c b/drivers/misc/apds9802als.c
index 0c6e037153d..c6cc3dc8ae1 100644
--- a/drivers/misc/apds9802als.c
+++ b/drivers/misc/apds9802als.c
@@ -22,7 +22,6 @@
  */
 
 #include <linux/module.h>
-#include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/i2c.h>
 #include <linux/err.h>
diff --git a/drivers/misc/arm-charlcd.c b/drivers/misc/arm-charlcd.c
index 1256a4bf1c0..c72e96b523e 100644
--- a/drivers/misc/arm-charlcd.c
+++ b/drivers/misc/arm-charlcd.c
@@ -11,6 +11,7 @@
 #include <linux/module.h>
 #include <linux/interrupt.h>
 #include <linux/platform_device.h>
+#include <linux/of.h>
 #include <linux/completion.h>
 #include <linux/delay.h>
 #include <linux/io.h>
@@ -297,7 +298,7 @@ static int __init charlcd_probe(struct platform_device *pdev)
 	lcd->irq = platform_get_irq(pdev, 0);
 	/* If no IRQ is supplied, we'll survive without it */
 	if (lcd->irq >= 0) {
-		if (request_irq(lcd->irq, charlcd_interrupt, IRQF_DISABLED,
+		if (request_irq(lcd->irq, charlcd_interrupt, 0,
 				DRIVERNAME, lcd)) {
 			ret = -EIO;
 			goto out_no_irq;
@@ -366,11 +367,17 @@ static const struct dev_pm_ops charlcd_pm_ops = {
 	.resume = charlcd_resume,
 };
 
+static const struct of_device_id charlcd_match[] = {
+	{ .compatible = "arm,versatile-lcd", },
+	{}
+};
+
 static struct platform_driver charlcd_driver = {
 	.driver = {
 		.name = DRIVERNAME,
 		.owner = THIS_MODULE,
 		.pm = &charlcd_pm_ops,
+		.of_match_table = of_match_ptr(charlcd_match),
 	},
 	.remove = __exit_p(charlcd_remove),
 };
diff --git a/drivers/misc/atmel-ssc.c b/drivers/misc/atmel-ssc.c
index f7b90661e32..22de1372764 100644
--- a/drivers/misc/atmel-ssc.c
+++ b/drivers/misc/atmel-ssc.c
@@ -19,7 +19,6 @@
 #include <linux/module.h>
 
 #include <linux/of.h>
-#include <linux/pinctrl/consumer.h>
 
 /* Serialize access to ssc_list and user count */
 static DEFINE_SPINLOCK(user_lock);
@@ -66,14 +65,19 @@ EXPORT_SYMBOL(ssc_request);
 
 void ssc_free(struct ssc_device *ssc)
 {
+	bool disable_clk = true;
+
 	spin_lock(&user_lock);
-	if (ssc->user) {
+	if (ssc->user)
 		ssc->user--;
-		clk_disable_unprepare(ssc->clk);
-	} else {
+	else {
+		disable_clk = false;
 		dev_dbg(&ssc->pdev->dev, "device already free\n");
 	}
 	spin_unlock(&user_lock);
+
+	if (disable_clk)
+		clk_disable_unprepare(ssc->clk);
 }
 EXPORT_SYMBOL(ssc_free);
 
@@ -132,13 +136,6 @@ static int ssc_probe(struct platform_device *pdev)
 	struct resource *regs;
 	struct ssc_device *ssc;
 	const struct atmel_ssc_platform_data *plat_dat;
-	struct pinctrl *pinctrl;
-
-	pinctrl = devm_pinctrl_get_select_default(&pdev->dev);
-	if (IS_ERR(pinctrl)) {
-		dev_err(&pdev->dev, "Failed to request pinctrl\n");
-		return PTR_ERR(pinctrl);
-	}
 
 	ssc = devm_kzalloc(&pdev->dev, sizeof(struct ssc_device), GFP_KERNEL);
 	if (!ssc) {
@@ -153,6 +150,12 @@ static int ssc_probe(struct platform_device *pdev)
 		return -ENODEV;
 	ssc->pdata = (struct atmel_ssc_platform_data *)plat_dat;
 
+	if (pdev->dev.of_node) {
+		struct device_node *np = pdev->dev.of_node;
+		ssc->clk_from_rk_pin =
+			of_property_read_bool(np, "atmel,clk-from-rk-pin");
+	}
+
 	regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	ssc->regs = devm_ioremap_resource(&pdev->dev, regs);
 	if (IS_ERR(ssc->regs))
diff --git a/drivers/misc/atmel_pwm.c b/drivers/misc/atmel_pwm.c
index 494d0500bda..a6dc56e1bc5 100644
--- a/drivers/misc/atmel_pwm.c
+++ b/drivers/misc/atmel_pwm.c
@@ -90,8 +90,10 @@ int pwm_channel_alloc(int index, struct pwm_channel *ch)
 	unsigned long	flags;
 	int		status = 0;
 
-	/* insist on PWM init, with this signal pinned out */
-	if (!pwm || !(pwm->mask & 1 << index))
+	if (!pwm)
+		return -EPROBE_DEFER;
+
+	if (!(pwm->mask & 1 << index))
 		return -ENODEV;
 
 	if (index < 0 || index >= PWM_NCHAN || !ch)
diff --git a/drivers/misc/bh1780gli.c b/drivers/misc/bh1780gli.c
index 057580e026c..48ea33d15a7 100644
--- a/drivers/misc/bh1780gli.c
+++ b/drivers/misc/bh1780gli.c
@@ -23,6 +23,7 @@
 #include <linux/platform_device.h>
 #include <linux/delay.h>
 #include <linux/module.h>
+#include <linux/of.h>
 
 #define BH1780_REG_CONTROL	0x80
 #define BH1780_REG_PARTID	0x8A
@@ -244,6 +245,15 @@ static const struct i2c_device_id bh1780_id[] = {
 	{ },
 };
 
+#ifdef CONFIG_OF
+static const struct of_device_id of_bh1780_match[] = {
+	{ .compatible = "rohm,bh1780gli", },
+	{},
+};
+
+MODULE_DEVICE_TABLE(of, of_bh1780_match);
+#endif
+
 static struct i2c_driver bh1780_driver = {
 	.probe		= bh1780_probe,
 	.remove		= bh1780_remove,
@@ -251,6 +261,7 @@ static struct i2c_driver bh1780_driver = {
 	.driver = {
 		.name = "bh1780",
 		.pm	= &bh1780_pm,
+		.of_match_table = of_match_ptr(of_bh1780_match),
 	},
 };
 
diff --git a/drivers/misc/bmp085-i2c.c b/drivers/misc/bmp085-i2c.c
index 3abfcecf842..a7c16295b81 100644
--- a/drivers/misc/bmp085-i2c.c
+++ b/drivers/misc/bmp085-i2c.c
@@ -49,7 +49,7 @@ static int bmp085_i2c_probe(struct i2c_client *client,
 		return err;
 	}
 
-	return bmp085_probe(&client->dev, regmap);
+	return bmp085_probe(&client->dev, regmap, client->irq);
 }
 
 static int bmp085_i2c_remove(struct i2c_client *client)
diff --git a/drivers/misc/bmp085-spi.c b/drivers/misc/bmp085-spi.c
index d6a52659cf2..864ecac3237 100644
--- a/drivers/misc/bmp085-spi.c
+++ b/drivers/misc/bmp085-spi.c
@@ -41,7 +41,7 @@ static int bmp085_spi_probe(struct spi_device *client)
 		return err;
 	}
 
-	return bmp085_probe(&client->dev, regmap);
+	return bmp085_probe(&client->dev, regmap, client->irq);
 }
 
 static int bmp085_spi_remove(struct spi_device *client)
diff --git a/drivers/misc/bmp085.c b/drivers/misc/bmp085.c
index 849e2fed4da..9b313f7810f 100644
--- a/drivers/misc/bmp085.c
+++ b/drivers/misc/bmp085.c
@@ -47,11 +47,12 @@
 
 #include <linux/module.h>
 #include <linux/device.h>
-#include <linux/init.h>
 #include <linux/slab.h>
-#include <linux/delay.h>
 #include <linux/of.h>
 #include "bmp085.h"
+#include <linux/interrupt.h>
+#include <linux/completion.h>
+#include <linux/gpio.h>
 
 #define BMP085_CHIP_ID			0x55
 #define BMP085_CALIBRATION_DATA_START	0xAA
@@ -84,8 +85,19 @@ struct bmp085_data {
 	unsigned long last_temp_measurement;
 	u8	chip_id;
 	s32	b6; /* calculated temperature correction coefficient */
+	int	irq;
+	struct	completion done;
 };
 
+static irqreturn_t bmp085_eoc_isr(int irq, void *devid)
+{
+	struct bmp085_data *data = devid;
+
+	complete(&data->done);
+
+	return IRQ_HANDLED;
+}
+
 static s32 bmp085_read_calibration_data(struct bmp085_data *data)
 {
 	u16 tmp[BMP085_CALIBRATION_DATA_LENGTH];
@@ -116,6 +128,9 @@ static s32 bmp085_update_raw_temperature(struct bmp085_data *data)
 	s32 status;
 
 	mutex_lock(&data->lock);
+
+	init_completion(&data->done);
+
 	status = regmap_write(data->regmap, BMP085_CTRL_REG,
 			      BMP085_TEMP_MEASUREMENT);
 	if (status < 0) {
@@ -123,7 +138,8 @@ static s32 bmp085_update_raw_temperature(struct bmp085_data *data)
 			"Error while requesting temperature measurement.\n");
 		goto exit;
 	}
-	msleep(BMP085_TEMP_CONVERSION_TIME);
+	wait_for_completion_timeout(&data->done, 1 + msecs_to_jiffies(
+					    BMP085_TEMP_CONVERSION_TIME));
 
 	status = regmap_bulk_read(data->regmap, BMP085_CONVERSION_REGISTER_MSB,
 				 &tmp, sizeof(tmp));
@@ -147,6 +163,9 @@ static s32 bmp085_update_raw_pressure(struct bmp085_data *data)
 	s32 status;
 
 	mutex_lock(&data->lock);
+
+	init_completion(&data->done);
+
 	status = regmap_write(data->regmap, BMP085_CTRL_REG,
 			BMP085_PRESSURE_MEASUREMENT +
 			(data->oversampling_setting << 6));
@@ -157,8 +176,8 @@ static s32 bmp085_update_raw_pressure(struct bmp085_data *data)
 	}
 
 	/* wait for the end of conversion */
-	msleep(2+(3 << data->oversampling_setting));
-
+	wait_for_completion_timeout(&data->done, 1 + msecs_to_jiffies(
+					2+(3 << data->oversampling_setting)));
 	/* copy data into a u32 (4 bytes), but skip the first byte. */
 	status = regmap_bulk_read(data->regmap, BMP085_CONVERSION_REGISTER_MSB,
 				 ((u8 *)&tmp)+1, 3);
@@ -374,7 +393,7 @@ int bmp085_detect(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(bmp085_detect);
 
-static void __init bmp085_get_of_properties(struct bmp085_data *data)
+static void bmp085_get_of_properties(struct bmp085_data *data)
 {
 #ifdef CONFIG_OF
 	struct device_node *np = data->dev->of_node;
@@ -420,7 +439,7 @@ struct regmap_config bmp085_regmap_config = {
 };
 EXPORT_SYMBOL_GPL(bmp085_regmap_config);
 
-int bmp085_probe(struct device *dev, struct regmap *regmap)
+int bmp085_probe(struct device *dev, struct regmap *regmap, int irq)
 {
 	struct bmp085_data *data;
 	int err = 0;
@@ -434,6 +453,15 @@ int bmp085_probe(struct device *dev, struct regmap *regmap)
 	dev_set_drvdata(dev, data);
 	data->dev = dev;
 	data->regmap = regmap;
+	data->irq = irq;
+
+	if (data->irq > 0) {
+		err = devm_request_irq(dev, data->irq, bmp085_eoc_isr,
+					      IRQF_TRIGGER_RISING, "bmp085",
+					      data);
+		if (err < 0)
+			goto exit_free;
+	}
 
 	/* Initialize the BMP085 chip */
 	err = bmp085_init_client(data);
diff --git a/drivers/misc/bmp085.h b/drivers/misc/bmp085.h
index 2b8f615bca9..8b8e3b1f5ca 100644
--- a/drivers/misc/bmp085.h
+++ b/drivers/misc/bmp085.h
@@ -26,7 +26,7 @@
 
 extern struct regmap_config bmp085_regmap_config;
 
-int bmp085_probe(struct device *dev, struct regmap *regmap);
+int bmp085_probe(struct device *dev, struct regmap *regmap, int irq);
 int bmp085_remove(struct device *dev);
 int bmp085_detect(struct device *dev);
 
diff --git a/drivers/misc/c2port/core.c b/drivers/misc/c2port/core.c
index f32550a74bd..464419b3644 100644
--- a/drivers/misc/c2port/core.c
+++ b/drivers/misc/c2port/core.c
@@ -311,6 +311,7 @@ static ssize_t c2port_show_name(struct device *dev,
 
 	return sprintf(buf, "%s\n", c2dev->name);
 }
+static DEVICE_ATTR(name, 0444, c2port_show_name, NULL);
 
 static ssize_t c2port_show_flash_blocks_num(struct device *dev,
 				struct device_attribute *attr, char *buf)
@@ -320,6 +321,7 @@ static ssize_t c2port_show_flash_blocks_num(struct device *dev,
 
 	return sprintf(buf, "%d\n", ops->blocks_num);
 }
+static DEVICE_ATTR(flash_blocks_num, 0444, c2port_show_flash_blocks_num, NULL);
 
 static ssize_t c2port_show_flash_block_size(struct device *dev,
 				struct device_attribute *attr, char *buf)
@@ -329,6 +331,7 @@ static ssize_t c2port_show_flash_block_size(struct device *dev,
 
 	return sprintf(buf, "%d\n", ops->block_size);
 }
+static DEVICE_ATTR(flash_block_size, 0444, c2port_show_flash_block_size, NULL);
 
 static ssize_t c2port_show_flash_size(struct device *dev,
 				struct device_attribute *attr, char *buf)
@@ -338,18 +341,18 @@ static ssize_t c2port_show_flash_size(struct device *dev,
 
 	return sprintf(buf, "%d\n", ops->blocks_num * ops->block_size);
 }
+static DEVICE_ATTR(flash_size, 0444, c2port_show_flash_size, NULL);
 
-static ssize_t c2port_show_access(struct device *dev,
-				struct device_attribute *attr, char *buf)
+static ssize_t access_show(struct device *dev, struct device_attribute *attr,
+			   char *buf)
 {
 	struct c2port_device *c2dev = dev_get_drvdata(dev);
 
 	return sprintf(buf, "%d\n", c2dev->access);
 }
 
-static ssize_t c2port_store_access(struct device *dev,
-				struct device_attribute *attr,
-				const char *buf, size_t count)
+static ssize_t access_store(struct device *dev, struct device_attribute *attr,
+			    const char *buf, size_t count)
 {
 	struct c2port_device *c2dev = dev_get_drvdata(dev);
 	struct c2port_ops *ops = c2dev->ops;
@@ -375,6 +378,7 @@ static ssize_t c2port_store_access(struct device *dev,
 
 	return count;
 }
+static DEVICE_ATTR_RW(access);
 
 static ssize_t c2port_store_reset(struct device *dev,
 				struct device_attribute *attr,
@@ -395,6 +399,7 @@ static ssize_t c2port_store_reset(struct device *dev,
 
 	return count;
 }
+static DEVICE_ATTR(reset, 0200, NULL, c2port_store_reset);
 
 static ssize_t __c2port_show_dev_id(struct c2port_device *dev, char *buf)
 {
@@ -431,6 +436,7 @@ static ssize_t c2port_show_dev_id(struct device *dev,
 
 	return ret;
 }
+static DEVICE_ATTR(dev_id, 0444, c2port_show_dev_id, NULL);
 
 static ssize_t __c2port_show_rev_id(struct c2port_device *dev, char *buf)
 {
@@ -467,6 +473,7 @@ static ssize_t c2port_show_rev_id(struct device *dev,
 
 	return ret;
 }
+static DEVICE_ATTR(rev_id, 0444, c2port_show_rev_id, NULL);
 
 static ssize_t c2port_show_flash_access(struct device *dev,
 				struct device_attribute *attr, char *buf)
@@ -536,6 +543,8 @@ static ssize_t c2port_store_flash_access(struct device *dev,
 
 	return count;
 }
+static DEVICE_ATTR(flash_access, 0644, c2port_show_flash_access,
+		   c2port_store_flash_access);
 
 static ssize_t __c2port_write_flash_erase(struct c2port_device *dev)
 {
@@ -616,6 +625,7 @@ static ssize_t c2port_store_flash_erase(struct device *dev,
 
 	return count;
 }
+static DEVICE_ATTR(flash_erase, 0200, NULL, c2port_store_flash_erase);
 
 static ssize_t __c2port_read_flash_data(struct c2port_device *dev,
 				char *buffer, loff_t offset, size_t count)
@@ -846,35 +856,40 @@ static ssize_t c2port_write_flash_data(struct file *filp, struct kobject *kobj,
 
 	return ret;
 }
+/* size is computed at run-time */
+static BIN_ATTR(flash_data, 0644, c2port_read_flash_data,
+		c2port_write_flash_data, 0);
 
 /*
  * Class attributes
  */
+static struct attribute *c2port_attrs[] = {
+	&dev_attr_name.attr,
+	&dev_attr_flash_blocks_num.attr,
+	&dev_attr_flash_block_size.attr,
+	&dev_attr_flash_size.attr,
+	&dev_attr_access.attr,
+	&dev_attr_reset.attr,
+	&dev_attr_dev_id.attr,
+	&dev_attr_rev_id.attr,
+	&dev_attr_flash_access.attr,
+	&dev_attr_flash_erase.attr,
+	NULL,
+};
 
-static struct device_attribute c2port_attrs[] = {
-	__ATTR(name, 0444, c2port_show_name, NULL),
-	__ATTR(flash_blocks_num, 0444, c2port_show_flash_blocks_num, NULL),
-	__ATTR(flash_block_size, 0444, c2port_show_flash_block_size, NULL),
-	__ATTR(flash_size, 0444, c2port_show_flash_size, NULL),
-	__ATTR(access, 0644, c2port_show_access, c2port_store_access),
-	__ATTR(reset, 0200, NULL, c2port_store_reset),
-	__ATTR(dev_id, 0444, c2port_show_dev_id, NULL),
-	__ATTR(rev_id, 0444, c2port_show_rev_id, NULL),
-
-	__ATTR(flash_access, 0644, c2port_show_flash_access,
-					c2port_store_flash_access),
-	__ATTR(flash_erase, 0200, NULL, c2port_store_flash_erase),
-	__ATTR_NULL,
+static struct bin_attribute *c2port_bin_attrs[] = {
+	&bin_attr_flash_data,
+	NULL,
 };
 
-static struct bin_attribute c2port_bin_attrs = {
-	.attr	= {
-		.name	= "flash_data",
-		.mode	= 0644
-	},
-	.read	= c2port_read_flash_data,
-	.write	= c2port_write_flash_data,
-	/* .size is computed at run-time */
+static const struct attribute_group c2port_group = {
+	.attrs = c2port_attrs,
+	.bin_attrs = c2port_bin_attrs,
+};
+
+static const struct attribute_group *c2port_groups[] = {
+	&c2port_group,
+	NULL,
 };
 
 /*
@@ -907,6 +922,8 @@ struct c2port_device *c2port_device_register(char *name,
 		goto error_idr_alloc;
 	c2dev->id = ret;
 
+	bin_attr_flash_data.size = ops->blocks_num * ops->block_size;
+
 	c2dev->dev = device_create(c2port_class, NULL, 0, c2dev,
 				   "c2port%d", c2dev->id);
 	if (unlikely(IS_ERR(c2dev->dev))) {
@@ -919,12 +936,6 @@ struct c2port_device *c2port_device_register(char *name,
 	c2dev->ops = ops;
 	mutex_init(&c2dev->mutex);
 
-	/* Create binary file */
-	c2port_bin_attrs.size = ops->blocks_num * ops->block_size;
-	ret = device_create_bin_file(c2dev->dev, &c2port_bin_attrs);
-	if (unlikely(ret))
-		goto error_device_create_bin_file;
-
 	/* By default C2 port access is off */
 	c2dev->access = c2dev->flash_access = 0;
 	ops->access(c2dev, 0);
@@ -937,9 +948,6 @@ struct c2port_device *c2port_device_register(char *name,
 
 	return c2dev;
 
-error_device_create_bin_file:
-	device_destroy(c2port_class, 0);
-
 error_device_create:
 	spin_lock_irq(&c2port_idr_lock);
 	idr_remove(&c2port_idr, c2dev->id);
@@ -959,7 +967,6 @@ void c2port_device_unregister(struct c2port_device *c2dev)
 
 	dev_info(c2dev->dev, "C2 port %s removed\n", c2dev->name);
 
-	device_remove_bin_file(c2dev->dev, &c2port_bin_attrs);
 	spin_lock_irq(&c2port_idr_lock);
 	idr_remove(&c2port_idr, c2dev->id);
 	spin_unlock_irq(&c2port_idr_lock);
@@ -984,7 +991,7 @@ static int __init c2port_init(void)
 		printk(KERN_ERR "c2port: failed to allocate class\n");
 		return PTR_ERR(c2port_class);
 	}
-	c2port_class->dev_attrs = c2port_attrs;
+	c2port_class->dev_groups = c2port_groups;
 
 	return 0;
 }
diff --git a/drivers/misc/carma/carma-fpga-program.c b/drivers/misc/carma/carma-fpga-program.c
index c6bd7e84de2..7be89832db1 100644
--- a/drivers/misc/carma/carma-fpga-program.c
+++ b/drivers/misc/carma/carma-fpga-program.c
@@ -10,6 +10,8 @@
  */
 
 #include <linux/dma-mapping.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
 #include <linux/of_platform.h>
 #include <linux/completion.h>
 #include <linux/miscdevice.h>
diff --git a/drivers/misc/carma/carma-fpga.c b/drivers/misc/carma/carma-fpga.c
index 7b56563f8b7..14d90eae605 100644
--- a/drivers/misc/carma/carma-fpga.c
+++ b/drivers/misc/carma/carma-fpga.c
@@ -88,6 +88,8 @@
  * interrupt source to the GPIO pin. Tada, we hid the interrupt. :)
  */
 
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
 #include <linux/of_platform.h>
 #include <linux/dma-mapping.h>
 #include <linux/miscdevice.h>
@@ -99,7 +101,6 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/poll.h>
-#include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/kref.h>
 #include <linux/io.h>
@@ -631,8 +632,7 @@ static int data_submit_dma(struct fpga_device *priv, struct data_buf *buf)
 	struct dma_async_tx_descriptor *tx;
 	dma_cookie_t cookie;
 	dma_addr_t dst, src;
-	unsigned long dma_flags = DMA_COMPL_SKIP_DEST_UNMAP |
-				  DMA_COMPL_SKIP_SRC_UNMAP;
+	unsigned long dma_flags = 0;
 
 	dst_sg = buf->vb.sglist;
 	dst_nents = buf->vb.sglen;
diff --git a/drivers/misc/cb710/Kconfig b/drivers/misc/cb710/Kconfig
index 5acb9c5b49c..22429b8b106 100644
--- a/drivers/misc/cb710/Kconfig
+++ b/drivers/misc/cb710/Kconfig
@@ -1,6 +1,6 @@
 config CB710_CORE
 	tristate "ENE CB710/720 Flash memory card reader support"
-	depends on PCI && GENERIC_HARDIRQS
+	depends on PCI
 	help
 	  This option enables support for PCI ENE CB710/720 Flash memory card
 	  reader found in some laptops (ie. some versions of HP Compaq nx9500).
diff --git a/drivers/misc/cb710/core.c b/drivers/misc/cb710/core.c
index 2e50f811ff5..fb397e7d1cc 100644
--- a/drivers/misc/cb710/core.c
+++ b/drivers/misc/cb710/core.c
@@ -176,7 +176,7 @@ static int cb710_suspend(struct pci_dev *pdev, pm_message_t state)
 {
 	struct cb710_chip *chip = pci_get_drvdata(pdev);
 
-	free_irq(pdev->irq, chip);
+	devm_free_irq(&pdev->dev, pdev->irq, chip);
 	pci_save_state(pdev);
 	pci_disable_device(pdev);
 	if (state.event & PM_EVENT_SLEEP)
diff --git a/drivers/misc/ds1682.c b/drivers/misc/ds1682.c
index 154b02e5094..b909fb30232 100644
--- a/drivers/misc/ds1682.c
+++ b/drivers/misc/ds1682.c
@@ -32,7 +32,6 @@
  */
 
 #include <linux/module.h>
-#include <linux/init.h>
 #include <linux/i2c.h>
 #include <linux/string.h>
 #include <linux/list.h>
@@ -86,7 +85,6 @@ static ssize_t ds1682_store(struct device *dev, struct device_attribute *attr,
 {
 	struct sensor_device_attribute_2 *sattr = to_sensor_dev_attr_2(attr);
 	struct i2c_client *client = to_i2c_client(dev);
-	char *endp;
 	u64 val;
 	__le32 val_le;
 	int rc;
@@ -94,8 +92,8 @@ static ssize_t ds1682_store(struct device *dev, struct device_attribute *attr,
 	dev_dbg(dev, "ds1682_store() called on %s\n", attr->attr.name);
 
 	/* Decode input */
-	val = simple_strtoull(buf, &endp, 0);
-	if (buf == endp) {
+	rc = kstrtoull(buf, 0, &val);
+	if (rc < 0) {
 		dev_dbg(dev, "input string not a number\n");
 		return -EINVAL;
 	}
diff --git a/drivers/misc/echo/Kconfig b/drivers/misc/echo/Kconfig
new file mode 100644
index 00000000000..f1d41ea9cd4
--- /dev/null
+++ b/drivers/misc/echo/Kconfig
@@ -0,0 +1,9 @@
+config ECHO
+	tristate "Line Echo Canceller support"
+	default n
+	---help---
+	  This driver provides line echo cancelling support for mISDN and
+	  Zaptel drivers.
+
+	  To compile this driver as a module, choose M here. The module
+	  will be called echo.
diff --git a/drivers/misc/echo/Makefile b/drivers/misc/echo/Makefile
new file mode 100644
index 00000000000..7d4caac12a8
--- /dev/null
+++ b/drivers/misc/echo/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_ECHO) += echo.o
diff --git a/drivers/misc/echo/echo.c b/drivers/misc/echo/echo.c
new file mode 100644
index 00000000000..9597e9523ca
--- /dev/null
+++ b/drivers/misc/echo/echo.c
@@ -0,0 +1,674 @@
+/*
+ * SpanDSP - a series of DSP components for telephony
+ *
+ * echo.c - A line echo canceller.  This code is being developed
+ *          against and partially complies with G168.
+ *
+ * Written by Steve Underwood <steveu@coppice.org>
+ *         and David Rowe <david_at_rowetel_dot_com>
+ *
+ * Copyright (C) 2001, 2003 Steve Underwood, 2007 David Rowe
+ *
+ * Based on a bit from here, a bit from there, eye of toad, ear of
+ * bat, 15 years of failed attempts by David and a few fried brain
+ * cells.
+ *
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/*! \file */
+
+/* Implementation Notes
+   David Rowe
+   April 2007
+
+   This code started life as Steve's NLMS algorithm with a tap
+   rotation algorithm to handle divergence during double talk.  I
+   added a Geigel Double Talk Detector (DTD) [2] and performed some
+   G168 tests.  However I had trouble meeting the G168 requirements,
+   especially for double talk - there were always cases where my DTD
+   failed, for example where near end speech was under the 6dB
+   threshold required for declaring double talk.
+
+   So I tried a two path algorithm [1], which has so far given better
+   results.  The original tap rotation/Geigel algorithm is available
+   in SVN http://svn.rowetel.com/software/oslec/tags/before_16bit.
+   It's probably possible to make it work if some one wants to put some
+   serious work into it.
+
+   At present no special treatment is provided for tones, which
+   generally cause NLMS algorithms to diverge.  Initial runs of a
+   subset of the G168 tests for tones (e.g ./echo_test 6) show the
+   current algorithm is passing OK, which is kind of surprising.  The
+   full set of tests needs to be performed to confirm this result.
+
+   One other interesting change is that I have managed to get the NLMS
+   code to work with 16 bit coefficients, rather than the original 32
+   bit coefficents.  This reduces the MIPs and storage required.
+   I evaulated the 16 bit port using g168_tests.sh and listening tests
+   on 4 real-world samples.
+
+   I also attempted the implementation of a block based NLMS update
+   [2] but although this passes g168_tests.sh it didn't converge well
+   on the real-world samples.  I have no idea why, perhaps a scaling
+   problem.  The block based code is also available in SVN
+   http://svn.rowetel.com/software/oslec/tags/before_16bit.  If this
+   code can be debugged, it will lead to further reduction in MIPS, as
+   the block update code maps nicely onto DSP instruction sets (it's a
+   dot product) compared to the current sample-by-sample update.
+
+   Steve also has some nice notes on echo cancellers in echo.h
+
+   References:
+
+   [1] Ochiai, Areseki, and Ogihara, "Echo Canceller with Two Echo
+       Path Models", IEEE Transactions on communications, COM-25,
+       No. 6, June
+       1977.
+       http://www.rowetel.com/images/echo/dual_path_paper.pdf
+
+   [2] The classic, very useful paper that tells you how to
+       actually build a real world echo canceller:
+	 Messerschmitt, Hedberg, Cole, Haoui, Winship, "Digital Voice
+	 Echo Canceller with a TMS320020,
+	 http://www.rowetel.com/images/echo/spra129.pdf
+
+   [3] I have written a series of blog posts on this work, here is
+       Part 1: http://www.rowetel.com/blog/?p=18
+
+   [4] The source code http://svn.rowetel.com/software/oslec/
+
+   [5] A nice reference on LMS filters:
+	 http://en.wikipedia.org/wiki/Least_mean_squares_filter
+
+   Credits:
+
+   Thanks to Steve Underwood, Jean-Marc Valin, and Ramakrishnan
+   Muthukrishnan for their suggestions and email discussions.  Thanks
+   also to those people who collected echo samples for me such as
+   Mark, Pawel, and Pavel.
+*/
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+#include "echo.h"
+
+#define MIN_TX_POWER_FOR_ADAPTION	64
+#define MIN_RX_POWER_FOR_ADAPTION	64
+#define DTD_HANGOVER			600	/* 600 samples, or 75ms     */
+#define DC_LOG2BETA			3	/* log2() of DC filter Beta */
+
+/* adapting coeffs using the traditional stochastic descent (N)LMS algorithm */
+
+#ifdef __bfin__
+static inline void lms_adapt_bg(struct oslec_state *ec, int clean, int shift)
+{
+	int i;
+	int offset1;
+	int offset2;
+	int factor;
+	int exp;
+	int16_t *phist;
+	int n;
+
+	if (shift > 0)
+		factor = clean << shift;
+	else
+		factor = clean >> -shift;
+
+	/* Update the FIR taps */
+
+	offset2 = ec->curr_pos;
+	offset1 = ec->taps - offset2;
+	phist = &ec->fir_state_bg.history[offset2];
+
+	/* st: and en: help us locate the assembler in echo.s */
+
+	/* asm("st:"); */
+	n = ec->taps;
+	for (i = 0; i < n; i++) {
+		exp = *phist++ * factor;
+		ec->fir_taps16[1][i] += (int16_t) ((exp + (1 << 14)) >> 15);
+	}
+	/* asm("en:"); */
+
+	/* Note the asm for the inner loop above generated by Blackfin gcc
+	   4.1.1 is pretty good (note even parallel instructions used):
+
+	   R0 = W [P0++] (X);
+	   R0 *= R2;
+	   R0 = R0 + R3 (NS) ||
+	   R1 = W [P1] (X) ||
+	   nop;
+	   R0 >>>= 15;
+	   R0 = R0 + R1;
+	   W [P1++] = R0;
+
+	   A block based update algorithm would be much faster but the
+	   above can't be improved on much.  Every instruction saved in
+	   the loop above is 2 MIPs/ch!  The for loop above is where the
+	   Blackfin spends most of it's time - about 17 MIPs/ch measured
+	   with speedtest.c with 256 taps (32ms).  Write-back and
+	   Write-through cache gave about the same performance.
+	 */
+}
+
+/*
+   IDEAS for further optimisation of lms_adapt_bg():
+
+   1/ The rounding is quite costly.  Could we keep as 32 bit coeffs
+   then make filter pluck the MS 16-bits of the coeffs when filtering?
+   However this would lower potential optimisation of filter, as I
+   think the dual-MAC architecture requires packed 16 bit coeffs.
+
+   2/ Block based update would be more efficient, as per comments above,
+   could use dual MAC architecture.
+
+   3/ Look for same sample Blackfin LMS code, see if we can get dual-MAC
+   packing.
+
+   4/ Execute the whole e/c in a block of say 20ms rather than sample
+   by sample.  Processing a few samples every ms is inefficient.
+*/
+
+#else
+static inline void lms_adapt_bg(struct oslec_state *ec, int clean, int shift)
+{
+	int i;
+
+	int offset1;
+	int offset2;
+	int factor;
+	int exp;
+
+	if (shift > 0)
+		factor = clean << shift;
+	else
+		factor = clean >> -shift;
+
+	/* Update the FIR taps */
+
+	offset2 = ec->curr_pos;
+	offset1 = ec->taps - offset2;
+
+	for (i = ec->taps - 1; i >= offset1; i--) {
+		exp = (ec->fir_state_bg.history[i - offset1] * factor);
+		ec->fir_taps16[1][i] += (int16_t) ((exp + (1 << 14)) >> 15);
+	}
+	for (; i >= 0; i--) {
+		exp = (ec->fir_state_bg.history[i + offset2] * factor);
+		ec->fir_taps16[1][i] += (int16_t) ((exp + (1 << 14)) >> 15);
+	}
+}
+#endif
+
+static inline int top_bit(unsigned int bits)
+{
+	if (bits == 0)
+		return -1;
+	else
+		return (int)fls((int32_t) bits) - 1;
+}
+
+struct oslec_state *oslec_create(int len, int adaption_mode)
+{
+	struct oslec_state *ec;
+	int i;
+	const int16_t *history;
+
+	ec = kzalloc(sizeof(*ec), GFP_KERNEL);
+	if (!ec)
+		return NULL;
+
+	ec->taps = len;
+	ec->log2taps = top_bit(len);
+	ec->curr_pos = ec->taps - 1;
+
+	ec->fir_taps16[0] =
+	    kcalloc(ec->taps, sizeof(int16_t), GFP_KERNEL);
+	if (!ec->fir_taps16[0])
+		goto error_oom_0;
+
+	ec->fir_taps16[1] =
+	    kcalloc(ec->taps, sizeof(int16_t), GFP_KERNEL);
+	if (!ec->fir_taps16[1])
+		goto error_oom_1;
+
+	history = fir16_create(&ec->fir_state, ec->fir_taps16[0], ec->taps);
+	if (!history)
+		goto error_state;
+	history = fir16_create(&ec->fir_state_bg, ec->fir_taps16[1], ec->taps);
+	if (!history)
+		goto error_state_bg;
+
+	for (i = 0; i < 5; i++)
+		ec->xvtx[i] = ec->yvtx[i] = ec->xvrx[i] = ec->yvrx[i] = 0;
+
+	ec->cng_level = 1000;
+	oslec_adaption_mode(ec, adaption_mode);
+
+	ec->snapshot = kcalloc(ec->taps, sizeof(int16_t), GFP_KERNEL);
+	if (!ec->snapshot)
+		goto error_snap;
+
+	ec->cond_met = 0;
+	ec->pstates = 0;
+	ec->ltxacc = ec->lrxacc = ec->lcleanacc = ec->lclean_bgacc = 0;
+	ec->ltx = ec->lrx = ec->lclean = ec->lclean_bg = 0;
+	ec->tx_1 = ec->tx_2 = ec->rx_1 = ec->rx_2 = 0;
+	ec->lbgn = ec->lbgn_acc = 0;
+	ec->lbgn_upper = 200;
+	ec->lbgn_upper_acc = ec->lbgn_upper << 13;
+
+	return ec;
+
+error_snap:
+	fir16_free(&ec->fir_state_bg);
+error_state_bg:
+	fir16_free(&ec->fir_state);
+error_state:
+	kfree(ec->fir_taps16[1]);
+error_oom_1:
+	kfree(ec->fir_taps16[0]);
+error_oom_0:
+	kfree(ec);
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(oslec_create);
+
+void oslec_free(struct oslec_state *ec)
+{
+	int i;
+
+	fir16_free(&ec->fir_state);
+	fir16_free(&ec->fir_state_bg);
+	for (i = 0; i < 2; i++)
+		kfree(ec->fir_taps16[i]);
+	kfree(ec->snapshot);
+	kfree(ec);
+}
+EXPORT_SYMBOL_GPL(oslec_free);
+
+void oslec_adaption_mode(struct oslec_state *ec, int adaption_mode)
+{
+	ec->adaption_mode = adaption_mode;
+}
+EXPORT_SYMBOL_GPL(oslec_adaption_mode);
+
+void oslec_flush(struct oslec_state *ec)
+{
+	int i;
+
+	ec->ltxacc = ec->lrxacc = ec->lcleanacc = ec->lclean_bgacc = 0;
+	ec->ltx = ec->lrx = ec->lclean = ec->lclean_bg = 0;
+	ec->tx_1 = ec->tx_2 = ec->rx_1 = ec->rx_2 = 0;
+
+	ec->lbgn = ec->lbgn_acc = 0;
+	ec->lbgn_upper = 200;
+	ec->lbgn_upper_acc = ec->lbgn_upper << 13;
+
+	ec->nonupdate_dwell = 0;
+
+	fir16_flush(&ec->fir_state);
+	fir16_flush(&ec->fir_state_bg);
+	ec->fir_state.curr_pos = ec->taps - 1;
+	ec->fir_state_bg.curr_pos = ec->taps - 1;
+	for (i = 0; i < 2; i++)
+		memset(ec->fir_taps16[i], 0, ec->taps * sizeof(int16_t));
+
+	ec->curr_pos = ec->taps - 1;
+	ec->pstates = 0;
+}
+EXPORT_SYMBOL_GPL(oslec_flush);
+
+void oslec_snapshot(struct oslec_state *ec)
+{
+	memcpy(ec->snapshot, ec->fir_taps16[0], ec->taps * sizeof(int16_t));
+}
+EXPORT_SYMBOL_GPL(oslec_snapshot);
+
+/* Dual Path Echo Canceller */
+
+int16_t oslec_update(struct oslec_state *ec, int16_t tx, int16_t rx)
+{
+	int32_t echo_value;
+	int clean_bg;
+	int tmp;
+	int tmp1;
+
+	/*
+	 * Input scaling was found be required to prevent problems when tx
+	 * starts clipping.  Another possible way to handle this would be the
+	 * filter coefficent scaling.
+	 */
+
+	ec->tx = tx;
+	ec->rx = rx;
+	tx >>= 1;
+	rx >>= 1;
+
+	/*
+	 * Filter DC, 3dB point is 160Hz (I think), note 32 bit precision
+	 * required otherwise values do not track down to 0. Zero at DC, Pole
+	 * at (1-Beta) on real axis.  Some chip sets (like Si labs) don't
+	 * need this, but something like a $10 X100P card does.  Any DC really
+	 * slows down convergence.
+	 *
+	 * Note: removes some low frequency from the signal, this reduces the
+	 * speech quality when listening to samples through headphones but may
+	 * not be obvious through a telephone handset.
+	 *
+	 * Note that the 3dB frequency in radians is approx Beta, e.g. for Beta
+	 * = 2^(-3) = 0.125, 3dB freq is 0.125 rads = 159Hz.
+	 */
+
+	if (ec->adaption_mode & ECHO_CAN_USE_RX_HPF) {
+		tmp = rx << 15;
+
+		/*
+		 * Make sure the gain of the HPF is 1.0. This can still
+		 * saturate a little under impulse conditions, and it might
+		 * roll to 32768 and need clipping on sustained peak level
+		 * signals. However, the scale of such clipping is small, and
+		 * the error due to any saturation should not markedly affect
+		 * the downstream processing.
+		 */
+		tmp -= (tmp >> 4);
+
+		ec->rx_1 += -(ec->rx_1 >> DC_LOG2BETA) + tmp - ec->rx_2;
+
+		/*
+		 * hard limit filter to prevent clipping.  Note that at this
+		 * stage rx should be limited to +/- 16383 due to right shift
+		 * above
+		 */
+		tmp1 = ec->rx_1 >> 15;
+		if (tmp1 > 16383)
+			tmp1 = 16383;
+		if (tmp1 < -16383)
+			tmp1 = -16383;
+		rx = tmp1;
+		ec->rx_2 = tmp;
+	}
+
+	/* Block average of power in the filter states.  Used for
+	   adaption power calculation. */
+
+	{
+		int new, old;
+
+		/* efficient "out with the old and in with the new" algorithm so
+		   we don't have to recalculate over the whole block of
+		   samples. */
+		new = (int)tx * (int)tx;
+		old = (int)ec->fir_state.history[ec->fir_state.curr_pos] *
+		    (int)ec->fir_state.history[ec->fir_state.curr_pos];
+		ec->pstates +=
+		    ((new - old) + (1 << (ec->log2taps - 1))) >> ec->log2taps;
+		if (ec->pstates < 0)
+			ec->pstates = 0;
+	}
+
+	/* Calculate short term average levels using simple single pole IIRs */
+
+	ec->ltxacc += abs(tx) - ec->ltx;
+	ec->ltx = (ec->ltxacc + (1 << 4)) >> 5;
+	ec->lrxacc += abs(rx) - ec->lrx;
+	ec->lrx = (ec->lrxacc + (1 << 4)) >> 5;
+
+	/* Foreground filter */
+
+	ec->fir_state.coeffs = ec->fir_taps16[0];
+	echo_value = fir16(&ec->fir_state, tx);
+	ec->clean = rx - echo_value;
+	ec->lcleanacc += abs(ec->clean) - ec->lclean;
+	ec->lclean = (ec->lcleanacc + (1 << 4)) >> 5;
+
+	/* Background filter */
+
+	echo_value = fir16(&ec->fir_state_bg, tx);
+	clean_bg = rx - echo_value;
+	ec->lclean_bgacc += abs(clean_bg) - ec->lclean_bg;
+	ec->lclean_bg = (ec->lclean_bgacc + (1 << 4)) >> 5;
+
+	/* Background Filter adaption */
+
+	/* Almost always adap bg filter, just simple DT and energy
+	   detection to minimise adaption in cases of strong double talk.
+	   However this is not critical for the dual path algorithm.
+	 */
+	ec->factor = 0;
+	ec->shift = 0;
+	if ((ec->nonupdate_dwell == 0)) {
+		int p, logp, shift;
+
+		/* Determine:
+
+		   f = Beta * clean_bg_rx/P ------ (1)
+
+		   where P is the total power in the filter states.
+
+		   The Boffins have shown that if we obey (1) we converge
+		   quickly and avoid instability.
+
+		   The correct factor f must be in Q30, as this is the fixed
+		   point format required by the lms_adapt_bg() function,
+		   therefore the scaled version of (1) is:
+
+		   (2^30) * f  = (2^30) * Beta * clean_bg_rx/P
+		   factor      = (2^30) * Beta * clean_bg_rx/P     ----- (2)
+
+		   We have chosen Beta = 0.25 by experiment, so:
+
+		   factor      = (2^30) * (2^-2) * clean_bg_rx/P
+
+		   (30 - 2 - log2(P))
+		   factor      = clean_bg_rx 2                     ----- (3)
+
+		   To avoid a divide we approximate log2(P) as top_bit(P),
+		   which returns the position of the highest non-zero bit in
+		   P.  This approximation introduces an error as large as a
+		   factor of 2, but the algorithm seems to handle it OK.
+
+		   Come to think of it a divide may not be a big deal on a
+		   modern DSP, so its probably worth checking out the cycles
+		   for a divide versus a top_bit() implementation.
+		 */
+
+		p = MIN_TX_POWER_FOR_ADAPTION + ec->pstates;
+		logp = top_bit(p) + ec->log2taps;
+		shift = 30 - 2 - logp;
+		ec->shift = shift;
+
+		lms_adapt_bg(ec, clean_bg, shift);
+	}
+
+	/* very simple DTD to make sure we dont try and adapt with strong
+	   near end speech */
+
+	ec->adapt = 0;
+	if ((ec->lrx > MIN_RX_POWER_FOR_ADAPTION) && (ec->lrx > ec->ltx))
+		ec->nonupdate_dwell = DTD_HANGOVER;
+	if (ec->nonupdate_dwell)
+		ec->nonupdate_dwell--;
+
+	/* Transfer logic */
+
+	/* These conditions are from the dual path paper [1], I messed with
+	   them a bit to improve performance. */
+
+	if ((ec->adaption_mode & ECHO_CAN_USE_ADAPTION) &&
+	    (ec->nonupdate_dwell == 0) &&
+	    /* (ec->Lclean_bg < 0.875*ec->Lclean) */
+	    (8 * ec->lclean_bg < 7 * ec->lclean) &&
+	    /* (ec->Lclean_bg < 0.125*ec->Ltx) */
+	    (8 * ec->lclean_bg < ec->ltx)) {
+		if (ec->cond_met == 6) {
+			/*
+			 * BG filter has had better results for 6 consecutive
+			 * samples
+			 */
+			ec->adapt = 1;
+			memcpy(ec->fir_taps16[0], ec->fir_taps16[1],
+			       ec->taps * sizeof(int16_t));
+		} else
+			ec->cond_met++;
+	} else
+		ec->cond_met = 0;
+
+	/* Non-Linear Processing */
+
+	ec->clean_nlp = ec->clean;
+	if (ec->adaption_mode & ECHO_CAN_USE_NLP) {
+		/*
+		 * Non-linear processor - a fancy way to say "zap small
+		 * signals, to avoid residual echo due to (uLaw/ALaw)
+		 * non-linearity in the channel.".
+		 */
+
+		if ((16 * ec->lclean < ec->ltx)) {
+			/*
+			 * Our e/c has improved echo by at least 24 dB (each
+			 * factor of 2 is 6dB, so 2*2*2*2=16 is the same as
+			 * 6+6+6+6=24dB)
+			 */
+			if (ec->adaption_mode & ECHO_CAN_USE_CNG) {
+				ec->cng_level = ec->lbgn;
+
+				/*
+				 * Very elementary comfort noise generation.
+				 * Just random numbers rolled off very vaguely
+				 * Hoth-like.  DR: This noise doesn't sound
+				 * quite right to me - I suspect there are some
+				 * overflow issues in the filtering as it's too
+				 * "crackly".
+				 * TODO: debug this, maybe just play noise at
+				 * high level or look at spectrum.
+				 */
+
+				ec->cng_rndnum =
+				    1664525U * ec->cng_rndnum + 1013904223U;
+				ec->cng_filter =
+				    ((ec->cng_rndnum & 0xFFFF) - 32768 +
+				     5 * ec->cng_filter) >> 3;
+				ec->clean_nlp =
+				    (ec->cng_filter * ec->cng_level * 8) >> 14;
+
+			} else if (ec->adaption_mode & ECHO_CAN_USE_CLIP) {
+				/* This sounds much better than CNG */
+				if (ec->clean_nlp > ec->lbgn)
+					ec->clean_nlp = ec->lbgn;
+				if (ec->clean_nlp < -ec->lbgn)
+					ec->clean_nlp = -ec->lbgn;
+			} else {
+				/*
+				 * just mute the residual, doesn't sound very
+				 * good, used mainly in G168 tests
+				 */
+				ec->clean_nlp = 0;
+			}
+		} else {
+			/*
+			 * Background noise estimator.  I tried a few
+			 * algorithms here without much luck.  This very simple
+			 * one seems to work best, we just average the level
+			 * using a slow (1 sec time const) filter if the
+			 * current level is less than a (experimentally
+			 * derived) constant.  This means we dont include high
+			 * level signals like near end speech.  When combined
+			 * with CNG or especially CLIP seems to work OK.
+			 */
+			if (ec->lclean < 40) {
+				ec->lbgn_acc += abs(ec->clean) - ec->lbgn;
+				ec->lbgn = (ec->lbgn_acc + (1 << 11)) >> 12;
+			}
+		}
+	}
+
+	/* Roll around the taps buffer */
+	if (ec->curr_pos <= 0)
+		ec->curr_pos = ec->taps;
+	ec->curr_pos--;
+
+	if (ec->adaption_mode & ECHO_CAN_DISABLE)
+		ec->clean_nlp = rx;
+
+	/* Output scaled back up again to match input scaling */
+
+	return (int16_t) ec->clean_nlp << 1;
+}
+EXPORT_SYMBOL_GPL(oslec_update);
+
+/* This function is separated from the echo canceller is it is usually called
+   as part of the tx process.  See rx HP (DC blocking) filter above, it's
+   the same design.
+
+   Some soft phones send speech signals with a lot of low frequency
+   energy, e.g. down to 20Hz.  This can make the hybrid non-linear
+   which causes the echo canceller to fall over.  This filter can help
+   by removing any low frequency before it gets to the tx port of the
+   hybrid.
+
+   It can also help by removing and DC in the tx signal.  DC is bad
+   for LMS algorithms.
+
+   This is one of the classic DC removal filters, adjusted to provide
+   sufficient bass rolloff to meet the above requirement to protect hybrids
+   from things that upset them. The difference between successive samples
+   produces a lousy HPF, and then a suitably placed pole flattens things out.
+   The final result is a nicely rolled off bass end. The filtering is
+   implemented with extended fractional precision, which noise shapes things,
+   giving very clean DC removal.
+*/
+
+int16_t oslec_hpf_tx(struct oslec_state *ec, int16_t tx)
+{
+	int tmp;
+	int tmp1;
+
+	if (ec->adaption_mode & ECHO_CAN_USE_TX_HPF) {
+		tmp = tx << 15;
+
+		/*
+		 * Make sure the gain of the HPF is 1.0. The first can still
+		 * saturate a little under impulse conditions, and it might
+		 * roll to 32768 and need clipping on sustained peak level
+		 * signals. However, the scale of such clipping is small, and
+		 * the error due to any saturation should not markedly affect
+		 * the downstream processing.
+		 */
+		tmp -= (tmp >> 4);
+
+		ec->tx_1 += -(ec->tx_1 >> DC_LOG2BETA) + tmp - ec->tx_2;
+		tmp1 = ec->tx_1 >> 15;
+		if (tmp1 > 32767)
+			tmp1 = 32767;
+		if (tmp1 < -32767)
+			tmp1 = -32767;
+		tx = tmp1;
+		ec->tx_2 = tmp;
+	}
+
+	return tx;
+}
+EXPORT_SYMBOL_GPL(oslec_hpf_tx);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("David Rowe");
+MODULE_DESCRIPTION("Open Source Line Echo Canceller");
+MODULE_VERSION("0.3.0");
diff --git a/drivers/misc/echo/echo.h b/drivers/misc/echo/echo.h
new file mode 100644
index 00000000000..9b08c63e636
--- /dev/null
+++ b/drivers/misc/echo/echo.h
@@ -0,0 +1,187 @@
+/*
+ * SpanDSP - a series of DSP components for telephony
+ *
+ * echo.c - A line echo canceller.  This code is being developed
+ *          against and partially complies with G168.
+ *
+ * Written by Steve Underwood <steveu@coppice.org>
+ *         and David Rowe <david_at_rowetel_dot_com>
+ *
+ * Copyright (C) 2001 Steve Underwood and 2007 David Rowe
+ *
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef __ECHO_H
+#define __ECHO_H
+
+/*
+Line echo cancellation for voice
+
+What does it do?
+
+This module aims to provide G.168-2002 compliant echo cancellation, to remove
+electrical echoes (e.g. from 2-4 wire hybrids) from voice calls.
+
+How does it work?
+
+The heart of the echo cancellor is FIR filter. This is adapted to match the
+echo impulse response of the telephone line. It must be long enough to
+adequately cover the duration of that impulse response. The signal transmitted
+to the telephone line is passed through the FIR filter. Once the FIR is
+properly adapted, the resulting output is an estimate of the echo signal
+received from the line. This is subtracted from the received signal. The result
+is an estimate of the signal which originated at the far end of the line, free
+from echos of our own transmitted signal.
+
+The least mean squares (LMS) algorithm is attributed to Widrow and Hoff, and
+was introduced in 1960. It is the commonest form of filter adaption used in
+things like modem line equalisers and line echo cancellers. There it works very
+well.  However, it only works well for signals of constant amplitude. It works
+very poorly for things like speech echo cancellation, where the signal level
+varies widely.  This is quite easy to fix. If the signal level is normalised -
+similar to applying AGC - LMS can work as well for a signal of varying
+amplitude as it does for a modem signal. This normalised least mean squares
+(NLMS) algorithm is the commonest one used for speech echo cancellation. Many
+other algorithms exist - e.g. RLS (essentially the same as Kalman filtering),
+FAP, etc. Some perform significantly better than NLMS.  However, factors such
+as computational complexity and patents favour the use of NLMS.
+
+A simple refinement to NLMS can improve its performance with speech. NLMS tends
+to adapt best to the strongest parts of a signal. If the signal is white noise,
+the NLMS algorithm works very well. However, speech has more low frequency than
+high frequency content. Pre-whitening (i.e. filtering the signal to flatten its
+spectrum) the echo signal improves the adapt rate for speech, and ensures the
+final residual signal is not heavily biased towards high frequencies. A very
+low complexity filter is adequate for this, so pre-whitening adds little to the
+compute requirements of the echo canceller.
+
+An FIR filter adapted using pre-whitened NLMS performs well, provided certain
+conditions are met:
+
+    - The transmitted signal has poor self-correlation.
+    - There is no signal being generated within the environment being
+      cancelled.
+
+The difficulty is that neither of these can be guaranteed.
+
+If the adaption is performed while transmitting noise (or something fairly
+noise like, such as voice) the adaption works very well. If the adaption is
+performed while transmitting something highly correlative (typically narrow
+band energy such as signalling tones or DTMF), the adaption can go seriously
+wrong. The reason is there is only one solution for the adaption on a near
+random signal - the impulse response of the line. For a repetitive signal,
+there are any number of solutions which converge the adaption, and nothing
+guides the adaption to choose the generalised one. Allowing an untrained
+canceller to converge on this kind of narrowband energy probably a good thing,
+since at least it cancels the tones. Allowing a well converged canceller to
+continue converging on such energy is just a way to ruin its generalised
+adaption. A narrowband detector is needed, so adapation can be suspended at
+appropriate times.
+
+The adaption process is based on trying to eliminate the received signal. When
+there is any signal from within the environment being cancelled it may upset
+the adaption process. Similarly, if the signal we are transmitting is small,
+noise may dominate and disturb the adaption process. If we can ensure that the
+adaption is only performed when we are transmitting a significant signal level,
+and the environment is not, things will be OK. Clearly, it is easy to tell when
+we are sending a significant signal. Telling, if the environment is generating
+a significant signal, and doing it with sufficient speed that the adaption will
+not have diverged too much more we stop it, is a little harder.
+
+The key problem in detecting when the environment is sourcing significant
+energy is that we must do this very quickly. Given a reasonably long sample of
+the received signal, there are a number of strategies which may be used to
+assess whether that signal contains a strong far end component. However, by the
+time that assessment is complete the far end signal will have already caused
+major mis-convergence in the adaption process. An assessment algorithm is
+needed which produces a fairly accurate result from a very short burst of far
+end energy.
+
+How do I use it?
+
+The echo cancellor processes both the transmit and receive streams sample by
+sample. The processing function is not declared inline. Unfortunately,
+cancellation requires many operations per sample, so the call overhead is only
+a minor burden.
+*/
+
+#include "fir.h"
+#include "oslec.h"
+
+/*
+    G.168 echo canceller descriptor. This defines the working state for a line
+    echo canceller.
+*/
+struct oslec_state {
+	int16_t tx;
+	int16_t rx;
+	int16_t clean;
+	int16_t clean_nlp;
+
+	int nonupdate_dwell;
+	int curr_pos;
+	int taps;
+	int log2taps;
+	int adaption_mode;
+
+	int cond_met;
+	int32_t pstates;
+	int16_t adapt;
+	int32_t factor;
+	int16_t shift;
+
+	/* Average levels and averaging filter states */
+	int ltxacc;
+	int lrxacc;
+	int lcleanacc;
+	int lclean_bgacc;
+	int ltx;
+	int lrx;
+	int lclean;
+	int lclean_bg;
+	int lbgn;
+	int lbgn_acc;
+	int lbgn_upper;
+	int lbgn_upper_acc;
+
+	/* foreground and background filter states */
+	struct fir16_state_t fir_state;
+	struct fir16_state_t fir_state_bg;
+	int16_t *fir_taps16[2];
+
+	/* DC blocking filter states */
+	int tx_1;
+	int tx_2;
+	int rx_1;
+	int rx_2;
+
+	/* optional High Pass Filter states */
+	int32_t xvtx[5];
+	int32_t yvtx[5];
+	int32_t xvrx[5];
+	int32_t yvrx[5];
+
+	/* Parameters for the optional Hoth noise generator */
+	int cng_level;
+	int cng_rndnum;
+	int cng_filter;
+
+	/* snapshot sample of coeffs used for development */
+	int16_t *snapshot;
+};
+
+#endif /* __ECHO_H */
diff --git a/drivers/misc/echo/fir.h b/drivers/misc/echo/fir.h
new file mode 100644
index 00000000000..7b9fabf1fea
--- /dev/null
+++ b/drivers/misc/echo/fir.h
@@ -0,0 +1,216 @@
+/*
+ * SpanDSP - a series of DSP components for telephony
+ *
+ * fir.h - General telephony FIR routines
+ *
+ * Written by Steve Underwood <steveu@coppice.org>
+ *
+ * Copyright (C) 2002 Steve Underwood
+ *
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#if !defined(_FIR_H_)
+#define _FIR_H_
+
+/*
+   Blackfin NOTES & IDEAS:
+
+   A simple dot product function is used to implement the filter.  This performs
+   just one MAC/cycle which is inefficient but was easy to implement as a first
+   pass.  The current Blackfin code also uses an unrolled form of the filter
+   history to avoid 0 length hardware loop issues.  This is wasteful of
+   memory.
+
+   Ideas for improvement:
+
+   1/ Rewrite filter for dual MAC inner loop.  The issue here is handling
+   history sample offsets that are 16 bit aligned - the dual MAC needs
+   32 bit aligmnent.  There are some good examples in libbfdsp.
+
+   2/ Use the hardware circular buffer facility tohalve memory usage.
+
+   3/ Consider using internal memory.
+
+   Using less memory might also improve speed as cache misses will be
+   reduced. A drop in MIPs and memory approaching 50% should be
+   possible.
+
+   The foreground and background filters currenlty use a total of
+   about 10 MIPs/ch as measured with speedtest.c on a 256 TAP echo
+   can.
+*/
+
+/*
+ * 16 bit integer FIR descriptor. This defines the working state for a single
+ * instance of an FIR filter using 16 bit integer coefficients.
+ */
+struct fir16_state_t {
+	int taps;
+	int curr_pos;
+	const int16_t *coeffs;
+	int16_t *history;
+};
+
+/*
+ * 32 bit integer FIR descriptor. This defines the working state for a single
+ * instance of an FIR filter using 32 bit integer coefficients, and filtering
+ * 16 bit integer data.
+ */
+struct fir32_state_t {
+	int taps;
+	int curr_pos;
+	const int32_t *coeffs;
+	int16_t *history;
+};
+
+/*
+ * Floating point FIR descriptor. This defines the working state for a single
+ * instance of an FIR filter using floating point coefficients and data.
+ */
+struct fir_float_state_t {
+	int taps;
+	int curr_pos;
+	const float *coeffs;
+	float *history;
+};
+
+static inline const int16_t *fir16_create(struct fir16_state_t *fir,
+					      const int16_t *coeffs, int taps)
+{
+	fir->taps = taps;
+	fir->curr_pos = taps - 1;
+	fir->coeffs = coeffs;
+#if defined(__bfin__)
+	fir->history = kcalloc(2 * taps, sizeof(int16_t), GFP_KERNEL);
+#else
+	fir->history = kcalloc(taps, sizeof(int16_t), GFP_KERNEL);
+#endif
+	return fir->history;
+}
+
+static inline void fir16_flush(struct fir16_state_t *fir)
+{
+#if defined(__bfin__)
+	memset(fir->history, 0, 2 * fir->taps * sizeof(int16_t));
+#else
+	memset(fir->history, 0, fir->taps * sizeof(int16_t));
+#endif
+}
+
+static inline void fir16_free(struct fir16_state_t *fir)
+{
+	kfree(fir->history);
+}
+
+#ifdef __bfin__
+static inline int32_t dot_asm(short *x, short *y, int len)
+{
+	int dot;
+
+	len--;
+
+	__asm__("I0 = %1;\n\t"
+		"I1 = %2;\n\t"
+		"A0 = 0;\n\t"
+		"R0.L = W[I0++] || R1.L = W[I1++];\n\t"
+		"LOOP dot%= LC0 = %3;\n\t"
+		"LOOP_BEGIN dot%=;\n\t"
+		"A0 += R0.L * R1.L (IS) || R0.L = W[I0++] || R1.L = W[I1++];\n\t"
+		"LOOP_END dot%=;\n\t"
+		"A0 += R0.L*R1.L (IS);\n\t"
+		"R0 = A0;\n\t"
+		"%0 = R0;\n\t"
+		: "=&d"(dot)
+		: "a"(x), "a"(y), "a"(len)
+		: "I0", "I1", "A1", "A0", "R0", "R1"
+	);
+
+	return dot;
+}
+#endif
+
+static inline int16_t fir16(struct fir16_state_t *fir, int16_t sample)
+{
+	int32_t y;
+#if defined(__bfin__)
+	fir->history[fir->curr_pos] = sample;
+	fir->history[fir->curr_pos + fir->taps] = sample;
+	y = dot_asm((int16_t *) fir->coeffs, &fir->history[fir->curr_pos],
+		    fir->taps);
+#else
+	int i;
+	int offset1;
+	int offset2;
+
+	fir->history[fir->curr_pos] = sample;
+
+	offset2 = fir->curr_pos;
+	offset1 = fir->taps - offset2;
+	y = 0;
+	for (i = fir->taps - 1; i >= offset1; i--)
+		y += fir->coeffs[i] * fir->history[i - offset1];
+	for (; i >= 0; i--)
+		y += fir->coeffs[i] * fir->history[i + offset2];
+#endif
+	if (fir->curr_pos <= 0)
+		fir->curr_pos = fir->taps;
+	fir->curr_pos--;
+	return (int16_t) (y >> 15);
+}
+
+static inline const int16_t *fir32_create(struct fir32_state_t *fir,
+					      const int32_t *coeffs, int taps)
+{
+	fir->taps = taps;
+	fir->curr_pos = taps - 1;
+	fir->coeffs = coeffs;
+	fir->history = kcalloc(taps, sizeof(int16_t), GFP_KERNEL);
+	return fir->history;
+}
+
+static inline void fir32_flush(struct fir32_state_t *fir)
+{
+	memset(fir->history, 0, fir->taps * sizeof(int16_t));
+}
+
+static inline void fir32_free(struct fir32_state_t *fir)
+{
+	kfree(fir->history);
+}
+
+static inline int16_t fir32(struct fir32_state_t *fir, int16_t sample)
+{
+	int i;
+	int32_t y;
+	int offset1;
+	int offset2;
+
+	fir->history[fir->curr_pos] = sample;
+	offset2 = fir->curr_pos;
+	offset1 = fir->taps - offset2;
+	y = 0;
+	for (i = fir->taps - 1; i >= offset1; i--)
+		y += fir->coeffs[i] * fir->history[i - offset1];
+	for (; i >= 0; i--)
+		y += fir->coeffs[i] * fir->history[i + offset2];
+	if (fir->curr_pos <= 0)
+		fir->curr_pos = fir->taps;
+	fir->curr_pos--;
+	return (int16_t) (y >> 15);
+}
+
+#endif
diff --git a/drivers/misc/echo/oslec.h b/drivers/misc/echo/oslec.h
new file mode 100644
index 00000000000..f4175360ce2
--- /dev/null
+++ b/drivers/misc/echo/oslec.h
@@ -0,0 +1,94 @@
+/*
+ *  OSLEC - A line echo canceller.  This code is being developed
+ *          against and partially complies with G168. Using code from SpanDSP
+ *
+ * Written by Steve Underwood <steveu@coppice.org>
+ *         and David Rowe <david_at_rowetel_dot_com>
+ *
+ * Copyright (C) 2001 Steve Underwood and 2007-2008 David Rowe
+ *
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#ifndef __OSLEC_H
+#define __OSLEC_H
+
+/* Mask bits for the adaption mode */
+#define ECHO_CAN_USE_ADAPTION	0x01
+#define ECHO_CAN_USE_NLP	0x02
+#define ECHO_CAN_USE_CNG	0x04
+#define ECHO_CAN_USE_CLIP	0x08
+#define ECHO_CAN_USE_TX_HPF	0x10
+#define ECHO_CAN_USE_RX_HPF	0x20
+#define ECHO_CAN_DISABLE	0x40
+
+/**
+ * oslec_state: G.168 echo canceller descriptor.
+ *
+ * This defines the working state for a line echo canceller.
+ */
+struct oslec_state;
+
+/**
+ * oslec_create - Create a voice echo canceller context.
+ * @len: The length of the canceller, in samples.
+ * @return: The new canceller context, or NULL if the canceller could not be
+ * created.
+ */
+struct oslec_state *oslec_create(int len, int adaption_mode);
+
+/**
+ * oslec_free - Free a voice echo canceller context.
+ * @ec: The echo canceller context.
+ */
+void oslec_free(struct oslec_state *ec);
+
+/**
+ * oslec_flush - Flush (reinitialise) a voice echo canceller context.
+ * @ec: The echo canceller context.
+ */
+void oslec_flush(struct oslec_state *ec);
+
+/**
+ * oslec_adaption_mode - set the adaption mode of a voice echo canceller context.
+ * @ec The echo canceller context.
+ * @adaption_mode: The mode.
+ */
+void oslec_adaption_mode(struct oslec_state *ec, int adaption_mode);
+
+void oslec_snapshot(struct oslec_state *ec);
+
+/**
+ * oslec_update: Process a sample through a voice echo canceller.
+ * @ec: The echo canceller context.
+ * @tx: The transmitted audio sample.
+ * @rx: The received audio sample.
+ *
+ * The return value is the clean (echo cancelled) received sample.
+ */
+int16_t oslec_update(struct oslec_state *ec, int16_t tx, int16_t rx);
+
+/**
+ * oslec_hpf_tx: Process to high pass filter the tx signal.
+ * @ec: The echo canceller context.
+ * @tx: The transmitted auio sample.
+ *
+ * The return value is the HP filtered transmit sample, send this to your D/A.
+ */
+int16_t oslec_hpf_tx(struct oslec_state *ec, int16_t tx);
+
+#endif /* __OSLEC_H */
diff --git a/drivers/misc/eeprom/Kconfig b/drivers/misc/eeprom/Kconfig
index 04f2e1fa9dd..9536852fd4c 100644
--- a/drivers/misc/eeprom/Kconfig
+++ b/drivers/misc/eeprom/Kconfig
@@ -96,4 +96,17 @@ config EEPROM_DIGSY_MTC_CFG
 
 	  If unsure, say N.
 
+config EEPROM_SUNXI_SID
+	tristate "Allwinner sunxi security ID support"
+	depends on ARCH_SUNXI && SYSFS
+	help
+	  This is a driver for the 'security ID' available on various Allwinner
+	  devices.
+
+	  Due to the potential risks involved with changing e-fuses,
+	  this driver is read-only.
+
+	  This driver can also be built as a module. If so, the module
+	  will be called sunxi_sid.
+
 endmenu
diff --git a/drivers/misc/eeprom/Makefile b/drivers/misc/eeprom/Makefile
index fc1e81d2926..9507aec95e9 100644
--- a/drivers/misc/eeprom/Makefile
+++ b/drivers/misc/eeprom/Makefile
@@ -4,4 +4,5 @@ obj-$(CONFIG_EEPROM_LEGACY)	+= eeprom.o
 obj-$(CONFIG_EEPROM_MAX6875)	+= max6875.o
 obj-$(CONFIG_EEPROM_93CX6)	+= eeprom_93cx6.o
 obj-$(CONFIG_EEPROM_93XX46)	+= eeprom_93xx46.o
+obj-$(CONFIG_EEPROM_SUNXI_SID)	+= sunxi_sid.o
 obj-$(CONFIG_EEPROM_DIGSY_MTC_CFG) += digsy_mtc_eeprom.o
diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c
index 5d4fd69d04c..d87f77f790d 100644
--- a/drivers/misc/eeprom/at24.c
+++ b/drivers/misc/eeprom/at24.c
@@ -22,7 +22,7 @@
 #include <linux/jiffies.h>
 #include <linux/of.h>
 #include <linux/i2c.h>
-#include <linux/i2c/at24.h>
+#include <linux/platform_data/at24.h>
 
 /*
  * I2C EEPROMs from most vendors are inexpensive and mostly interchangeable.
@@ -428,6 +428,9 @@ static ssize_t at24_bin_write(struct file *filp, struct kobject *kobj,
 {
 	struct at24_data *at24;
 
+	if (unlikely(off >= attr->size))
+		return -EFBIG;
+
 	at24 = dev_get_drvdata(container_of(kobj, struct device, kobj));
 	return at24_write(at24, buf, off, count);
 }
diff --git a/drivers/misc/eeprom/at25.c b/drivers/misc/eeprom/at25.c
index 840b3594a5a..634f72929e1 100644
--- a/drivers/misc/eeprom/at25.c
+++ b/drivers/misc/eeprom/at25.c
@@ -10,7 +10,6 @@
  */
 
 #include <linux/kernel.h>
-#include <linux/init.h>
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/delay.h>
@@ -462,10 +461,17 @@ static int at25_remove(struct spi_device *spi)
 
 /*-------------------------------------------------------------------------*/
 
+static const struct of_device_id at25_of_match[] = {
+	{ .compatible = "atmel,at25", },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, at25_of_match);
+
 static struct spi_driver at25_driver = {
 	.driver = {
 		.name		= "at25",
 		.owner		= THIS_MODULE,
+		.of_match_table = at25_of_match,
 	},
 	.probe		= at25_probe,
 	.remove		= at25_remove,
diff --git a/drivers/misc/eeprom/eeprom.c b/drivers/misc/eeprom/eeprom.c
index c169e07654c..33f8673d23a 100644
--- a/drivers/misc/eeprom/eeprom.c
+++ b/drivers/misc/eeprom/eeprom.c
@@ -3,7 +3,7 @@
  *                           Philip Edelbrock <phil@netroedge.com>
  * Copyright (C) 2003 Greg Kroah-Hartman <greg@kroah.com>
  * Copyright (C) 2003 IBM Corp.
- * Copyright (C) 2004 Jean Delvare <khali@linux-fr.org>
+ * Copyright (C) 2004 Jean Delvare <jdelvare@suse.de>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -17,7 +17,6 @@
  */
 
 #include <linux/kernel.h>
-#include <linux/init.h>
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/jiffies.h>
diff --git a/drivers/misc/eeprom/eeprom_93xx46.c b/drivers/misc/eeprom/eeprom_93xx46.c
index 94cfc121257..9ebeacdb8ec 100644
--- a/drivers/misc/eeprom/eeprom_93xx46.c
+++ b/drivers/misc/eeprom/eeprom_93xx46.c
@@ -11,7 +11,6 @@
 #include <linux/delay.h>
 #include <linux/device.h>
 #include <linux/kernel.h>
-#include <linux/init.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/slab.h>
@@ -202,7 +201,7 @@ eeprom_93xx46_bin_write(struct file *filp, struct kobject *kobj,
 	edev = dev_get_drvdata(dev);
 
 	if (unlikely(off >= edev->bin.size))
-		return 0;
+		return -EFBIG;
 	if ((off + count) > edev->bin.size)
 		count = edev->bin.size - off;
 	if (unlikely(!count))
@@ -378,7 +377,6 @@ static int eeprom_93xx46_remove(struct spi_device *spi)
 		device_remove_file(&spi->dev, &dev_attr_erase);
 
 	sysfs_remove_bin_file(&spi->dev.kobj, &edev->bin);
-	spi_set_drvdata(spi, NULL);
 	kfree(edev);
 	return 0;
 }
diff --git a/drivers/misc/eeprom/max6875.c b/drivers/misc/eeprom/max6875.c
index e36157d5d3a..580ff9df552 100644
--- a/drivers/misc/eeprom/max6875.c
+++ b/drivers/misc/eeprom/max6875.c
@@ -27,7 +27,6 @@
  */
 
 #include <linux/kernel.h>
-#include <linux/init.h>
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/i2c.h>
diff --git a/drivers/misc/eeprom/sunxi_sid.c b/drivers/misc/eeprom/sunxi_sid.c
new file mode 100644
index 00000000000..3f2b625b203
--- /dev/null
+++ b/drivers/misc/eeprom/sunxi_sid.c
@@ -0,0 +1,157 @@
+/*
+ * Copyright (c) 2013 Oliver Schinagl <oliver@schinagl.nl>
+ * http://www.linux-sunxi.org
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * This driver exposes the Allwinner security ID, efuses exported in byte-
+ * sized chunks.
+ */
+
+#include <linux/compiler.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/export.h>
+#include <linux/fs.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/kobject.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+#include <linux/stat.h>
+#include <linux/sysfs.h>
+#include <linux/types.h>
+
+#define DRV_NAME "sunxi-sid"
+
+struct sunxi_sid_data {
+	void __iomem *reg_base;
+	unsigned int keysize;
+};
+
+/* We read the entire key, due to a 32 bit read alignment requirement. Since we
+ * want to return the requested byte, this results in somewhat slower code and
+ * uses 4 times more reads as needed but keeps code simpler. Since the SID is
+ * only very rarely probed, this is not really an issue.
+ */
+static u8 sunxi_sid_read_byte(const struct sunxi_sid_data *sid_data,
+			      const unsigned int offset)
+{
+	u32 sid_key;
+
+	if (offset >= sid_data->keysize)
+		return 0;
+
+	sid_key = ioread32be(sid_data->reg_base + round_down(offset, 4));
+	sid_key >>= (offset % 4) * 8;
+
+	return sid_key; /* Only return the last byte */
+}
+
+static ssize_t sid_read(struct file *fd, struct kobject *kobj,
+			struct bin_attribute *attr, char *buf,
+			loff_t pos, size_t size)
+{
+	struct platform_device *pdev;
+	struct sunxi_sid_data *sid_data;
+	int i;
+
+	pdev = to_platform_device(kobj_to_dev(kobj));
+	sid_data = platform_get_drvdata(pdev);
+
+	if (pos < 0 || pos >= sid_data->keysize)
+		return 0;
+	if (size > sid_data->keysize - pos)
+		size = sid_data->keysize - pos;
+
+	for (i = 0; i < size; i++)
+		buf[i] = sunxi_sid_read_byte(sid_data, pos + i);
+
+	return i;
+}
+
+static struct bin_attribute sid_bin_attr = {
+	.attr = { .name = "eeprom", .mode = S_IRUGO, },
+	.read = sid_read,
+};
+
+static int sunxi_sid_remove(struct platform_device *pdev)
+{
+	device_remove_bin_file(&pdev->dev, &sid_bin_attr);
+	dev_dbg(&pdev->dev, "driver unloaded\n");
+
+	return 0;
+}
+
+static const struct of_device_id sunxi_sid_of_match[] = {
+	{ .compatible = "allwinner,sun4i-a10-sid", .data = (void *)16},
+	{ .compatible = "allwinner,sun7i-a20-sid", .data = (void *)512},
+	{/* sentinel */},
+};
+MODULE_DEVICE_TABLE(of, sunxi_sid_of_match);
+
+static int sunxi_sid_probe(struct platform_device *pdev)
+{
+	struct sunxi_sid_data *sid_data;
+	struct resource *res;
+	const struct of_device_id *of_dev_id;
+	u8 *entropy;
+	unsigned int i;
+
+	sid_data = devm_kzalloc(&pdev->dev, sizeof(struct sunxi_sid_data),
+				GFP_KERNEL);
+	if (!sid_data)
+		return -ENOMEM;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	sid_data->reg_base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(sid_data->reg_base))
+		return PTR_ERR(sid_data->reg_base);
+
+	of_dev_id = of_match_device(sunxi_sid_of_match, &pdev->dev);
+	if (!of_dev_id)
+		return -ENODEV;
+	sid_data->keysize = (int)of_dev_id->data;
+
+	platform_set_drvdata(pdev, sid_data);
+
+	sid_bin_attr.size = sid_data->keysize;
+	if (device_create_bin_file(&pdev->dev, &sid_bin_attr))
+		return -ENODEV;
+
+	entropy = kzalloc(sizeof(u8) * sid_data->keysize, GFP_KERNEL);
+	for (i = 0; i < sid_data->keysize; i++)
+		entropy[i] = sunxi_sid_read_byte(sid_data, i);
+	add_device_randomness(entropy, sid_data->keysize);
+	kfree(entropy);
+
+	dev_dbg(&pdev->dev, "loaded\n");
+
+	return 0;
+}
+
+static struct platform_driver sunxi_sid_driver = {
+	.probe = sunxi_sid_probe,
+	.remove = sunxi_sid_remove,
+	.driver = {
+		.name = DRV_NAME,
+		.owner = THIS_MODULE,
+		.of_match_table = sunxi_sid_of_match,
+	},
+};
+module_platform_driver(sunxi_sid_driver);
+
+MODULE_AUTHOR("Oliver Schinagl <oliver@schinagl.nl>");
+MODULE_DESCRIPTION("Allwinner sunxi security id driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/misc/enclosure.c b/drivers/misc/enclosure.c
index 00e5fcac8fd..2cf2bbc0b92 100644
--- a/drivers/misc/enclosure.c
+++ b/drivers/misc/enclosure.c
@@ -198,6 +198,13 @@ static void enclosure_remove_links(struct enclosure_component *cdev)
 {
 	char name[ENCLOSURE_NAME_SIZE];
 
+	/*
+	 * In odd circumstances, like multipath devices, something else may
+	 * already have removed the links, so check for this condition first.
+	 */
+	if (!cdev->dev->kobj.sd)
+		return;
+
 	enclosure_link_name(cdev, name);
 	sysfs_remove_link(&cdev->dev->kobj, name);
 	sysfs_remove_link(&cdev->cdev.kobj, "device");
@@ -239,7 +246,7 @@ static void enclosure_component_release(struct device *dev)
 	put_device(dev->parent);
 }
 
-static const struct attribute_group *enclosure_groups[];
+static const struct attribute_group *enclosure_component_groups[];
 
 /**
  * enclosure_component_register - add a particular component to an enclosure
@@ -282,7 +289,7 @@ enclosure_component_register(struct enclosure_device *edev,
 		dev_set_name(cdev, "%u", number);
 
 	cdev->release = enclosure_component_release;
-	cdev->groups = enclosure_groups;
+	cdev->groups = enclosure_component_groups;
 
 	err = device_register(cdev);
 	if (err) {
@@ -365,25 +372,26 @@ EXPORT_SYMBOL_GPL(enclosure_remove_device);
  * sysfs pieces below
  */
 
-static ssize_t enclosure_show_components(struct device *cdev,
-					 struct device_attribute *attr,
-					 char *buf)
+static ssize_t components_show(struct device *cdev,
+			       struct device_attribute *attr, char *buf)
 {
 	struct enclosure_device *edev = to_enclosure_device(cdev);
 
 	return snprintf(buf, 40, "%d\n", edev->components);
 }
+static DEVICE_ATTR_RO(components);
 
-static struct device_attribute enclosure_attrs[] = {
-	__ATTR(components, S_IRUGO, enclosure_show_components, NULL),
-	__ATTR_NULL
+static struct attribute *enclosure_class_attrs[] = {
+	&dev_attr_components.attr,
+	NULL,
 };
+ATTRIBUTE_GROUPS(enclosure_class);
 
 static struct class enclosure_class = {
 	.name			= "enclosure",
 	.owner			= THIS_MODULE,
 	.dev_release		= enclosure_release,
-	.dev_attrs		= enclosure_attrs,
+	.dev_groups		= enclosure_class_groups,
 };
 
 static const char *const enclosure_status [] = {
@@ -536,15 +544,7 @@ static struct attribute *enclosure_component_attrs[] = {
 	&dev_attr_type.attr,
 	NULL
 };
-
-static struct attribute_group enclosure_group = {
-	.attrs = enclosure_component_attrs,
-};
-
-static const struct attribute_group *enclosure_groups[] = {
-	&enclosure_group,
-	NULL
-};
+ATTRIBUTE_GROUPS(enclosure_component);
 
 static int __init enclosure_init(void)
 {
diff --git a/drivers/misc/ep93xx_pwm.c b/drivers/misc/ep93xx_pwm.c
deleted file mode 100644
index cdb67a9c195..00000000000
--- a/drivers/misc/ep93xx_pwm.c
+++ /dev/null
@@ -1,286 +0,0 @@
-/*
- *  Simple PWM driver for EP93XX
- *
- *	(c) Copyright 2009  Matthieu Crapet <mcrapet@gmail.com>
- *	(c) Copyright 2009  H Hartley Sweeten <hsweeten@visionengravers.com>
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
- *
- *  EP9307 has only one channel:
- *    - PWMOUT
- *
- *  EP9301/02/12/15 have two channels:
- *    - PWMOUT
- *    - PWMOUT1 (alternate function for EGPIO14)
- */
-
-#include <linux/module.h>
-#include <linux/platform_device.h>
-#include <linux/slab.h>
-#include <linux/clk.h>
-#include <linux/err.h>
-#include <linux/io.h>
-
-#include <mach/platform.h>
-
-#define EP93XX_PWMx_TERM_COUNT	0x00
-#define EP93XX_PWMx_DUTY_CYCLE	0x04
-#define EP93XX_PWMx_ENABLE	0x08
-#define EP93XX_PWMx_INVERT	0x0C
-
-#define EP93XX_PWM_MAX_COUNT	0xFFFF
-
-struct ep93xx_pwm {
-	void __iomem	*mmio_base;
-	struct clk	*clk;
-	u32		duty_percent;
-};
-
-/*
- * /sys/devices/platform/ep93xx-pwm.N
- *   /min_freq      read-only   minimum pwm output frequency
- *   /max_req       read-only   maximum pwm output frequency
- *   /freq          read-write  pwm output frequency (0 = disable output)
- *   /duty_percent  read-write  pwm duty cycle percent (1..99)
- *   /invert        read-write  invert pwm output
- */
-
-static ssize_t ep93xx_pwm_get_min_freq(struct device *dev,
-		struct device_attribute *attr, char *buf)
-{
-	struct platform_device *pdev = to_platform_device(dev);
-	struct ep93xx_pwm *pwm = platform_get_drvdata(pdev);
-	unsigned long rate = clk_get_rate(pwm->clk);
-
-	return sprintf(buf, "%ld\n", rate / (EP93XX_PWM_MAX_COUNT + 1));
-}
-
-static ssize_t ep93xx_pwm_get_max_freq(struct device *dev,
-		struct device_attribute *attr, char *buf)
-{
-	struct platform_device *pdev = to_platform_device(dev);
-	struct ep93xx_pwm *pwm = platform_get_drvdata(pdev);
-	unsigned long rate = clk_get_rate(pwm->clk);
-
-	return sprintf(buf, "%ld\n", rate / 2);
-}
-
-static ssize_t ep93xx_pwm_get_freq(struct device *dev,
-		struct device_attribute *attr, char *buf)
-{
-	struct platform_device *pdev = to_platform_device(dev);
-	struct ep93xx_pwm *pwm = platform_get_drvdata(pdev);
-
-	if (readl(pwm->mmio_base + EP93XX_PWMx_ENABLE) & 0x1) {
-		unsigned long rate = clk_get_rate(pwm->clk);
-		u16 term = readl(pwm->mmio_base + EP93XX_PWMx_TERM_COUNT);
-
-		return sprintf(buf, "%ld\n", rate / (term + 1));
-	} else {
-		return sprintf(buf, "disabled\n");
-	}
-}
-
-static ssize_t ep93xx_pwm_set_freq(struct device *dev,
-		struct device_attribute *attr, const char *buf, size_t count)
-{
-	struct platform_device *pdev = to_platform_device(dev);
-	struct ep93xx_pwm *pwm = platform_get_drvdata(pdev);
-	long val;
-	int err;
-
-	err = kstrtol(buf, 10, &val);
-	if (err)
-		return -EINVAL;
-
-	if (val == 0) {
-		writel(0x0, pwm->mmio_base + EP93XX_PWMx_ENABLE);
-	} else if (val <= (clk_get_rate(pwm->clk) / 2)) {
-		u32 term, duty;
-
-		val = (clk_get_rate(pwm->clk) / val) - 1;
-		if (val > EP93XX_PWM_MAX_COUNT)
-			val = EP93XX_PWM_MAX_COUNT;
-		if (val < 1)
-			val = 1;
-
-		term = readl(pwm->mmio_base + EP93XX_PWMx_TERM_COUNT);
-		duty = ((val + 1) * pwm->duty_percent / 100) - 1;
-
-		/* If pwm is running, order is important */
-		if (val > term) {
-			writel(val, pwm->mmio_base + EP93XX_PWMx_TERM_COUNT);
-			writel(duty, pwm->mmio_base + EP93XX_PWMx_DUTY_CYCLE);
-		} else {
-			writel(duty, pwm->mmio_base + EP93XX_PWMx_DUTY_CYCLE);
-			writel(val, pwm->mmio_base + EP93XX_PWMx_TERM_COUNT);
-		}
-
-		if (!readl(pwm->mmio_base + EP93XX_PWMx_ENABLE) & 0x1)
-			writel(0x1, pwm->mmio_base + EP93XX_PWMx_ENABLE);
-	} else {
-		return -EINVAL;
-	}
-
-	return count;
-}
-
-static ssize_t ep93xx_pwm_get_duty_percent(struct device *dev,
-		struct device_attribute *attr, char *buf)
-{
-	struct platform_device *pdev = to_platform_device(dev);
-	struct ep93xx_pwm *pwm = platform_get_drvdata(pdev);
-
-	return sprintf(buf, "%d\n", pwm->duty_percent);
-}
-
-static ssize_t ep93xx_pwm_set_duty_percent(struct device *dev,
-		struct device_attribute *attr, const char *buf, size_t count)
-{
-	struct platform_device *pdev = to_platform_device(dev);
-	struct ep93xx_pwm *pwm = platform_get_drvdata(pdev);
-	long val;
-	int err;
-
-	err = kstrtol(buf, 10, &val);
-	if (err)
-		return -EINVAL;
-
-	if (val > 0 && val < 100) {
-		u32 term = readl(pwm->mmio_base + EP93XX_PWMx_TERM_COUNT);
-		u32 duty = ((term + 1) * val / 100) - 1;
-
-		writel(duty, pwm->mmio_base + EP93XX_PWMx_DUTY_CYCLE);
-		pwm->duty_percent = val;
-		return count;
-	}
-
-	return -EINVAL;
-}
-
-static ssize_t ep93xx_pwm_get_invert(struct device *dev,
-		struct device_attribute *attr, char *buf)
-{
-	struct platform_device *pdev = to_platform_device(dev);
-	struct ep93xx_pwm *pwm = platform_get_drvdata(pdev);
-	int inverted = readl(pwm->mmio_base + EP93XX_PWMx_INVERT) & 0x1;
-
-	return sprintf(buf, "%d\n", inverted);
-}
-
-static ssize_t ep93xx_pwm_set_invert(struct device *dev,
-		struct device_attribute *attr, const char *buf, size_t count)
-{
-	struct platform_device *pdev = to_platform_device(dev);
-	struct ep93xx_pwm *pwm = platform_get_drvdata(pdev);
-	long val;
-	int err;
-
-	err = kstrtol(buf, 10, &val);
-	if (err)
-		return -EINVAL;
-
-	if (val == 0)
-		writel(0x0, pwm->mmio_base + EP93XX_PWMx_INVERT);
-	else if (val == 1)
-		writel(0x1, pwm->mmio_base + EP93XX_PWMx_INVERT);
-	else
-		return -EINVAL;
-
-	return count;
-}
-
-static DEVICE_ATTR(min_freq, S_IRUGO, ep93xx_pwm_get_min_freq, NULL);
-static DEVICE_ATTR(max_freq, S_IRUGO, ep93xx_pwm_get_max_freq, NULL);
-static DEVICE_ATTR(freq, S_IWUSR | S_IRUGO,
-		   ep93xx_pwm_get_freq, ep93xx_pwm_set_freq);
-static DEVICE_ATTR(duty_percent, S_IWUSR | S_IRUGO,
-		   ep93xx_pwm_get_duty_percent, ep93xx_pwm_set_duty_percent);
-static DEVICE_ATTR(invert, S_IWUSR | S_IRUGO,
-		   ep93xx_pwm_get_invert, ep93xx_pwm_set_invert);
-
-static struct attribute *ep93xx_pwm_attrs[] = {
-	&dev_attr_min_freq.attr,
-	&dev_attr_max_freq.attr,
-	&dev_attr_freq.attr,
-	&dev_attr_duty_percent.attr,
-	&dev_attr_invert.attr,
-	NULL
-};
-
-static const struct attribute_group ep93xx_pwm_sysfs_files = {
-	.attrs	= ep93xx_pwm_attrs,
-};
-
-static int ep93xx_pwm_probe(struct platform_device *pdev)
-{
-	struct ep93xx_pwm *pwm;
-	struct resource *res;
-	int ret;
-
-	pwm = devm_kzalloc(&pdev->dev, sizeof(*pwm), GFP_KERNEL);
-	if (!pwm)
-		return -ENOMEM;
-
-	pwm->clk = devm_clk_get(&pdev->dev, "pwm_clk");
-	if (IS_ERR(pwm->clk))
-		return PTR_ERR(pwm->clk);
-
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	pwm->mmio_base = devm_ioremap_resource(&pdev->dev, res);
-	if (IS_ERR(pwm->mmio_base))
-		return PTR_ERR(pwm->mmio_base);
-
-	ret = ep93xx_pwm_acquire_gpio(pdev);
-	if (ret)
-		return ret;
-
-	ret = sysfs_create_group(&pdev->dev.kobj, &ep93xx_pwm_sysfs_files);
-	if (ret) {
-		ep93xx_pwm_release_gpio(pdev);
-		return ret;
-	}
-
-	pwm->duty_percent = 50;
-
-	/* disable pwm at startup. Avoids zero value. */
-	writel(0x0, pwm->mmio_base + EP93XX_PWMx_ENABLE);
-	writel(EP93XX_PWM_MAX_COUNT, pwm->mmio_base + EP93XX_PWMx_TERM_COUNT);
-	writel(EP93XX_PWM_MAX_COUNT/2, pwm->mmio_base + EP93XX_PWMx_DUTY_CYCLE);
-
-	clk_enable(pwm->clk);
-
-	platform_set_drvdata(pdev, pwm);
-	return 0;
-}
-
-static int ep93xx_pwm_remove(struct platform_device *pdev)
-{
-	struct ep93xx_pwm *pwm = platform_get_drvdata(pdev);
-
-	writel(0x0, pwm->mmio_base + EP93XX_PWMx_ENABLE);
-	clk_disable(pwm->clk);
-	sysfs_remove_group(&pdev->dev.kobj, &ep93xx_pwm_sysfs_files);
-	ep93xx_pwm_release_gpio(pdev);
-
-	return 0;
-}
-
-static struct platform_driver ep93xx_pwm_driver = {
-	.driver		= {
-		.name	= "ep93xx-pwm",
-		.owner	= THIS_MODULE,
-	},
-	.probe		= ep93xx_pwm_probe,
-	.remove		= ep93xx_pwm_remove,
-};
-module_platform_driver(ep93xx_pwm_driver);
-
-MODULE_AUTHOR("Matthieu Crapet <mcrapet@gmail.com>, "
-	      "H Hartley Sweeten <hsweeten@visionengravers.com>");
-MODULE_DESCRIPTION("EP93xx PWM driver");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("platform:ep93xx-pwm");
diff --git a/drivers/misc/fsa9480.c b/drivers/misc/fsa9480.c
index a725c79c35f..71d2793b372 100644
--- a/drivers/misc/fsa9480.c
+++ b/drivers/misc/fsa9480.c
@@ -396,7 +396,7 @@ static int fsa9480_irq_init(struct fsa9480_usbsw *usbsw)
 				IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
 				"fsa9480 micro USB", usbsw);
 		if (ret) {
-			dev_err(&client->dev, "failed to reqeust IRQ\n");
+			dev_err(&client->dev, "failed to request IRQ\n");
 			return ret;
 		}
 
diff --git a/drivers/misc/genwqe/Kconfig b/drivers/misc/genwqe/Kconfig
new file mode 100644
index 00000000000..6069d8cd79d
--- /dev/null
+++ b/drivers/misc/genwqe/Kconfig
@@ -0,0 +1,13 @@
+#
+# IBM Accelerator Family 'GenWQE'
+#
+
+menuconfig GENWQE
+	tristate "GenWQE PCIe Accelerator"
+	depends on PCI && 64BIT
+	select CRC_ITU_T
+	default n
+	help
+	  Enables PCIe card driver for IBM GenWQE accelerators.
+	  The user-space interface is described in
+	  include/linux/genwqe/genwqe_card.h.
diff --git a/drivers/misc/genwqe/Makefile b/drivers/misc/genwqe/Makefile
new file mode 100644
index 00000000000..98a2b4f0b18
--- /dev/null
+++ b/drivers/misc/genwqe/Makefile
@@ -0,0 +1,7 @@
+#
+# Makefile for GenWQE driver
+#
+
+obj-$(CONFIG_GENWQE) := genwqe_card.o
+genwqe_card-objs := card_base.o card_dev.o card_ddcb.o card_sysfs.o \
+	card_debugfs.o card_utils.o
diff --git a/drivers/misc/genwqe/card_base.c b/drivers/misc/genwqe/card_base.c
new file mode 100644
index 00000000000..74d51c9bb85
--- /dev/null
+++ b/drivers/misc/genwqe/card_base.c
@@ -0,0 +1,1205 @@
+/**
+ * IBM Accelerator Family 'GenWQE'
+ *
+ * (C) Copyright IBM Corp. 2013
+ *
+ * Author: Frank Haverkamp <haver@linux.vnet.ibm.com>
+ * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com>
+ * Author: Michael Jung <mijung@de.ibm.com>
+ * Author: Michael Ruettger <michael@ibmra.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+/*
+ * Module initialization and PCIe setup. Card health monitoring and
+ * recovery functionality. Character device creation and deletion are
+ * controlled from here.
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/err.h>
+#include <linux/aer.h>
+#include <linux/string.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/module.h>
+#include <linux/notifier.h>
+#include <linux/device.h>
+#include <linux/log2.h>
+#include <linux/genwqe/genwqe_card.h>
+
+#include "card_base.h"
+#include "card_ddcb.h"
+
+MODULE_AUTHOR("Frank Haverkamp <haver@linux.vnet.ibm.com>");
+MODULE_AUTHOR("Michael Ruettger <michael@ibmra.de>");
+MODULE_AUTHOR("Joerg-Stephan Vogt <jsvogt@de.ibm.com>");
+MODULE_AUTHOR("Michal Jung <mijung@de.ibm.com>");
+
+MODULE_DESCRIPTION("GenWQE Card");
+MODULE_VERSION(DRV_VERS_STRING);
+MODULE_LICENSE("GPL");
+
+static char genwqe_driver_name[] = GENWQE_DEVNAME;
+static struct class *class_genwqe;
+static struct dentry *debugfs_genwqe;
+static struct genwqe_dev *genwqe_devices[GENWQE_CARD_NO_MAX];
+
+/* PCI structure for identifying device by PCI vendor and device ID */
+static DEFINE_PCI_DEVICE_TABLE(genwqe_device_table) = {
+	{ .vendor      = PCI_VENDOR_ID_IBM,
+	  .device      = PCI_DEVICE_GENWQE,
+	  .subvendor   = PCI_SUBVENDOR_ID_IBM,
+	  .subdevice   = PCI_SUBSYSTEM_ID_GENWQE5,
+	  .class       = (PCI_CLASSCODE_GENWQE5 << 8),
+	  .class_mask  = ~0,
+	  .driver_data = 0 },
+
+	/* Initial SR-IOV bring-up image */
+	{ .vendor      = PCI_VENDOR_ID_IBM,
+	  .device      = PCI_DEVICE_GENWQE,
+	  .subvendor   = PCI_SUBVENDOR_ID_IBM_SRIOV,
+	  .subdevice   = PCI_SUBSYSTEM_ID_GENWQE5_SRIOV,
+	  .class       = (PCI_CLASSCODE_GENWQE5_SRIOV << 8),
+	  .class_mask  = ~0,
+	  .driver_data = 0 },
+
+	{ .vendor      = PCI_VENDOR_ID_IBM,  /* VF Vendor ID */
+	  .device      = 0x0000,  /* VF Device ID */
+	  .subvendor   = PCI_SUBVENDOR_ID_IBM_SRIOV,
+	  .subdevice   = PCI_SUBSYSTEM_ID_GENWQE5_SRIOV,
+	  .class       = (PCI_CLASSCODE_GENWQE5_SRIOV << 8),
+	  .class_mask  = ~0,
+	  .driver_data = 0 },
+
+	/* Fixed up image */
+	{ .vendor      = PCI_VENDOR_ID_IBM,
+	  .device      = PCI_DEVICE_GENWQE,
+	  .subvendor   = PCI_SUBVENDOR_ID_IBM_SRIOV,
+	  .subdevice   = PCI_SUBSYSTEM_ID_GENWQE5,
+	  .class       = (PCI_CLASSCODE_GENWQE5_SRIOV << 8),
+	  .class_mask  = ~0,
+	  .driver_data = 0 },
+
+	{ .vendor      = PCI_VENDOR_ID_IBM,  /* VF Vendor ID */
+	  .device      = 0x0000,  /* VF Device ID */
+	  .subvendor   = PCI_SUBVENDOR_ID_IBM_SRIOV,
+	  .subdevice   = PCI_SUBSYSTEM_ID_GENWQE5,
+	  .class       = (PCI_CLASSCODE_GENWQE5_SRIOV << 8),
+	  .class_mask  = ~0,
+	  .driver_data = 0 },
+
+	/* Even one more ... */
+	{ .vendor      = PCI_VENDOR_ID_IBM,
+	  .device      = PCI_DEVICE_GENWQE,
+	  .subvendor   = PCI_SUBVENDOR_ID_IBM,
+	  .subdevice   = PCI_SUBSYSTEM_ID_GENWQE5_NEW,
+	  .class       = (PCI_CLASSCODE_GENWQE5 << 8),
+	  .class_mask  = ~0,
+	  .driver_data = 0 },
+
+	{ 0, }			/* 0 terminated list. */
+};
+
+MODULE_DEVICE_TABLE(pci, genwqe_device_table);
+
+/**
+ * genwqe_dev_alloc() - Create and prepare a new card descriptor
+ *
+ * Return: Pointer to card descriptor, or ERR_PTR(err) on error
+ */
+static struct genwqe_dev *genwqe_dev_alloc(void)
+{
+	unsigned int i = 0, j;
+	struct genwqe_dev *cd;
+
+	for (i = 0; i < GENWQE_CARD_NO_MAX; i++) {
+		if (genwqe_devices[i] == NULL)
+			break;
+	}
+	if (i >= GENWQE_CARD_NO_MAX)
+		return ERR_PTR(-ENODEV);
+
+	cd = kzalloc(sizeof(struct genwqe_dev), GFP_KERNEL);
+	if (!cd)
+		return ERR_PTR(-ENOMEM);
+
+	cd->card_idx = i;
+	cd->class_genwqe = class_genwqe;
+	cd->debugfs_genwqe = debugfs_genwqe;
+
+	init_waitqueue_head(&cd->queue_waitq);
+
+	spin_lock_init(&cd->file_lock);
+	INIT_LIST_HEAD(&cd->file_list);
+
+	cd->card_state = GENWQE_CARD_UNUSED;
+	spin_lock_init(&cd->print_lock);
+
+	cd->ddcb_software_timeout = genwqe_ddcb_software_timeout;
+	cd->kill_timeout = genwqe_kill_timeout;
+
+	for (j = 0; j < GENWQE_MAX_VFS; j++)
+		cd->vf_jobtimeout_msec[j] = genwqe_vf_jobtimeout_msec;
+
+	genwqe_devices[i] = cd;
+	return cd;
+}
+
+static void genwqe_dev_free(struct genwqe_dev *cd)
+{
+	if (!cd)
+		return;
+
+	genwqe_devices[cd->card_idx] = NULL;
+	kfree(cd);
+}
+
+/**
+ * genwqe_bus_reset() - Card recovery
+ *
+ * pci_reset_function() will recover the device and ensure that the
+ * registers are accessible again when it completes with success. If
+ * not, the card will stay dead and registers will be unaccessible
+ * still.
+ */
+static int genwqe_bus_reset(struct genwqe_dev *cd)
+{
+	int bars, rc = 0;
+	struct pci_dev *pci_dev = cd->pci_dev;
+	void __iomem *mmio;
+
+	if (cd->err_inject & GENWQE_INJECT_BUS_RESET_FAILURE)
+		return -EIO;
+
+	mmio = cd->mmio;
+	cd->mmio = NULL;
+	pci_iounmap(pci_dev, mmio);
+
+	bars = pci_select_bars(pci_dev, IORESOURCE_MEM);
+	pci_release_selected_regions(pci_dev, bars);
+
+	/*
+	 * Firmware/BIOS might change memory mapping during bus reset.
+	 * Settings like enable bus-mastering, ... are backuped and
+	 * restored by the pci_reset_function().
+	 */
+	dev_dbg(&pci_dev->dev, "[%s] pci_reset function ...\n", __func__);
+	rc = pci_reset_function(pci_dev);
+	if (rc) {
+		dev_err(&pci_dev->dev,
+			"[%s] err: failed reset func (rc %d)\n", __func__, rc);
+		return rc;
+	}
+	dev_dbg(&pci_dev->dev, "[%s] done with rc=%d\n", __func__, rc);
+
+	/*
+	 * Here is the right spot to clear the register read
+	 * failure. pci_bus_reset() does this job in real systems.
+	 */
+	cd->err_inject &= ~(GENWQE_INJECT_HARDWARE_FAILURE |
+			    GENWQE_INJECT_GFIR_FATAL |
+			    GENWQE_INJECT_GFIR_INFO);
+
+	rc = pci_request_selected_regions(pci_dev, bars, genwqe_driver_name);
+	if (rc) {
+		dev_err(&pci_dev->dev,
+			"[%s] err: request bars failed (%d)\n", __func__, rc);
+		return -EIO;
+	}
+
+	cd->mmio = pci_iomap(pci_dev, 0, 0);
+	if (cd->mmio == NULL) {
+		dev_err(&pci_dev->dev,
+			"[%s] err: mapping BAR0 failed\n", __func__);
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+/*
+ * Hardware circumvention section. Certain bitstreams in our test-lab
+ * had different kinds of problems. Here is where we adjust those
+ * bitstreams to function will with this version of our device driver.
+ *
+ * Thise circumventions are applied to the physical function only.
+ * The magical numbers below are identifying development/manufacturing
+ * versions of the bitstream used on the card.
+ *
+ * Turn off error reporting for old/manufacturing images.
+ */
+
+bool genwqe_need_err_masking(struct genwqe_dev *cd)
+{
+	return (cd->slu_unitcfg & 0xFFFF0ull) < 0x32170ull;
+}
+
+static void genwqe_tweak_hardware(struct genwqe_dev *cd)
+{
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	/* Mask FIRs for development images */
+	if (((cd->slu_unitcfg & 0xFFFF0ull) >= 0x32000ull) &&
+	    ((cd->slu_unitcfg & 0xFFFF0ull) <= 0x33250ull)) {
+		dev_warn(&pci_dev->dev,
+			 "FIRs masked due to bitstream %016llx.%016llx\n",
+			 cd->slu_unitcfg, cd->app_unitcfg);
+
+		__genwqe_writeq(cd, IO_APP_SEC_LEM_DEBUG_OVR,
+				0xFFFFFFFFFFFFFFFFull);
+
+		__genwqe_writeq(cd, IO_APP_ERR_ACT_MASK,
+				0x0000000000000000ull);
+	}
+}
+
+/**
+ * genwqe_recovery_on_fatal_gfir_required() - Version depended actions
+ *
+ * Bitstreams older than 2013-02-17 have a bug where fatal GFIRs must
+ * be ignored. This is e.g. true for the bitstream we gave to the card
+ * manufacturer, but also for some old bitstreams we released to our
+ * test-lab.
+ */
+int genwqe_recovery_on_fatal_gfir_required(struct genwqe_dev *cd)
+{
+	return (cd->slu_unitcfg & 0xFFFF0ull) >= 0x32170ull;
+}
+
+int genwqe_flash_readback_fails(struct genwqe_dev *cd)
+{
+	return (cd->slu_unitcfg & 0xFFFF0ull) < 0x32170ull;
+}
+
+/**
+ * genwqe_T_psec() - Calculate PF/VF timeout register content
+ *
+ * Note: From a design perspective it turned out to be a bad idea to
+ * use codes here to specifiy the frequency/speed values. An old
+ * driver cannot understand new codes and is therefore always a
+ * problem. Better is to measure out the value or put the
+ * speed/frequency directly into a register which is always a valid
+ * value for old as well as for new software.
+ */
+/* T = 1/f */
+static int genwqe_T_psec(struct genwqe_dev *cd)
+{
+	u16 speed;	/* 1/f -> 250,  200,  166,  175 */
+	static const int T[] = { 4000, 5000, 6000, 5714 };
+
+	speed = (u16)((cd->slu_unitcfg >> 28) & 0x0full);
+	if (speed >= ARRAY_SIZE(T))
+		return -1;	/* illegal value */
+
+	return T[speed];
+}
+
+/**
+ * genwqe_setup_pf_jtimer() - Setup PF hardware timeouts for DDCB execution
+ *
+ * Do this _after_ card_reset() is called. Otherwise the values will
+ * vanish. The settings need to be done when the queues are inactive.
+ *
+ * The max. timeout value is 2^(10+x) * T (6ns for 166MHz) * 15/16.
+ * The min. timeout value is 2^(10+x) * T (6ns for 166MHz) * 14/16.
+ */
+static bool genwqe_setup_pf_jtimer(struct genwqe_dev *cd)
+{
+	u32 T = genwqe_T_psec(cd);
+	u64 x;
+
+	if (genwqe_pf_jobtimeout_msec == 0)
+		return false;
+
+	/* PF: large value needed, flash update 2sec per block */
+	x = ilog2(genwqe_pf_jobtimeout_msec *
+		  16000000000uL/(T * 15)) - 10;
+
+	genwqe_write_vreg(cd, IO_SLC_VF_APPJOB_TIMEOUT,
+			  0xff00 | (x & 0xff), 0);
+	return true;
+}
+
+/**
+ * genwqe_setup_vf_jtimer() - Setup VF hardware timeouts for DDCB execution
+ */
+static bool genwqe_setup_vf_jtimer(struct genwqe_dev *cd)
+{
+	struct pci_dev *pci_dev = cd->pci_dev;
+	unsigned int vf;
+	u32 T = genwqe_T_psec(cd);
+	u64 x;
+
+	for (vf = 0; vf < pci_sriov_get_totalvfs(pci_dev); vf++) {
+
+		if (cd->vf_jobtimeout_msec[vf] == 0)
+			continue;
+
+		x = ilog2(cd->vf_jobtimeout_msec[vf] *
+			  16000000000uL/(T * 15)) - 10;
+
+		genwqe_write_vreg(cd, IO_SLC_VF_APPJOB_TIMEOUT,
+				  0xff00 | (x & 0xff), vf + 1);
+	}
+	return true;
+}
+
+static int genwqe_ffdc_buffs_alloc(struct genwqe_dev *cd)
+{
+	unsigned int type, e = 0;
+
+	for (type = 0; type < GENWQE_DBG_UNITS; type++) {
+		switch (type) {
+		case GENWQE_DBG_UNIT0:
+			e = genwqe_ffdc_buff_size(cd, 0);
+			break;
+		case GENWQE_DBG_UNIT1:
+			e = genwqe_ffdc_buff_size(cd, 1);
+			break;
+		case GENWQE_DBG_UNIT2:
+			e = genwqe_ffdc_buff_size(cd, 2);
+			break;
+		case GENWQE_DBG_REGS:
+			e = GENWQE_FFDC_REGS;
+			break;
+		}
+
+		/* currently support only the debug units mentioned here */
+		cd->ffdc[type].entries = e;
+		cd->ffdc[type].regs = kmalloc(e * sizeof(struct genwqe_reg),
+					      GFP_KERNEL);
+		/*
+		 * regs == NULL is ok, the using code treats this as no regs,
+		 * Printing warning is ok in this case.
+		 */
+	}
+	return 0;
+}
+
+static void genwqe_ffdc_buffs_free(struct genwqe_dev *cd)
+{
+	unsigned int type;
+
+	for (type = 0; type < GENWQE_DBG_UNITS; type++) {
+		kfree(cd->ffdc[type].regs);
+		cd->ffdc[type].regs = NULL;
+	}
+}
+
+static int genwqe_read_ids(struct genwqe_dev *cd)
+{
+	int err = 0;
+	int slu_id;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	cd->slu_unitcfg = __genwqe_readq(cd, IO_SLU_UNITCFG);
+	if (cd->slu_unitcfg == IO_ILLEGAL_VALUE) {
+		dev_err(&pci_dev->dev,
+			"err: SLUID=%016llx\n", cd->slu_unitcfg);
+		err = -EIO;
+		goto out_err;
+	}
+
+	slu_id = genwqe_get_slu_id(cd);
+	if (slu_id < GENWQE_SLU_ARCH_REQ || slu_id == 0xff) {
+		dev_err(&pci_dev->dev,
+			"err: incompatible SLU Architecture %u\n", slu_id);
+		err = -ENOENT;
+		goto out_err;
+	}
+
+	cd->app_unitcfg = __genwqe_readq(cd, IO_APP_UNITCFG);
+	if (cd->app_unitcfg == IO_ILLEGAL_VALUE) {
+		dev_err(&pci_dev->dev,
+			"err: APPID=%016llx\n", cd->app_unitcfg);
+		err = -EIO;
+		goto out_err;
+	}
+	genwqe_read_app_id(cd, cd->app_name, sizeof(cd->app_name));
+
+	/*
+	 * Is access to all registers possible? If we are a VF the
+	 * answer is obvious. If we run fully virtualized, we need to
+	 * check if we can access all registers. If we do not have
+	 * full access we will cause an UR and some informational FIRs
+	 * in the PF, but that should not harm.
+	 */
+	if (pci_dev->is_virtfn)
+		cd->is_privileged = 0;
+	else
+		cd->is_privileged = (__genwqe_readq(cd, IO_SLU_BITSTREAM)
+				     != IO_ILLEGAL_VALUE);
+
+ out_err:
+	return err;
+}
+
+static int genwqe_start(struct genwqe_dev *cd)
+{
+	int err;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	err = genwqe_read_ids(cd);
+	if (err)
+		return err;
+
+	if (genwqe_is_privileged(cd)) {
+		/* do this after the tweaks. alloc fail is acceptable */
+		genwqe_ffdc_buffs_alloc(cd);
+		genwqe_stop_traps(cd);
+
+		/* Collect registers e.g. FIRs, UNITIDs, traces ... */
+		genwqe_read_ffdc_regs(cd, cd->ffdc[GENWQE_DBG_REGS].regs,
+				      cd->ffdc[GENWQE_DBG_REGS].entries, 0);
+
+		genwqe_ffdc_buff_read(cd, GENWQE_DBG_UNIT0,
+				      cd->ffdc[GENWQE_DBG_UNIT0].regs,
+				      cd->ffdc[GENWQE_DBG_UNIT0].entries);
+
+		genwqe_ffdc_buff_read(cd, GENWQE_DBG_UNIT1,
+				      cd->ffdc[GENWQE_DBG_UNIT1].regs,
+				      cd->ffdc[GENWQE_DBG_UNIT1].entries);
+
+		genwqe_ffdc_buff_read(cd, GENWQE_DBG_UNIT2,
+				      cd->ffdc[GENWQE_DBG_UNIT2].regs,
+				      cd->ffdc[GENWQE_DBG_UNIT2].entries);
+
+		genwqe_start_traps(cd);
+
+		if (cd->card_state == GENWQE_CARD_FATAL_ERROR) {
+			dev_warn(&pci_dev->dev,
+				 "[%s] chip reload/recovery!\n", __func__);
+
+			/*
+			 * Stealth Mode: Reload chip on either hot
+			 * reset or PERST.
+			 */
+			cd->softreset = 0x7Cull;
+			__genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET,
+				       cd->softreset);
+
+			err = genwqe_bus_reset(cd);
+			if (err != 0) {
+				dev_err(&pci_dev->dev,
+					"[%s] err: bus reset failed!\n",
+					__func__);
+				goto out;
+			}
+
+			/*
+			 * Re-read the IDs because
+			 * it could happen that the bitstream load
+			 * failed!
+			 */
+			err = genwqe_read_ids(cd);
+			if (err)
+				goto out;
+		}
+	}
+
+	err = genwqe_setup_service_layer(cd);  /* does a reset to the card */
+	if (err != 0) {
+		dev_err(&pci_dev->dev,
+			"[%s] err: could not setup servicelayer!\n", __func__);
+		err = -ENODEV;
+		goto out;
+	}
+
+	if (genwqe_is_privileged(cd)) {	 /* code is running _after_ reset */
+		genwqe_tweak_hardware(cd);
+
+		genwqe_setup_pf_jtimer(cd);
+		genwqe_setup_vf_jtimer(cd);
+	}
+
+	err = genwqe_device_create(cd);
+	if (err < 0) {
+		dev_err(&pci_dev->dev,
+			"err: chdev init failed! (err=%d)\n", err);
+		goto out_release_service_layer;
+	}
+	return 0;
+
+ out_release_service_layer:
+	genwqe_release_service_layer(cd);
+ out:
+	if (genwqe_is_privileged(cd))
+		genwqe_ffdc_buffs_free(cd);
+	return -EIO;
+}
+
+/**
+ * genwqe_stop() - Stop card operation
+ *
+ * Recovery notes:
+ *   As long as genwqe_thread runs we might access registers during
+ *   error data capture. Same is with the genwqe_health_thread.
+ *   When genwqe_bus_reset() fails this function might called two times:
+ *   first by the genwqe_health_thread() and later by genwqe_remove() to
+ *   unbind the device. We must be able to survive that.
+ *
+ * This function must be robust enough to be called twice.
+ */
+static int genwqe_stop(struct genwqe_dev *cd)
+{
+	genwqe_finish_queue(cd);	    /* no register access */
+	genwqe_device_remove(cd);	    /* device removed, procs killed */
+	genwqe_release_service_layer(cd);   /* here genwqe_thread is stopped */
+
+	if (genwqe_is_privileged(cd)) {
+		pci_disable_sriov(cd->pci_dev);	/* access pci config space */
+		genwqe_ffdc_buffs_free(cd);
+	}
+
+	return 0;
+}
+
+/**
+ * genwqe_recover_card() - Try to recover the card if it is possible
+ *
+ * If fatal_err is set no register access is possible anymore. It is
+ * likely that genwqe_start fails in that situation. Proper error
+ * handling is required in this case.
+ *
+ * genwqe_bus_reset() will cause the pci code to call genwqe_remove()
+ * and later genwqe_probe() for all virtual functions.
+ */
+static int genwqe_recover_card(struct genwqe_dev *cd, int fatal_err)
+{
+	int rc;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	genwqe_stop(cd);
+
+	/*
+	 * Make sure chip is not reloaded to maintain FFDC. Write SLU
+	 * Reset Register, CPLDReset field to 0.
+	 */
+	if (!fatal_err) {
+		cd->softreset = 0x70ull;
+		__genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET, cd->softreset);
+	}
+
+	rc = genwqe_bus_reset(cd);
+	if (rc != 0) {
+		dev_err(&pci_dev->dev,
+			"[%s] err: card recovery impossible!\n", __func__);
+		return rc;
+	}
+
+	rc = genwqe_start(cd);
+	if (rc < 0) {
+		dev_err(&pci_dev->dev,
+			"[%s] err: failed to launch device!\n", __func__);
+		return rc;
+	}
+	return 0;
+}
+
+static int genwqe_health_check_cond(struct genwqe_dev *cd, u64 *gfir)
+{
+	*gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR);
+	return (*gfir & GFIR_ERR_TRIGGER) &&
+		genwqe_recovery_on_fatal_gfir_required(cd);
+}
+
+/**
+ * genwqe_fir_checking() - Check the fault isolation registers of the card
+ *
+ * If this code works ok, can be tried out with help of the genwqe_poke tool:
+ *   sudo ./tools/genwqe_poke 0x8 0xfefefefefef
+ *
+ * Now the relevant FIRs/sFIRs should be printed out and the driver should
+ * invoke recovery (devices are removed and readded).
+ */
+static u64 genwqe_fir_checking(struct genwqe_dev *cd)
+{
+	int j, iterations = 0;
+	u64 mask, fir, fec, uid, gfir, gfir_masked, sfir, sfec;
+	u32 fir_addr, fir_clr_addr, fec_addr, sfir_addr, sfec_addr;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+ healthMonitor:
+	iterations++;
+	if (iterations > 16) {
+		dev_err(&pci_dev->dev, "* exit looping after %d times\n",
+			iterations);
+		goto fatal_error;
+	}
+
+	gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR);
+	if (gfir != 0x0)
+		dev_err(&pci_dev->dev, "* 0x%08x 0x%016llx\n",
+				    IO_SLC_CFGREG_GFIR, gfir);
+	if (gfir == IO_ILLEGAL_VALUE)
+		goto fatal_error;
+
+	/*
+	 * Avoid printing when to GFIR bit is on prevents contignous
+	 * printout e.g. for the following bug:
+	 *   FIR set without a 2ndary FIR/FIR cannot be cleared
+	 * Comment out the following if to get the prints:
+	 */
+	if (gfir == 0)
+		return 0;
+
+	gfir_masked = gfir & GFIR_ERR_TRIGGER;  /* fatal errors */
+
+	for (uid = 0; uid < GENWQE_MAX_UNITS; uid++) { /* 0..2 in zEDC */
+
+		/* read the primary FIR (pfir) */
+		fir_addr = (uid << 24) + 0x08;
+		fir = __genwqe_readq(cd, fir_addr);
+		if (fir == 0x0)
+			continue;  /* no error in this unit */
+
+		dev_err(&pci_dev->dev, "* 0x%08x 0x%016llx\n", fir_addr, fir);
+		if (fir == IO_ILLEGAL_VALUE)
+			goto fatal_error;
+
+		/* read primary FEC */
+		fec_addr = (uid << 24) + 0x18;
+		fec = __genwqe_readq(cd, fec_addr);
+
+		dev_err(&pci_dev->dev, "* 0x%08x 0x%016llx\n", fec_addr, fec);
+		if (fec == IO_ILLEGAL_VALUE)
+			goto fatal_error;
+
+		for (j = 0, mask = 1ULL; j < 64; j++, mask <<= 1) {
+
+			/* secondary fir empty, skip it */
+			if ((fir & mask) == 0x0)
+				continue;
+
+			sfir_addr = (uid << 24) + 0x100 + 0x08 * j;
+			sfir = __genwqe_readq(cd, sfir_addr);
+
+			if (sfir == IO_ILLEGAL_VALUE)
+				goto fatal_error;
+			dev_err(&pci_dev->dev,
+				"* 0x%08x 0x%016llx\n", sfir_addr, sfir);
+
+			sfec_addr = (uid << 24) + 0x300 + 0x08 * j;
+			sfec = __genwqe_readq(cd, sfec_addr);
+
+			if (sfec == IO_ILLEGAL_VALUE)
+				goto fatal_error;
+			dev_err(&pci_dev->dev,
+				"* 0x%08x 0x%016llx\n", sfec_addr, sfec);
+
+			gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR);
+			if (gfir == IO_ILLEGAL_VALUE)
+				goto fatal_error;
+
+			/* gfir turned on during routine! get out and
+			   start over. */
+			if ((gfir_masked == 0x0) &&
+			    (gfir & GFIR_ERR_TRIGGER)) {
+				goto healthMonitor;
+			}
+
+			/* do not clear if we entered with a fatal gfir */
+			if (gfir_masked == 0x0) {
+
+				/* NEW clear by mask the logged bits */
+				sfir_addr = (uid << 24) + 0x100 + 0x08 * j;
+				__genwqe_writeq(cd, sfir_addr, sfir);
+
+				dev_dbg(&pci_dev->dev,
+					"[HM] Clearing  2ndary FIR 0x%08x "
+					"with 0x%016llx\n", sfir_addr, sfir);
+
+				/*
+				 * note, these cannot be error-Firs
+				 * since gfir_masked is 0 after sfir
+				 * was read. Also, it is safe to do
+				 * this write if sfir=0. Still need to
+				 * clear the primary. This just means
+				 * there is no secondary FIR.
+				 */
+
+				/* clear by mask the logged bit. */
+				fir_clr_addr = (uid << 24) + 0x10;
+				__genwqe_writeq(cd, fir_clr_addr, mask);
+
+				dev_dbg(&pci_dev->dev,
+					"[HM] Clearing primary FIR 0x%08x "
+					"with 0x%016llx\n", fir_clr_addr,
+					mask);
+			}
+		}
+	}
+	gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR);
+	if (gfir == IO_ILLEGAL_VALUE)
+		goto fatal_error;
+
+	if ((gfir_masked == 0x0) && (gfir & GFIR_ERR_TRIGGER)) {
+		/*
+		 * Check once more that it didn't go on after all the
+		 * FIRS were cleared.
+		 */
+		dev_dbg(&pci_dev->dev, "ACK! Another FIR! Recursing %d!\n",
+			iterations);
+		goto healthMonitor;
+	}
+	return gfir_masked;
+
+ fatal_error:
+	return IO_ILLEGAL_VALUE;
+}
+
+/**
+ * genwqe_health_thread() - Health checking thread
+ *
+ * This thread is only started for the PF of the card.
+ *
+ * This thread monitors the health of the card. A critical situation
+ * is when we read registers which contain -1 (IO_ILLEGAL_VALUE). In
+ * this case we need to be recovered from outside. Writing to
+ * registers will very likely not work either.
+ *
+ * This thread must only exit if kthread_should_stop() becomes true.
+ *
+ * Condition for the health-thread to trigger:
+ *   a) when a kthread_stop() request comes in or
+ *   b) a critical GFIR occured
+ *
+ * Informational GFIRs are checked and potentially printed in
+ * health_check_interval seconds.
+ */
+static int genwqe_health_thread(void *data)
+{
+	int rc, should_stop = 0;
+	struct genwqe_dev *cd = data;
+	struct pci_dev *pci_dev = cd->pci_dev;
+	u64 gfir, gfir_masked, slu_unitcfg, app_unitcfg;
+
+	while (!kthread_should_stop()) {
+		rc = wait_event_interruptible_timeout(cd->health_waitq,
+			 (genwqe_health_check_cond(cd, &gfir) ||
+			  (should_stop = kthread_should_stop())),
+				genwqe_health_check_interval * HZ);
+
+		if (should_stop)
+			break;
+
+		if (gfir == IO_ILLEGAL_VALUE) {
+			dev_err(&pci_dev->dev,
+				"[%s] GFIR=%016llx\n", __func__, gfir);
+			goto fatal_error;
+		}
+
+		slu_unitcfg = __genwqe_readq(cd, IO_SLU_UNITCFG);
+		if (slu_unitcfg == IO_ILLEGAL_VALUE) {
+			dev_err(&pci_dev->dev,
+				"[%s] SLU_UNITCFG=%016llx\n",
+				__func__, slu_unitcfg);
+			goto fatal_error;
+		}
+
+		app_unitcfg = __genwqe_readq(cd, IO_APP_UNITCFG);
+		if (app_unitcfg == IO_ILLEGAL_VALUE) {
+			dev_err(&pci_dev->dev,
+				"[%s] APP_UNITCFG=%016llx\n",
+				__func__, app_unitcfg);
+			goto fatal_error;
+		}
+
+		gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR);
+		if (gfir == IO_ILLEGAL_VALUE) {
+			dev_err(&pci_dev->dev,
+				"[%s] %s: GFIR=%016llx\n", __func__,
+				(gfir & GFIR_ERR_TRIGGER) ? "err" : "info",
+				gfir);
+			goto fatal_error;
+		}
+
+		gfir_masked = genwqe_fir_checking(cd);
+		if (gfir_masked == IO_ILLEGAL_VALUE)
+			goto fatal_error;
+
+		/*
+		 * GFIR ErrorTrigger bits set => reset the card!
+		 * Never do this for old/manufacturing images!
+		 */
+		if ((gfir_masked) && !cd->skip_recovery &&
+		    genwqe_recovery_on_fatal_gfir_required(cd)) {
+
+			cd->card_state = GENWQE_CARD_FATAL_ERROR;
+
+			rc = genwqe_recover_card(cd, 0);
+			if (rc < 0) {
+				/* FIXME Card is unusable and needs unbind! */
+				goto fatal_error;
+			}
+		}
+
+		cd->last_gfir = gfir;
+		cond_resched();
+	}
+
+	return 0;
+
+ fatal_error:
+	dev_err(&pci_dev->dev,
+		"[%s] card unusable. Please trigger unbind!\n", __func__);
+
+	/* Bring down logical devices to inform user space via udev remove. */
+	cd->card_state = GENWQE_CARD_FATAL_ERROR;
+	genwqe_stop(cd);
+
+	/* genwqe_bus_reset failed(). Now wait for genwqe_remove(). */
+	while (!kthread_should_stop())
+		cond_resched();
+
+	return -EIO;
+}
+
+static int genwqe_health_check_start(struct genwqe_dev *cd)
+{
+	int rc;
+
+	if (genwqe_health_check_interval <= 0)
+		return 0;	/* valid for disabling the service */
+
+	/* moved before request_irq() */
+	/* init_waitqueue_head(&cd->health_waitq); */
+
+	cd->health_thread = kthread_run(genwqe_health_thread, cd,
+					GENWQE_DEVNAME "%d_health",
+					cd->card_idx);
+	if (IS_ERR(cd->health_thread)) {
+		rc = PTR_ERR(cd->health_thread);
+		cd->health_thread = NULL;
+		return rc;
+	}
+	return 0;
+}
+
+static int genwqe_health_thread_running(struct genwqe_dev *cd)
+{
+	return cd->health_thread != NULL;
+}
+
+static int genwqe_health_check_stop(struct genwqe_dev *cd)
+{
+	int rc;
+
+	if (!genwqe_health_thread_running(cd))
+		return -EIO;
+
+	rc = kthread_stop(cd->health_thread);
+	cd->health_thread = NULL;
+	return 0;
+}
+
+/**
+ * genwqe_pci_setup() - Allocate PCIe related resources for our card
+ */
+static int genwqe_pci_setup(struct genwqe_dev *cd)
+{
+	int err, bars;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	bars = pci_select_bars(pci_dev, IORESOURCE_MEM);
+	err = pci_enable_device_mem(pci_dev);
+	if (err) {
+		dev_err(&pci_dev->dev,
+			"err: failed to enable pci memory (err=%d)\n", err);
+		goto err_out;
+	}
+
+	/* Reserve PCI I/O and memory resources */
+	err = pci_request_selected_regions(pci_dev, bars, genwqe_driver_name);
+	if (err) {
+		dev_err(&pci_dev->dev,
+			"[%s] err: request bars failed (%d)\n", __func__, err);
+		err = -EIO;
+		goto err_disable_device;
+	}
+
+	/* check for 64-bit DMA address supported (DAC) */
+	if (!pci_set_dma_mask(pci_dev, DMA_BIT_MASK(64))) {
+		err = pci_set_consistent_dma_mask(pci_dev, DMA_BIT_MASK(64));
+		if (err) {
+			dev_err(&pci_dev->dev,
+				"err: DMA64 consistent mask error\n");
+			err = -EIO;
+			goto out_release_resources;
+		}
+	/* check for 32-bit DMA address supported (SAC) */
+	} else if (!pci_set_dma_mask(pci_dev, DMA_BIT_MASK(32))) {
+		err = pci_set_consistent_dma_mask(pci_dev, DMA_BIT_MASK(32));
+		if (err) {
+			dev_err(&pci_dev->dev,
+				"err: DMA32 consistent mask error\n");
+			err = -EIO;
+			goto out_release_resources;
+		}
+	} else {
+		dev_err(&pci_dev->dev,
+			"err: neither DMA32 nor DMA64 supported\n");
+		err = -EIO;
+		goto out_release_resources;
+	}
+
+	pci_set_master(pci_dev);
+	pci_enable_pcie_error_reporting(pci_dev);
+
+	/* request complete BAR-0 space (length = 0) */
+	cd->mmio_len = pci_resource_len(pci_dev, 0);
+	cd->mmio = pci_iomap(pci_dev, 0, 0);
+	if (cd->mmio == NULL) {
+		dev_err(&pci_dev->dev,
+			"[%s] err: mapping BAR0 failed\n", __func__);
+		err = -ENOMEM;
+		goto out_release_resources;
+	}
+
+	cd->num_vfs = pci_sriov_get_totalvfs(pci_dev);
+
+	err = genwqe_read_ids(cd);
+	if (err)
+		goto out_iounmap;
+
+	return 0;
+
+ out_iounmap:
+	pci_iounmap(pci_dev, cd->mmio);
+ out_release_resources:
+	pci_release_selected_regions(pci_dev, bars);
+ err_disable_device:
+	pci_disable_device(pci_dev);
+ err_out:
+	return err;
+}
+
+/**
+ * genwqe_pci_remove() - Free PCIe related resources for our card
+ */
+static void genwqe_pci_remove(struct genwqe_dev *cd)
+{
+	int bars;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	if (cd->mmio)
+		pci_iounmap(pci_dev, cd->mmio);
+
+	bars = pci_select_bars(pci_dev, IORESOURCE_MEM);
+	pci_release_selected_regions(pci_dev, bars);
+	pci_disable_device(pci_dev);
+}
+
+/**
+ * genwqe_probe() - Device initialization
+ * @pdev:	PCI device information struct
+ *
+ * Callable for multiple cards. This function is called on bind.
+ *
+ * Return: 0 if succeeded, < 0 when failed
+ */
+static int genwqe_probe(struct pci_dev *pci_dev,
+			const struct pci_device_id *id)
+{
+	int err;
+	struct genwqe_dev *cd;
+
+	genwqe_init_crc32();
+
+	cd = genwqe_dev_alloc();
+	if (IS_ERR(cd)) {
+		dev_err(&pci_dev->dev, "err: could not alloc mem (err=%d)!\n",
+			(int)PTR_ERR(cd));
+		return PTR_ERR(cd);
+	}
+
+	dev_set_drvdata(&pci_dev->dev, cd);
+	cd->pci_dev = pci_dev;
+
+	err = genwqe_pci_setup(cd);
+	if (err < 0) {
+		dev_err(&pci_dev->dev,
+			"err: problems with PCI setup (err=%d)\n", err);
+		goto out_free_dev;
+	}
+
+	err = genwqe_start(cd);
+	if (err < 0) {
+		dev_err(&pci_dev->dev,
+			"err: cannot start card services! (err=%d)\n", err);
+		goto out_pci_remove;
+	}
+
+	if (genwqe_is_privileged(cd)) {
+		err = genwqe_health_check_start(cd);
+		if (err < 0) {
+			dev_err(&pci_dev->dev,
+				"err: cannot start health checking! "
+				"(err=%d)\n", err);
+			goto out_stop_services;
+		}
+	}
+	return 0;
+
+ out_stop_services:
+	genwqe_stop(cd);
+ out_pci_remove:
+	genwqe_pci_remove(cd);
+ out_free_dev:
+	genwqe_dev_free(cd);
+	return err;
+}
+
+/**
+ * genwqe_remove() - Called when device is removed (hot-plugable)
+ *
+ * Or when driver is unloaded respecitively when unbind is done.
+ */
+static void genwqe_remove(struct pci_dev *pci_dev)
+{
+	struct genwqe_dev *cd = dev_get_drvdata(&pci_dev->dev);
+
+	genwqe_health_check_stop(cd);
+
+	/*
+	 * genwqe_stop() must survive if it is called twice
+	 * sequentially. This happens when the health thread calls it
+	 * and fails on genwqe_bus_reset().
+	 */
+	genwqe_stop(cd);
+	genwqe_pci_remove(cd);
+	genwqe_dev_free(cd);
+}
+
+/*
+ * genwqe_err_error_detected() - Error detection callback
+ *
+ * This callback is called by the PCI subsystem whenever a PCI bus
+ * error is detected.
+ */
+static pci_ers_result_t genwqe_err_error_detected(struct pci_dev *pci_dev,
+						 enum pci_channel_state state)
+{
+	struct genwqe_dev *cd;
+
+	dev_err(&pci_dev->dev, "[%s] state=%d\n", __func__, state);
+
+	if (pci_dev == NULL)
+		return PCI_ERS_RESULT_NEED_RESET;
+
+	cd = dev_get_drvdata(&pci_dev->dev);
+	if (cd == NULL)
+		return PCI_ERS_RESULT_NEED_RESET;
+
+	switch (state) {
+	case pci_channel_io_normal:
+		return PCI_ERS_RESULT_CAN_RECOVER;
+	case pci_channel_io_frozen:
+		return PCI_ERS_RESULT_NEED_RESET;
+	case pci_channel_io_perm_failure:
+		return PCI_ERS_RESULT_DISCONNECT;
+	}
+
+	return PCI_ERS_RESULT_NEED_RESET;
+}
+
+static pci_ers_result_t genwqe_err_result_none(struct pci_dev *dev)
+{
+	return PCI_ERS_RESULT_NONE;
+}
+
+static void genwqe_err_resume(struct pci_dev *dev)
+{
+}
+
+static int genwqe_sriov_configure(struct pci_dev *dev, int numvfs)
+{
+	struct genwqe_dev *cd = dev_get_drvdata(&dev->dev);
+
+	if (numvfs > 0) {
+		genwqe_setup_vf_jtimer(cd);
+		pci_enable_sriov(dev, numvfs);
+		return numvfs;
+	}
+	if (numvfs == 0) {
+		pci_disable_sriov(dev);
+		return 0;
+	}
+	return 0;
+}
+
+static struct pci_error_handlers genwqe_err_handler = {
+	.error_detected = genwqe_err_error_detected,
+	.mmio_enabled	= genwqe_err_result_none,
+	.link_reset	= genwqe_err_result_none,
+	.slot_reset	= genwqe_err_result_none,
+	.resume		= genwqe_err_resume,
+};
+
+static struct pci_driver genwqe_driver = {
+	.name	  = genwqe_driver_name,
+	.id_table = genwqe_device_table,
+	.probe	  = genwqe_probe,
+	.remove	  = genwqe_remove,
+	.sriov_configure = genwqe_sriov_configure,
+	.err_handler = &genwqe_err_handler,
+};
+
+/**
+ * genwqe_init_module() - Driver registration and initialization
+ */
+static int __init genwqe_init_module(void)
+{
+	int rc;
+
+	class_genwqe = class_create(THIS_MODULE, GENWQE_DEVNAME);
+	if (IS_ERR(class_genwqe)) {
+		pr_err("[%s] create class failed\n", __func__);
+		return -ENOMEM;
+	}
+
+	debugfs_genwqe = debugfs_create_dir(GENWQE_DEVNAME, NULL);
+	if (!debugfs_genwqe) {
+		rc = -ENOMEM;
+		goto err_out;
+	}
+
+	rc = pci_register_driver(&genwqe_driver);
+	if (rc != 0) {
+		pr_err("[%s] pci_reg_driver (rc=%d)\n", __func__, rc);
+		goto err_out0;
+	}
+
+	return rc;
+
+ err_out0:
+	debugfs_remove(debugfs_genwqe);
+ err_out:
+	class_destroy(class_genwqe);
+	return rc;
+}
+
+/**
+ * genwqe_exit_module() - Driver exit
+ */
+static void __exit genwqe_exit_module(void)
+{
+	pci_unregister_driver(&genwqe_driver);
+	debugfs_remove(debugfs_genwqe);
+	class_destroy(class_genwqe);
+}
+
+module_init(genwqe_init_module);
+module_exit(genwqe_exit_module);
diff --git a/drivers/misc/genwqe/card_base.h b/drivers/misc/genwqe/card_base.h
new file mode 100644
index 00000000000..0e608a28860
--- /dev/null
+++ b/drivers/misc/genwqe/card_base.h
@@ -0,0 +1,577 @@
+#ifndef __CARD_BASE_H__
+#define __CARD_BASE_H__
+
+/**
+ * IBM Accelerator Family 'GenWQE'
+ *
+ * (C) Copyright IBM Corp. 2013
+ *
+ * Author: Frank Haverkamp <haver@linux.vnet.ibm.com>
+ * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com>
+ * Author: Michael Jung <mijung@de.ibm.com>
+ * Author: Michael Ruettger <michael@ibmra.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+/*
+ * Interfaces within the GenWQE module. Defines genwqe_card and
+ * ddcb_queue as well as ddcb_requ.
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/cdev.h>
+#include <linux/stringify.h>
+#include <linux/pci.h>
+#include <linux/semaphore.h>
+#include <linux/uaccess.h>
+#include <linux/io.h>
+#include <linux/version.h>
+#include <linux/debugfs.h>
+#include <linux/slab.h>
+
+#include <linux/genwqe/genwqe_card.h>
+#include "genwqe_driver.h"
+
+#define GENWQE_MSI_IRQS			4  /* Just one supported, no MSIx */
+#define GENWQE_FLAG_MSI_ENABLED		(1 << 0)
+
+#define GENWQE_MAX_VFS			15 /* maximum 15 VFs are possible */
+#define GENWQE_MAX_FUNCS		16 /* 1 PF and 15 VFs */
+#define GENWQE_CARD_NO_MAX		(16 * GENWQE_MAX_FUNCS)
+
+/* Compile parameters, some of them appear in debugfs for later adjustment */
+#define genwqe_ddcb_max			32 /* DDCBs on the work-queue */
+#define genwqe_polling_enabled		0  /* in case of irqs not working */
+#define genwqe_ddcb_software_timeout	10 /* timeout per DDCB in seconds */
+#define genwqe_kill_timeout		8  /* time until process gets killed */
+#define genwqe_vf_jobtimeout_msec	250  /* 250 msec */
+#define genwqe_pf_jobtimeout_msec	8000 /* 8 sec should be ok */
+#define genwqe_health_check_interval	4 /* <= 0: disabled */
+
+/* Sysfs attribute groups used when we create the genwqe device */
+extern const struct attribute_group *genwqe_attribute_groups[];
+
+/*
+ * Config space for Genwqe5 A7:
+ * 00:[14 10 4b 04]40 00 10 00[00 00 00 12]00 00 00 00
+ * 10: 0c 00 00 f0 07 3c 00 00 00 00 00 00 00 00 00 00
+ * 20: 00 00 00 00 00 00 00 00 00 00 00 00[14 10 4b 04]
+ * 30: 00 00 00 00 50 00 00 00 00 00 00 00 00 00 00 00
+ */
+#define PCI_DEVICE_GENWQE		0x044b /* Genwqe DeviceID */
+
+#define PCI_SUBSYSTEM_ID_GENWQE5	0x035f /* Genwqe A5 Subsystem-ID */
+#define PCI_SUBSYSTEM_ID_GENWQE5_NEW	0x044b /* Genwqe A5 Subsystem-ID */
+#define PCI_CLASSCODE_GENWQE5		0x1200 /* UNKNOWN */
+
+#define PCI_SUBVENDOR_ID_IBM_SRIOV	0x0000
+#define PCI_SUBSYSTEM_ID_GENWQE5_SRIOV	0x0000 /* Genwqe A5 Subsystem-ID */
+#define PCI_CLASSCODE_GENWQE5_SRIOV	0x1200 /* UNKNOWN */
+
+#define	GENWQE_SLU_ARCH_REQ		2 /* Required SLU architecture level */
+
+/**
+ * struct genwqe_reg - Genwqe data dump functionality
+ */
+struct genwqe_reg {
+	u32 addr;
+	u32 idx;
+	u64 val;
+};
+
+/*
+ * enum genwqe_dbg_type - Specify chip unit to dump/debug
+ */
+enum genwqe_dbg_type {
+	GENWQE_DBG_UNIT0 = 0,  /* captured before prev errs cleared */
+	GENWQE_DBG_UNIT1 = 1,
+	GENWQE_DBG_UNIT2 = 2,
+	GENWQE_DBG_UNIT3 = 3,
+	GENWQE_DBG_UNIT4 = 4,
+	GENWQE_DBG_UNIT5 = 5,
+	GENWQE_DBG_UNIT6 = 6,
+	GENWQE_DBG_UNIT7 = 7,
+	GENWQE_DBG_REGS  = 8,
+	GENWQE_DBG_DMA   = 9,
+	GENWQE_DBG_UNITS = 10, /* max number of possible debug units  */
+};
+
+/* Software error injection to simulate card failures */
+#define GENWQE_INJECT_HARDWARE_FAILURE	0x00000001 /* injects -1 reg reads */
+#define GENWQE_INJECT_BUS_RESET_FAILURE 0x00000002 /* pci_bus_reset fail */
+#define GENWQE_INJECT_GFIR_FATAL	0x00000004 /* GFIR = 0x0000ffff */
+#define GENWQE_INJECT_GFIR_INFO		0x00000008 /* GFIR = 0xffff0000 */
+
+/*
+ * Genwqe card description and management data.
+ *
+ * Error-handling in case of card malfunction
+ * ------------------------------------------
+ *
+ * If the card is detected to be defective the outside environment
+ * will cause the PCI layer to call deinit (the cleanup function for
+ * probe). This is the same effect like doing a unbind/bind operation
+ * on the card.
+ *
+ * The genwqe card driver implements a health checking thread which
+ * verifies the card function. If this detects a problem the cards
+ * device is being shutdown and restarted again, along with a reset of
+ * the card and queue.
+ *
+ * All functions accessing the card device return either -EIO or -ENODEV
+ * code to indicate the malfunction to the user. The user has to close
+ * the file descriptor and open a new one, once the card becomes
+ * available again.
+ *
+ * If the open file descriptor is setup to receive SIGIO, the signal is
+ * genereated for the application which has to provide a handler to
+ * react on it. If the application does not close the open
+ * file descriptor a SIGKILL is send to enforce freeing the cards
+ * resources.
+ *
+ * I did not find a different way to prevent kernel problems due to
+ * reference counters for the cards character devices getting out of
+ * sync. The character device deallocation does not block, even if
+ * there is still an open file descriptor pending. If this pending
+ * descriptor is closed, the data structures used by the character
+ * device is reinstantiated, which will lead to the reference counter
+ * dropping below the allowed values.
+ *
+ * Card recovery
+ * -------------
+ *
+ * To test the internal driver recovery the following command can be used:
+ *   sudo sh -c 'echo 0xfffff > /sys/class/genwqe/genwqe0_card/err_inject'
+ */
+
+
+/**
+ * struct dma_mapping_type - Mapping type definition
+ *
+ * To avoid memcpying data arround we use user memory directly. To do
+ * this we need to pin/swap-in the memory and request a DMA address
+ * for it.
+ */
+enum dma_mapping_type {
+	GENWQE_MAPPING_RAW = 0,		/* contignous memory buffer */
+	GENWQE_MAPPING_SGL_TEMP,	/* sglist dynamically used */
+	GENWQE_MAPPING_SGL_PINNED,	/* sglist used with pinning */
+};
+
+/**
+ * struct dma_mapping - Information about memory mappings done by the driver
+ */
+struct dma_mapping {
+	enum dma_mapping_type type;
+
+	void *u_vaddr;			/* user-space vaddr/non-aligned */
+	void *k_vaddr;			/* kernel-space vaddr/non-aligned */
+	dma_addr_t dma_addr;		/* physical DMA address */
+
+	struct page **page_list;	/* list of pages used by user buff */
+	dma_addr_t *dma_list;		/* list of dma addresses per page */
+	unsigned int nr_pages;		/* number of pages */
+	unsigned int size;		/* size in bytes */
+
+	struct list_head card_list;	/* list of usr_maps for card */
+	struct list_head pin_list;	/* list of pinned memory for dev */
+};
+
+static inline void genwqe_mapping_init(struct dma_mapping *m,
+				       enum dma_mapping_type type)
+{
+	memset(m, 0, sizeof(*m));
+	m->type = type;
+}
+
+/**
+ * struct ddcb_queue - DDCB queue data
+ * @ddcb_max:          Number of DDCBs on the queue
+ * @ddcb_next:         Next free DDCB
+ * @ddcb_act:          Next DDCB supposed to finish
+ * @ddcb_seq:          Sequence number of last DDCB
+ * @ddcbs_in_flight:   Currently enqueued DDCBs
+ * @ddcbs_completed:   Number of already completed DDCBs
+ * @busy:              Number of -EBUSY returns
+ * @ddcb_daddr:        DMA address of first DDCB in the queue
+ * @ddcb_vaddr:        Kernel virtual address of first DDCB in the queue
+ * @ddcb_req:          Associated requests (one per DDCB)
+ * @ddcb_waitqs:       Associated wait queues (one per DDCB)
+ * @ddcb_lock:         Lock to protect queuing operations
+ * @ddcb_waitq:        Wait on next DDCB finishing
+ */
+
+struct ddcb_queue {
+	int ddcb_max;			/* amount of DDCBs  */
+	int ddcb_next;			/* next available DDCB num */
+	int ddcb_act;			/* DDCB to be processed */
+	u16 ddcb_seq;			/* slc seq num */
+	unsigned int ddcbs_in_flight;	/* number of ddcbs in processing */
+	unsigned int ddcbs_completed;
+	unsigned int ddcbs_max_in_flight;
+	unsigned int busy;		/* how many times -EBUSY? */
+
+	dma_addr_t ddcb_daddr;		/* DMA address */
+	struct ddcb *ddcb_vaddr;	/* kernel virtual addr for DDCBs */
+	struct ddcb_requ **ddcb_req;	/* ddcb processing parameter */
+	wait_queue_head_t *ddcb_waitqs; /* waitqueue per ddcb */
+
+	spinlock_t ddcb_lock;		/* exclusive access to queue */
+	wait_queue_head_t ddcb_waitq;	/* wait for ddcb processing */
+
+	/* registers or the respective queue to be used */
+	u32 IO_QUEUE_CONFIG;
+	u32 IO_QUEUE_STATUS;
+	u32 IO_QUEUE_SEGMENT;
+	u32 IO_QUEUE_INITSQN;
+	u32 IO_QUEUE_WRAP;
+	u32 IO_QUEUE_OFFSET;
+	u32 IO_QUEUE_WTIME;
+	u32 IO_QUEUE_ERRCNTS;
+	u32 IO_QUEUE_LRW;
+};
+
+/*
+ * GFIR, SLU_UNITCFG, APP_UNITCFG
+ *   8 Units with FIR/FEC + 64 * 2ndary FIRS/FEC.
+ */
+#define GENWQE_FFDC_REGS	(3 + (8 * (2 + 2 * 64)))
+
+struct genwqe_ffdc {
+	unsigned int entries;
+	struct genwqe_reg *regs;
+};
+
+/**
+ * struct genwqe_dev - GenWQE device information
+ * @card_state:       Card operation state, see above
+ * @ffdc:             First Failure Data Capture buffers for each unit
+ * @card_thread:      Working thread to operate the DDCB queue
+ * @card_waitq:       Wait queue used in card_thread
+ * @queue:            DDCB queue
+ * @health_thread:    Card monitoring thread (only for PFs)
+ * @health_waitq:     Wait queue used in health_thread
+ * @pci_dev:          Associated PCI device (function)
+ * @mmio:             Base address of 64-bit register space
+ * @mmio_len:         Length of register area
+ * @file_lock:        Lock to protect access to file_list
+ * @file_list:        List of all processes with open GenWQE file descriptors
+ *
+ * This struct contains all information needed to communicate with a
+ * GenWQE card. It is initialized when a GenWQE device is found and
+ * destroyed when it goes away. It holds data to maintain the queue as
+ * well as data needed to feed the user interfaces.
+ */
+struct genwqe_dev {
+	enum genwqe_card_state card_state;
+	spinlock_t print_lock;
+
+	int card_idx;			/* card index 0..CARD_NO_MAX-1 */
+	u64 flags;			/* general flags */
+
+	/* FFDC data gathering */
+	struct genwqe_ffdc ffdc[GENWQE_DBG_UNITS];
+
+	/* DDCB workqueue */
+	struct task_struct *card_thread;
+	wait_queue_head_t queue_waitq;
+	struct ddcb_queue queue;	/* genwqe DDCB queue */
+	unsigned int irqs_processed;
+
+	/* Card health checking thread */
+	struct task_struct *health_thread;
+	wait_queue_head_t health_waitq;
+
+	/* char device */
+	dev_t  devnum_genwqe;		/* major/minor num card */
+	struct class *class_genwqe;	/* reference to class object */
+	struct device *dev;		/* for device creation */
+	struct cdev cdev_genwqe;	/* char device for card */
+
+	struct dentry *debugfs_root;	/* debugfs card root directory */
+	struct dentry *debugfs_genwqe;	/* debugfs driver root directory */
+
+	/* pci resources */
+	struct pci_dev *pci_dev;	/* PCI device */
+	void __iomem *mmio;		/* BAR-0 MMIO start */
+	unsigned long mmio_len;
+	u16 num_vfs;
+	u32 vf_jobtimeout_msec[GENWQE_MAX_VFS];
+	int is_privileged;		/* access to all regs possible */
+
+	/* config regs which we need often */
+	u64 slu_unitcfg;
+	u64 app_unitcfg;
+	u64 softreset;
+	u64 err_inject;
+	u64 last_gfir;
+	char app_name[5];
+
+	spinlock_t file_lock;		/* lock for open files */
+	struct list_head file_list;	/* list of open files */
+
+	/* debugfs parameters */
+	int ddcb_software_timeout;	/* wait until DDCB times out */
+	int skip_recovery;		/* circumvention if recovery fails */
+	int kill_timeout;		/* wait after sending SIGKILL */
+};
+
+/**
+ * enum genwqe_requ_state - State of a DDCB execution request
+ */
+enum genwqe_requ_state {
+	GENWQE_REQU_NEW      = 0,
+	GENWQE_REQU_ENQUEUED = 1,
+	GENWQE_REQU_TAPPED   = 2,
+	GENWQE_REQU_FINISHED = 3,
+	GENWQE_REQU_STATE_MAX,
+};
+
+/**
+ * struct genwqe_sgl - Scatter gather list describing user-space memory
+ * @sgl:            scatter gather list needs to be 128 byte aligned
+ * @sgl_dma_addr:   dma address of sgl
+ * @sgl_size:       size of area used for sgl
+ * @user_addr:      user-space address of memory area
+ * @user_size:      size of user-space memory area
+ * @page:           buffer for partial pages if needed
+ * @page_dma_addr:  dma address partial pages
+ */
+struct genwqe_sgl {
+	dma_addr_t sgl_dma_addr;
+	struct sg_entry *sgl;
+	size_t sgl_size;	/* size of sgl */
+
+	void __user *user_addr; /* user-space base-address */
+	size_t user_size;       /* size of memory area */
+
+	unsigned long nr_pages;
+	unsigned long fpage_offs;
+	size_t fpage_size;
+	size_t lpage_size;
+
+	void *fpage;
+	dma_addr_t fpage_dma_addr;
+
+	void *lpage;
+	dma_addr_t lpage_dma_addr;
+};
+
+int genwqe_alloc_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl,
+			  void __user *user_addr, size_t user_size);
+
+int genwqe_setup_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl,
+		     dma_addr_t *dma_list);
+
+int genwqe_free_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl);
+
+/**
+ * struct ddcb_requ - Kernel internal representation of the DDCB request
+ * @cmd:          User space representation of the DDCB execution request
+ */
+struct ddcb_requ {
+	/* kernel specific content */
+	enum genwqe_requ_state req_state; /* request status */
+	int num;			  /* ddcb_no for this request */
+	struct ddcb_queue *queue;	  /* associated queue */
+
+	struct dma_mapping  dma_mappings[DDCB_FIXUPS];
+	struct genwqe_sgl sgls[DDCB_FIXUPS];
+
+	/* kernel/user shared content */
+	struct genwqe_ddcb_cmd cmd;	/* ddcb_no for this request */
+	struct genwqe_debug_data debug_data;
+};
+
+/**
+ * struct genwqe_file - Information for open GenWQE devices
+ */
+struct genwqe_file {
+	struct genwqe_dev *cd;
+	struct genwqe_driver *client;
+	struct file *filp;
+
+	struct fasync_struct *async_queue;
+	struct task_struct *owner;
+	struct list_head list;		/* entry in list of open files */
+
+	spinlock_t map_lock;		/* lock for dma_mappings */
+	struct list_head map_list;	/* list of dma_mappings */
+
+	spinlock_t pin_lock;		/* lock for pinned memory */
+	struct list_head pin_list;	/* list of pinned memory */
+};
+
+int  genwqe_setup_service_layer(struct genwqe_dev *cd); /* for PF only */
+int  genwqe_finish_queue(struct genwqe_dev *cd);
+int  genwqe_release_service_layer(struct genwqe_dev *cd);
+
+/**
+ * genwqe_get_slu_id() - Read Service Layer Unit Id
+ * Return: 0x00: Development code
+ *         0x01: SLC1 (old)
+ *         0x02: SLC2 (sept2012)
+ *         0x03: SLC2 (feb2013, generic driver)
+ */
+static inline int genwqe_get_slu_id(struct genwqe_dev *cd)
+{
+	return (int)((cd->slu_unitcfg >> 32) & 0xff);
+}
+
+int  genwqe_ddcbs_in_flight(struct genwqe_dev *cd);
+
+u8   genwqe_card_type(struct genwqe_dev *cd);
+int  genwqe_card_reset(struct genwqe_dev *cd);
+int  genwqe_set_interrupt_capability(struct genwqe_dev *cd, int count);
+void genwqe_reset_interrupt_capability(struct genwqe_dev *cd);
+
+int  genwqe_device_create(struct genwqe_dev *cd);
+int  genwqe_device_remove(struct genwqe_dev *cd);
+
+/* debugfs */
+int  genwqe_init_debugfs(struct genwqe_dev *cd);
+void genqwe_exit_debugfs(struct genwqe_dev *cd);
+
+int  genwqe_read_softreset(struct genwqe_dev *cd);
+
+/* Hardware Circumventions */
+int  genwqe_recovery_on_fatal_gfir_required(struct genwqe_dev *cd);
+int  genwqe_flash_readback_fails(struct genwqe_dev *cd);
+
+/**
+ * genwqe_write_vreg() - Write register in VF window
+ * @cd:    genwqe device
+ * @reg:   register address
+ * @val:   value to write
+ * @func:  0: PF, 1: VF0, ..., 15: VF14
+ */
+int genwqe_write_vreg(struct genwqe_dev *cd, u32 reg, u64 val, int func);
+
+/**
+ * genwqe_read_vreg() - Read register in VF window
+ * @cd:    genwqe device
+ * @reg:   register address
+ * @func:  0: PF, 1: VF0, ..., 15: VF14
+ *
+ * Return: content of the register
+ */
+u64 genwqe_read_vreg(struct genwqe_dev *cd, u32 reg, int func);
+
+/* FFDC Buffer Management */
+int  genwqe_ffdc_buff_size(struct genwqe_dev *cd, int unit_id);
+int  genwqe_ffdc_buff_read(struct genwqe_dev *cd, int unit_id,
+			   struct genwqe_reg *regs, unsigned int max_regs);
+int  genwqe_read_ffdc_regs(struct genwqe_dev *cd, struct genwqe_reg *regs,
+			   unsigned int max_regs, int all);
+int  genwqe_ffdc_dump_dma(struct genwqe_dev *cd,
+			  struct genwqe_reg *regs, unsigned int max_regs);
+
+int  genwqe_init_debug_data(struct genwqe_dev *cd,
+			    struct genwqe_debug_data *d);
+
+void genwqe_init_crc32(void);
+int  genwqe_read_app_id(struct genwqe_dev *cd, char *app_name, int len);
+
+/* Memory allocation/deallocation; dma address handling */
+int  genwqe_user_vmap(struct genwqe_dev *cd, struct dma_mapping *m,
+		      void *uaddr, unsigned long size,
+		      struct ddcb_requ *req);
+
+int  genwqe_user_vunmap(struct genwqe_dev *cd, struct dma_mapping *m,
+			struct ddcb_requ *req);
+
+static inline bool dma_mapping_used(struct dma_mapping *m)
+{
+	if (!m)
+		return 0;
+	return m->size != 0;
+}
+
+/**
+ * __genwqe_execute_ddcb() - Execute DDCB request with addr translation
+ *
+ * This function will do the address translation changes to the DDCBs
+ * according to the definitions required by the ATS field. It looks up
+ * the memory allocation buffer or does vmap/vunmap for the respective
+ * user-space buffers, inclusive page pinning and scatter gather list
+ * buildup and teardown.
+ */
+int  __genwqe_execute_ddcb(struct genwqe_dev *cd,
+			   struct genwqe_ddcb_cmd *cmd);
+
+/**
+ * __genwqe_execute_raw_ddcb() - Execute DDCB request without addr translation
+ *
+ * This version will not do address translation or any modifcation of
+ * the DDCB data. It is used e.g. for the MoveFlash DDCB which is
+ * entirely prepared by the driver itself. That means the appropriate
+ * DMA addresses are already in the DDCB and do not need any
+ * modification.
+ */
+int  __genwqe_execute_raw_ddcb(struct genwqe_dev *cd,
+			       struct genwqe_ddcb_cmd *cmd);
+
+int  __genwqe_enqueue_ddcb(struct genwqe_dev *cd, struct ddcb_requ *req);
+int  __genwqe_wait_ddcb(struct genwqe_dev *cd, struct ddcb_requ *req);
+int  __genwqe_purge_ddcb(struct genwqe_dev *cd, struct ddcb_requ *req);
+
+/* register access */
+int __genwqe_writeq(struct genwqe_dev *cd, u64 byte_offs, u64 val);
+u64 __genwqe_readq(struct genwqe_dev *cd, u64 byte_offs);
+int __genwqe_writel(struct genwqe_dev *cd, u64 byte_offs, u32 val);
+u32 __genwqe_readl(struct genwqe_dev *cd, u64 byte_offs);
+
+void *__genwqe_alloc_consistent(struct genwqe_dev *cd, size_t size,
+				 dma_addr_t *dma_handle);
+void __genwqe_free_consistent(struct genwqe_dev *cd, size_t size,
+			      void *vaddr, dma_addr_t dma_handle);
+
+/* Base clock frequency in MHz */
+int  genwqe_base_clock_frequency(struct genwqe_dev *cd);
+
+/* Before FFDC is captured the traps should be stopped. */
+void genwqe_stop_traps(struct genwqe_dev *cd);
+void genwqe_start_traps(struct genwqe_dev *cd);
+
+/* Hardware circumvention */
+bool genwqe_need_err_masking(struct genwqe_dev *cd);
+
+/**
+ * genwqe_is_privileged() - Determine operation mode for PCI function
+ *
+ * On Intel with SRIOV support we see:
+ *   PF: is_physfn = 1 is_virtfn = 0
+ *   VF: is_physfn = 0 is_virtfn = 1
+ *
+ * On Systems with no SRIOV support _and_ virtualized systems we get:
+ *       is_physfn = 0 is_virtfn = 0
+ *
+ * Other vendors have individual pci device ids to distinguish between
+ * virtual function drivers and physical function drivers. GenWQE
+ * unfortunately has just on pci device id for both, VFs and PF.
+ *
+ * The following code is used to distinguish if the card is running in
+ * privileged mode, either as true PF or in a virtualized system with
+ * full register access e.g. currently on PowerPC.
+ *
+ * if (pci_dev->is_virtfn)
+ *          cd->is_privileged = 0;
+ *  else
+ *          cd->is_privileged = (__genwqe_readq(cd, IO_SLU_BITSTREAM)
+ *				 != IO_ILLEGAL_VALUE);
+ */
+static inline int genwqe_is_privileged(struct genwqe_dev *cd)
+{
+	return cd->is_privileged;
+}
+
+#endif	/* __CARD_BASE_H__ */
diff --git a/drivers/misc/genwqe/card_ddcb.c b/drivers/misc/genwqe/card_ddcb.c
new file mode 100644
index 00000000000..c8046db2d5a
--- /dev/null
+++ b/drivers/misc/genwqe/card_ddcb.c
@@ -0,0 +1,1380 @@
+/**
+ * IBM Accelerator Family 'GenWQE'
+ *
+ * (C) Copyright IBM Corp. 2013
+ *
+ * Author: Frank Haverkamp <haver@linux.vnet.ibm.com>
+ * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com>
+ * Author: Michael Jung <mijung@de.ibm.com>
+ * Author: Michael Ruettger <michael@ibmra.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+/*
+ * Device Driver Control Block (DDCB) queue support. Definition of
+ * interrupt handlers for queue support as well as triggering the
+ * health monitor code in case of problems. The current hardware uses
+ * an MSI interrupt which is shared between error handling and
+ * functional code.
+ */
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+#include <linux/dma-mapping.h>
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/crc-itu-t.h>
+
+#include "card_base.h"
+#include "card_ddcb.h"
+
+/*
+ * N: next DDCB, this is where the next DDCB will be put.
+ * A: active DDCB, this is where the code will look for the next completion.
+ * x: DDCB is enqueued, we are waiting for its completion.
+
+ * Situation (1): Empty queue
+ *  +---+---+---+---+---+---+---+---+
+ *  | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
+ *  |   |   |   |   |   |   |   |   |
+ *  +---+---+---+---+---+---+---+---+
+ *           A/N
+ *  enqueued_ddcbs = A - N = 2 - 2 = 0
+ *
+ * Situation (2): Wrapped, N > A
+ *  +---+---+---+---+---+---+---+---+
+ *  | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
+ *  |   |   | x | x |   |   |   |   |
+ *  +---+---+---+---+---+---+---+---+
+ *            A       N
+ *  enqueued_ddcbs = N - A = 4 - 2 = 2
+ *
+ * Situation (3): Queue wrapped, A > N
+ *  +---+---+---+---+---+---+---+---+
+ *  | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
+ *  | x | x |   |   | x | x | x | x |
+ *  +---+---+---+---+---+---+---+---+
+ *            N       A
+ *  enqueued_ddcbs = queue_max  - (A - N) = 8 - (4 - 2) = 6
+ *
+ * Situation (4a): Queue full N > A
+ *  +---+---+---+---+---+---+---+---+
+ *  | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
+ *  | x | x | x | x | x | x | x |   |
+ *  +---+---+---+---+---+---+---+---+
+ *    A                           N
+ *
+ *  enqueued_ddcbs = N - A = 7 - 0 = 7
+ *
+ * Situation (4a): Queue full A > N
+ *  +---+---+---+---+---+---+---+---+
+ *  | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
+ *  | x | x | x |   | x | x | x | x |
+ *  +---+---+---+---+---+---+---+---+
+ *                N   A
+ *  enqueued_ddcbs = queue_max - (A - N) = 8 - (4 - 3) = 7
+ */
+
+static int queue_empty(struct ddcb_queue *queue)
+{
+	return queue->ddcb_next == queue->ddcb_act;
+}
+
+static int queue_enqueued_ddcbs(struct ddcb_queue *queue)
+{
+	if (queue->ddcb_next >= queue->ddcb_act)
+		return queue->ddcb_next - queue->ddcb_act;
+
+	return queue->ddcb_max - (queue->ddcb_act - queue->ddcb_next);
+}
+
+static int queue_free_ddcbs(struct ddcb_queue *queue)
+{
+	int free_ddcbs = queue->ddcb_max - queue_enqueued_ddcbs(queue) - 1;
+
+	if (WARN_ON_ONCE(free_ddcbs < 0)) { /* must never ever happen! */
+		return 0;
+	}
+	return free_ddcbs;
+}
+
+/*
+ * Use of the PRIV field in the DDCB for queue debugging:
+ *
+ * (1) Trying to get rid of a DDCB which saw a timeout:
+ *     pddcb->priv[6] = 0xcc;   # cleared
+ *
+ * (2) Append a DDCB via NEXT bit:
+ *     pddcb->priv[7] = 0xaa;	# appended
+ *
+ * (3) DDCB needed tapping:
+ *     pddcb->priv[7] = 0xbb;   # tapped
+ *
+ * (4) DDCB marked as correctly finished:
+ *     pddcb->priv[6] = 0xff;	# finished
+ */
+
+static inline void ddcb_mark_tapped(struct ddcb *pddcb)
+{
+	pddcb->priv[7] = 0xbb;  /* tapped */
+}
+
+static inline void ddcb_mark_appended(struct ddcb *pddcb)
+{
+	pddcb->priv[7] = 0xaa;	/* appended */
+}
+
+static inline void ddcb_mark_cleared(struct ddcb *pddcb)
+{
+	pddcb->priv[6] = 0xcc; /* cleared */
+}
+
+static inline void ddcb_mark_finished(struct ddcb *pddcb)
+{
+	pddcb->priv[6] = 0xff;	/* finished */
+}
+
+static inline void ddcb_mark_unused(struct ddcb *pddcb)
+{
+	pddcb->priv_64 = cpu_to_be64(0); /* not tapped */
+}
+
+/**
+ * genwqe_crc16() - Generate 16-bit crc as required for DDCBs
+ * @buff:       pointer to data buffer
+ * @len:        length of data for calculation
+ * @init:       initial crc (0xffff at start)
+ *
+ * Polynomial = x^16 + x^12 + x^5 + 1   (0x1021)
+ * Example: 4 bytes 0x01 0x02 0x03 0x04 with init = 0xffff
+ *          should result in a crc16 of 0x89c3
+ *
+ * Return: crc16 checksum in big endian format !
+ */
+static inline u16 genwqe_crc16(const u8 *buff, size_t len, u16 init)
+{
+	return crc_itu_t(init, buff, len);
+}
+
+static void print_ddcb_info(struct genwqe_dev *cd, struct ddcb_queue *queue)
+{
+	int i;
+	struct ddcb *pddcb;
+	unsigned long flags;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	spin_lock_irqsave(&cd->print_lock, flags);
+
+	dev_info(&pci_dev->dev,
+		 "DDCB list for card #%d (ddcb_act=%d / ddcb_next=%d):\n",
+		 cd->card_idx, queue->ddcb_act, queue->ddcb_next);
+
+	pddcb = queue->ddcb_vaddr;
+	for (i = 0; i < queue->ddcb_max; i++) {
+		dev_err(&pci_dev->dev,
+			"  %c %-3d: RETC=%03x SEQ=%04x "
+			"HSI=%02X SHI=%02x PRIV=%06llx CMD=%03x\n",
+			i == queue->ddcb_act ? '>' : ' ',
+			i,
+			be16_to_cpu(pddcb->retc_16),
+			be16_to_cpu(pddcb->seqnum_16),
+			pddcb->hsi,
+			pddcb->shi,
+			be64_to_cpu(pddcb->priv_64),
+			pddcb->cmd);
+		pddcb++;
+	}
+	spin_unlock_irqrestore(&cd->print_lock, flags);
+}
+
+struct genwqe_ddcb_cmd *ddcb_requ_alloc(void)
+{
+	struct ddcb_requ *req;
+
+	req = kzalloc(sizeof(*req), GFP_ATOMIC);
+	if (!req)
+		return NULL;
+
+	return &req->cmd;
+}
+
+void ddcb_requ_free(struct genwqe_ddcb_cmd *cmd)
+{
+	struct ddcb_requ *req = container_of(cmd, struct ddcb_requ, cmd);
+	kfree(req);
+}
+
+static inline enum genwqe_requ_state ddcb_requ_get_state(struct ddcb_requ *req)
+{
+	return req->req_state;
+}
+
+static inline void ddcb_requ_set_state(struct ddcb_requ *req,
+				       enum genwqe_requ_state new_state)
+{
+	req->req_state = new_state;
+}
+
+static inline int ddcb_requ_collect_debug_data(struct ddcb_requ *req)
+{
+	return req->cmd.ddata_addr != 0x0;
+}
+
+/**
+ * ddcb_requ_finished() - Returns the hardware state of the associated DDCB
+ * @cd:          pointer to genwqe device descriptor
+ * @req:         DDCB work request
+ *
+ * Status of ddcb_requ mirrors this hardware state, but is copied in
+ * the ddcb_requ on interrupt/polling function. The lowlevel code
+ * should check the hardware state directly, the higher level code
+ * should check the copy.
+ *
+ * This function will also return true if the state of the queue is
+ * not GENWQE_CARD_USED. This enables us to purge all DDCBs in the
+ * shutdown case.
+ */
+static int ddcb_requ_finished(struct genwqe_dev *cd, struct ddcb_requ *req)
+{
+	return (ddcb_requ_get_state(req) == GENWQE_REQU_FINISHED) ||
+		(cd->card_state != GENWQE_CARD_USED);
+}
+
+/**
+ * enqueue_ddcb() - Enqueue a DDCB
+ * @cd:         pointer to genwqe device descriptor
+ * @queue:	queue this operation should be done on
+ * @ddcb_no:    pointer to ddcb number being tapped
+ *
+ * Start execution of DDCB by tapping or append to queue via NEXT
+ * bit. This is done by an atomic 'compare and swap' instruction and
+ * checking SHI and HSI of the previous DDCB.
+ *
+ * This function must only be called with ddcb_lock held.
+ *
+ * Return: 1 if new DDCB is appended to previous
+ *         2 if DDCB queue is tapped via register/simulation
+ */
+#define RET_DDCB_APPENDED 1
+#define RET_DDCB_TAPPED   2
+
+static int enqueue_ddcb(struct genwqe_dev *cd, struct ddcb_queue *queue,
+			struct ddcb *pddcb, int ddcb_no)
+{
+	unsigned int try;
+	int prev_no;
+	struct ddcb *prev_ddcb;
+	__be32 old, new, icrc_hsi_shi;
+	u64 num;
+
+	/*
+	 * For performance checks a Dispatch Timestamp can be put into
+	 * DDCB It is supposed to use the SLU's free running counter,
+	 * but this requires PCIe cycles.
+	 */
+	ddcb_mark_unused(pddcb);
+
+	/* check previous DDCB if already fetched */
+	prev_no = (ddcb_no == 0) ? queue->ddcb_max - 1 : ddcb_no - 1;
+	prev_ddcb = &queue->ddcb_vaddr[prev_no];
+
+	/*
+	 * It might have happened that the HSI.FETCHED bit is
+	 * set. Retry in this case. Therefore I expect maximum 2 times
+	 * trying.
+	 */
+	ddcb_mark_appended(pddcb);
+	for (try = 0; try < 2; try++) {
+		old = prev_ddcb->icrc_hsi_shi_32; /* read SHI/HSI in BE32 */
+
+		/* try to append via NEXT bit if prev DDCB is not completed */
+		if ((old & DDCB_COMPLETED_BE32) != 0x00000000)
+			break;
+
+		new = (old | DDCB_NEXT_BE32);
+
+		wmb();
+		icrc_hsi_shi = cmpxchg(&prev_ddcb->icrc_hsi_shi_32, old, new);
+
+		if (icrc_hsi_shi == old)
+			return RET_DDCB_APPENDED; /* appended to queue */
+	}
+
+	/* Queue must be re-started by updating QUEUE_OFFSET */
+	ddcb_mark_tapped(pddcb);
+	num = (u64)ddcb_no << 8;
+
+	wmb();
+	__genwqe_writeq(cd, queue->IO_QUEUE_OFFSET, num); /* start queue */
+
+	return RET_DDCB_TAPPED;
+}
+
+/**
+ * copy_ddcb_results() - Copy output state from real DDCB to request
+ *
+ * Copy DDCB ASV to request struct. There is no endian
+ * conversion made, since data structure in ASV is still
+ * unknown here.
+ *
+ * This is needed by:
+ *   - genwqe_purge_ddcb()
+ *   - genwqe_check_ddcb_queue()
+ */
+static void copy_ddcb_results(struct ddcb_requ *req, int ddcb_no)
+{
+	struct ddcb_queue *queue = req->queue;
+	struct ddcb *pddcb = &queue->ddcb_vaddr[req->num];
+
+	memcpy(&req->cmd.asv[0], &pddcb->asv[0], DDCB_ASV_LENGTH);
+
+	/* copy status flags of the variant part */
+	req->cmd.vcrc     = be16_to_cpu(pddcb->vcrc_16);
+	req->cmd.deque_ts = be64_to_cpu(pddcb->deque_ts_64);
+	req->cmd.cmplt_ts = be64_to_cpu(pddcb->cmplt_ts_64);
+
+	req->cmd.attn     = be16_to_cpu(pddcb->attn_16);
+	req->cmd.progress = be32_to_cpu(pddcb->progress_32);
+	req->cmd.retc     = be16_to_cpu(pddcb->retc_16);
+
+	if (ddcb_requ_collect_debug_data(req)) {
+		int prev_no = (ddcb_no == 0) ?
+			queue->ddcb_max - 1 : ddcb_no - 1;
+		struct ddcb *prev_pddcb = &queue->ddcb_vaddr[prev_no];
+
+		memcpy(&req->debug_data.ddcb_finished, pddcb,
+		       sizeof(req->debug_data.ddcb_finished));
+		memcpy(&req->debug_data.ddcb_prev, prev_pddcb,
+		       sizeof(req->debug_data.ddcb_prev));
+	}
+}
+
+/**
+ * genwqe_check_ddcb_queue() - Checks DDCB queue for completed work equests.
+ * @cd:         pointer to genwqe device descriptor
+ *
+ * Return: Number of DDCBs which were finished
+ */
+static int genwqe_check_ddcb_queue(struct genwqe_dev *cd,
+				   struct ddcb_queue *queue)
+{
+	unsigned long flags;
+	int ddcbs_finished = 0;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	spin_lock_irqsave(&queue->ddcb_lock, flags);
+
+	/* FIXME avoid soft locking CPU */
+	while (!queue_empty(queue) && (ddcbs_finished < queue->ddcb_max)) {
+
+		struct ddcb *pddcb;
+		struct ddcb_requ *req;
+		u16 vcrc, vcrc_16, retc_16;
+
+		pddcb = &queue->ddcb_vaddr[queue->ddcb_act];
+
+		if ((pddcb->icrc_hsi_shi_32 & DDCB_COMPLETED_BE32) ==
+		    0x00000000)
+			goto go_home; /* not completed, continue waiting */
+
+		/* Note: DDCB could be purged */
+
+		req = queue->ddcb_req[queue->ddcb_act];
+		if (req == NULL) {
+			/* this occurs if DDCB is purged, not an error */
+			/* Move active DDCB further; Nothing to do anymore. */
+			goto pick_next_one;
+		}
+
+		/*
+		 * HSI=0x44 (fetched and completed), but RETC is
+		 * 0x101, or even worse 0x000.
+		 *
+		 * In case of seeing the queue in inconsistent state
+		 * we read the errcnts and the queue status to provide
+		 * a trigger for our PCIe analyzer stop capturing.
+		 */
+		retc_16 = be16_to_cpu(pddcb->retc_16);
+		if ((pddcb->hsi == 0x44) && (retc_16 <= 0x101)) {
+			u64 errcnts, status;
+			u64 ddcb_offs = (u64)pddcb - (u64)queue->ddcb_vaddr;
+
+			errcnts = __genwqe_readq(cd, queue->IO_QUEUE_ERRCNTS);
+			status  = __genwqe_readq(cd, queue->IO_QUEUE_STATUS);
+
+			dev_err(&pci_dev->dev,
+				"[%s] SEQN=%04x HSI=%02x RETC=%03x "
+				" Q_ERRCNTS=%016llx Q_STATUS=%016llx\n"
+				" DDCB_DMA_ADDR=%016llx\n",
+				__func__, be16_to_cpu(pddcb->seqnum_16),
+				pddcb->hsi, retc_16, errcnts, status,
+				queue->ddcb_daddr + ddcb_offs);
+		}
+
+		copy_ddcb_results(req, queue->ddcb_act);
+		queue->ddcb_req[queue->ddcb_act] = NULL; /* take from queue */
+
+		dev_dbg(&pci_dev->dev, "FINISHED DDCB#%d\n", req->num);
+		genwqe_hexdump(pci_dev, pddcb, sizeof(*pddcb));
+
+		ddcb_mark_finished(pddcb);
+
+		/* calculate CRC_16 to see if VCRC is correct */
+		vcrc = genwqe_crc16(pddcb->asv,
+				   VCRC_LENGTH(req->cmd.asv_length),
+				   0xffff);
+		vcrc_16 = be16_to_cpu(pddcb->vcrc_16);
+		if (vcrc != vcrc_16) {
+			printk_ratelimited(KERN_ERR
+				"%s %s: err: wrong VCRC pre=%02x vcrc_len=%d "
+				"bytes vcrc_data=%04x is not vcrc_card=%04x\n",
+				GENWQE_DEVNAME, dev_name(&pci_dev->dev),
+				pddcb->pre, VCRC_LENGTH(req->cmd.asv_length),
+				vcrc, vcrc_16);
+		}
+
+		ddcb_requ_set_state(req, GENWQE_REQU_FINISHED);
+		queue->ddcbs_completed++;
+		queue->ddcbs_in_flight--;
+
+		/* wake up process waiting for this DDCB */
+		wake_up_interruptible(&queue->ddcb_waitqs[queue->ddcb_act]);
+
+pick_next_one:
+		queue->ddcb_act = (queue->ddcb_act + 1) % queue->ddcb_max;
+		ddcbs_finished++;
+	}
+
+ go_home:
+	spin_unlock_irqrestore(&queue->ddcb_lock, flags);
+	return ddcbs_finished;
+}
+
+/**
+ * __genwqe_wait_ddcb(): Waits until DDCB is completed
+ * @cd:         pointer to genwqe device descriptor
+ * @req:        pointer to requsted DDCB parameters
+ *
+ * The Service Layer will update the RETC in DDCB when processing is
+ * pending or done.
+ *
+ * Return: > 0 remaining jiffies, DDCB completed
+ *           -ETIMEDOUT	when timeout
+ *           -ERESTARTSYS when ^C
+ *           -EINVAL when unknown error condition
+ *
+ * When an error is returned the called needs to ensure that
+ * purge_ddcb() is being called to get the &req removed from the
+ * queue.
+ */
+int __genwqe_wait_ddcb(struct genwqe_dev *cd, struct ddcb_requ *req)
+{
+	int rc;
+	unsigned int ddcb_no;
+	struct ddcb_queue *queue;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	if (req == NULL)
+		return -EINVAL;
+
+	queue = req->queue;
+	if (queue == NULL)
+		return -EINVAL;
+
+	ddcb_no = req->num;
+	if (ddcb_no >= queue->ddcb_max)
+		return -EINVAL;
+
+	rc = wait_event_interruptible_timeout(queue->ddcb_waitqs[ddcb_no],
+				ddcb_requ_finished(cd, req),
+				genwqe_ddcb_software_timeout * HZ);
+
+	/*
+	 * We need to distinguish 3 cases here:
+	 *   1. rc == 0              timeout occured
+	 *   2. rc == -ERESTARTSYS   signal received
+	 *   3. rc > 0               remaining jiffies condition is true
+	 */
+	if (rc == 0) {
+		struct ddcb_queue *queue = req->queue;
+		struct ddcb *pddcb;
+
+		/*
+		 * Timeout may be caused by long task switching time.
+		 * When timeout happens, check if the request has
+		 * meanwhile completed.
+		 */
+		genwqe_check_ddcb_queue(cd, req->queue);
+		if (ddcb_requ_finished(cd, req))
+			return rc;
+
+		dev_err(&pci_dev->dev,
+			"[%s] err: DDCB#%d timeout rc=%d state=%d req @ %p\n",
+			__func__, req->num, rc,	ddcb_requ_get_state(req),
+			req);
+		dev_err(&pci_dev->dev,
+			"[%s]      IO_QUEUE_STATUS=0x%016llx\n", __func__,
+			__genwqe_readq(cd, queue->IO_QUEUE_STATUS));
+
+		pddcb = &queue->ddcb_vaddr[req->num];
+		genwqe_hexdump(pci_dev, pddcb, sizeof(*pddcb));
+
+		print_ddcb_info(cd, req->queue);
+		return -ETIMEDOUT;
+
+	} else if (rc == -ERESTARTSYS) {
+		return rc;
+		/*
+		 * EINTR:       Stops the application
+		 * ERESTARTSYS: Restartable systemcall; called again
+		 */
+
+	} else if (rc < 0) {
+		dev_err(&pci_dev->dev,
+			"[%s] err: DDCB#%d unknown result (rc=%d) %d!\n",
+			__func__, req->num, rc, ddcb_requ_get_state(req));
+		return -EINVAL;
+	}
+
+	/* Severe error occured. Driver is forced to stop operation */
+	if (cd->card_state != GENWQE_CARD_USED) {
+		dev_err(&pci_dev->dev,
+			"[%s] err: DDCB#%d forced to stop (rc=%d)\n",
+			__func__, req->num, rc);
+		return -EIO;
+	}
+	return rc;
+}
+
+/**
+ * get_next_ddcb() - Get next available DDCB
+ * @cd:         pointer to genwqe device descriptor
+ *
+ * DDCB's content is completely cleared but presets for PRE and
+ * SEQNUM. This function must only be called when ddcb_lock is held.
+ *
+ * Return: NULL if no empty DDCB available otherwise ptr to next DDCB.
+ */
+static struct ddcb *get_next_ddcb(struct genwqe_dev *cd,
+				  struct ddcb_queue *queue,
+				  int *num)
+{
+	u64 *pu64;
+	struct ddcb *pddcb;
+
+	if (queue_free_ddcbs(queue) == 0) /* queue is  full */
+		return NULL;
+
+	/* find new ddcb */
+	pddcb = &queue->ddcb_vaddr[queue->ddcb_next];
+
+	/* if it is not completed, we are not allowed to use it */
+	/* barrier(); */
+	if ((pddcb->icrc_hsi_shi_32 & DDCB_COMPLETED_BE32) == 0x00000000)
+		return NULL;
+
+	*num = queue->ddcb_next;	/* internal DDCB number */
+	queue->ddcb_next = (queue->ddcb_next + 1) % queue->ddcb_max;
+
+	/* clear important DDCB fields */
+	pu64 = (u64 *)pddcb;
+	pu64[0] = 0ULL;		/* offs 0x00 (ICRC,HSI,SHI,...) */
+	pu64[1] = 0ULL;		/* offs 0x01 (ACFUNC,CMD...) */
+
+	/* destroy previous results in ASV */
+	pu64[0x80/8] = 0ULL;	/* offs 0x80 (ASV + 0) */
+	pu64[0x88/8] = 0ULL;	/* offs 0x88 (ASV + 0x08) */
+	pu64[0x90/8] = 0ULL;	/* offs 0x90 (ASV + 0x10) */
+	pu64[0x98/8] = 0ULL;	/* offs 0x98 (ASV + 0x18) */
+	pu64[0xd0/8] = 0ULL;	/* offs 0xd0 (RETC,ATTN...) */
+
+	pddcb->pre = DDCB_PRESET_PRE; /* 128 */
+	pddcb->seqnum_16 = cpu_to_be16(queue->ddcb_seq++);
+	return pddcb;
+}
+
+/**
+ * __genwqe_purge_ddcb() - Remove a DDCB from the workqueue
+ * @cd:         genwqe device descriptor
+ * @req:        DDCB request
+ *
+ * This will fail when the request was already FETCHED. In this case
+ * we need to wait until it is finished. Else the DDCB can be
+ * reused. This function also ensures that the request data structure
+ * is removed from ddcb_req[].
+ *
+ * Do not forget to call this function when genwqe_wait_ddcb() fails,
+ * such that the request gets really removed from ddcb_req[].
+ *
+ * Return: 0 success
+ */
+int __genwqe_purge_ddcb(struct genwqe_dev *cd, struct ddcb_requ *req)
+{
+	struct ddcb *pddcb = NULL;
+	unsigned int t;
+	unsigned long flags;
+	struct ddcb_queue *queue = req->queue;
+	struct pci_dev *pci_dev = cd->pci_dev;
+	u64 queue_status;
+	__be32 icrc_hsi_shi = 0x0000;
+	__be32 old, new;
+
+	/* unsigned long flags; */
+	if (genwqe_ddcb_software_timeout <= 0) {
+		dev_err(&pci_dev->dev,
+			"[%s] err: software timeout is not set!\n", __func__);
+		return -EFAULT;
+	}
+
+	pddcb = &queue->ddcb_vaddr[req->num];
+
+	for (t = 0; t < genwqe_ddcb_software_timeout * 10; t++) {
+
+		spin_lock_irqsave(&queue->ddcb_lock, flags);
+
+		/* Check if req was meanwhile finished */
+		if (ddcb_requ_get_state(req) == GENWQE_REQU_FINISHED)
+			goto go_home;
+
+		/* try to set PURGE bit if FETCHED/COMPLETED are not set */
+		old = pddcb->icrc_hsi_shi_32;	/* read SHI/HSI in BE32 */
+		if ((old & DDCB_FETCHED_BE32) == 0x00000000) {
+
+			new = (old | DDCB_PURGE_BE32);
+			icrc_hsi_shi = cmpxchg(&pddcb->icrc_hsi_shi_32,
+					       old, new);
+			if (icrc_hsi_shi == old)
+				goto finish_ddcb;
+		}
+
+		/* normal finish with HSI bit */
+		barrier();
+		icrc_hsi_shi = pddcb->icrc_hsi_shi_32;
+		if (icrc_hsi_shi & DDCB_COMPLETED_BE32)
+			goto finish_ddcb;
+
+		spin_unlock_irqrestore(&queue->ddcb_lock, flags);
+
+		/*
+		 * Here the check_ddcb() function will most likely
+		 * discover this DDCB to be finished some point in
+		 * time. It will mark the req finished and free it up
+		 * in the list.
+		 */
+
+		copy_ddcb_results(req, req->num); /* for the failing case */
+		msleep(100); /* sleep for 1/10 second and try again */
+		continue;
+
+finish_ddcb:
+		copy_ddcb_results(req, req->num);
+		ddcb_requ_set_state(req, GENWQE_REQU_FINISHED);
+		queue->ddcbs_in_flight--;
+		queue->ddcb_req[req->num] = NULL; /* delete from array */
+		ddcb_mark_cleared(pddcb);
+
+		/* Move active DDCB further; Nothing to do here anymore. */
+
+		/*
+		 * We need to ensure that there is at least one free
+		 * DDCB in the queue. To do that, we must update
+		 * ddcb_act only if the COMPLETED bit is set for the
+		 * DDCB we are working on else we treat that DDCB even
+		 * if we PURGED it as occupied (hardware is supposed
+		 * to set the COMPLETED bit yet!).
+		 */
+		icrc_hsi_shi = pddcb->icrc_hsi_shi_32;
+		if ((icrc_hsi_shi & DDCB_COMPLETED_BE32) &&
+		    (queue->ddcb_act == req->num)) {
+			queue->ddcb_act = ((queue->ddcb_act + 1) %
+					   queue->ddcb_max);
+		}
+go_home:
+		spin_unlock_irqrestore(&queue->ddcb_lock, flags);
+		return 0;
+	}
+
+	/*
+	 * If the card is dead and the queue is forced to stop, we
+	 * might see this in the queue status register.
+	 */
+	queue_status = __genwqe_readq(cd, queue->IO_QUEUE_STATUS);
+
+	dev_dbg(&pci_dev->dev, "UN/FINISHED DDCB#%d\n", req->num);
+	genwqe_hexdump(pci_dev, pddcb, sizeof(*pddcb));
+
+	dev_err(&pci_dev->dev,
+		"[%s] err: DDCB#%d not purged and not completed "
+		"after %d seconds QSTAT=%016llx!!\n",
+		__func__, req->num, genwqe_ddcb_software_timeout,
+		queue_status);
+
+	print_ddcb_info(cd, req->queue);
+
+	return -EFAULT;
+}
+
+int genwqe_init_debug_data(struct genwqe_dev *cd, struct genwqe_debug_data *d)
+{
+	int len;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	if (d == NULL) {
+		dev_err(&pci_dev->dev,
+			"[%s] err: invalid memory for debug data!\n",
+			__func__);
+		return -EFAULT;
+	}
+
+	len  = sizeof(d->driver_version);
+	snprintf(d->driver_version, len, "%s", DRV_VERS_STRING);
+	d->slu_unitcfg = cd->slu_unitcfg;
+	d->app_unitcfg = cd->app_unitcfg;
+	return 0;
+}
+
+/**
+ * __genwqe_enqueue_ddcb() - Enqueue a DDCB
+ * @cd:          pointer to genwqe device descriptor
+ * @req:         pointer to DDCB execution request
+ *
+ * Return: 0 if enqueuing succeeded
+ *         -EIO if card is unusable/PCIe problems
+ *         -EBUSY if enqueuing failed
+ */
+int __genwqe_enqueue_ddcb(struct genwqe_dev *cd, struct ddcb_requ *req)
+{
+	struct ddcb *pddcb;
+	unsigned long flags;
+	struct ddcb_queue *queue;
+	struct pci_dev *pci_dev = cd->pci_dev;
+	u16 icrc;
+
+	if (cd->card_state != GENWQE_CARD_USED) {
+		printk_ratelimited(KERN_ERR
+			"%s %s: [%s] Card is unusable/PCIe problem Req#%d\n",
+			GENWQE_DEVNAME, dev_name(&pci_dev->dev),
+			__func__, req->num);
+		return -EIO;
+	}
+
+	queue = req->queue = &cd->queue;
+
+	/* FIXME circumvention to improve performance when no irq is
+	 * there.
+	 */
+	if (genwqe_polling_enabled)
+		genwqe_check_ddcb_queue(cd, queue);
+
+	/*
+	 * It must be ensured to process all DDCBs in successive
+	 * order. Use a lock here in order to prevent nested DDCB
+	 * enqueuing.
+	 */
+	spin_lock_irqsave(&queue->ddcb_lock, flags);
+
+	pddcb = get_next_ddcb(cd, queue, &req->num);	/* get ptr and num */
+	if (pddcb == NULL) {
+		spin_unlock_irqrestore(&queue->ddcb_lock, flags);
+		queue->busy++;
+		return -EBUSY;
+	}
+
+	if (queue->ddcb_req[req->num] != NULL) {
+		spin_unlock_irqrestore(&queue->ddcb_lock, flags);
+
+		dev_err(&pci_dev->dev,
+			"[%s] picked DDCB %d with req=%p still in use!!\n",
+			__func__, req->num, req);
+		return -EFAULT;
+	}
+	ddcb_requ_set_state(req, GENWQE_REQU_ENQUEUED);
+	queue->ddcb_req[req->num] = req;
+
+	pddcb->cmdopts_16 = cpu_to_be16(req->cmd.cmdopts);
+	pddcb->cmd = req->cmd.cmd;
+	pddcb->acfunc = req->cmd.acfunc;	/* functional unit */
+
+	/*
+	 * We know that we can get retc 0x104 with CRC error, do not
+	 * stop the queue in those cases for this command. XDIR = 1
+	 * does not work for old SLU versions.
+	 *
+	 * Last bitstream with the old XDIR behavior had SLU_ID
+	 * 0x34199.
+	 */
+	if ((cd->slu_unitcfg & 0xFFFF0ull) > 0x34199ull)
+		pddcb->xdir = 0x1;
+	else
+		pddcb->xdir = 0x0;
+
+
+	pddcb->psp = (((req->cmd.asiv_length / 8) << 4) |
+		      ((req->cmd.asv_length  / 8)));
+	pddcb->disp_ts_64 = cpu_to_be64(req->cmd.disp_ts);
+
+	/*
+	 * If copying the whole DDCB_ASIV_LENGTH is impacting
+	 * performance we need to change it to
+	 * req->cmd.asiv_length. But simulation benefits from some
+	 * non-architectured bits behind the architectured content.
+	 *
+	 * How much data is copied depends on the availability of the
+	 * ATS field, which was introduced late. If the ATS field is
+	 * supported ASIV is 8 bytes shorter than it used to be. Since
+	 * the ATS field is copied too, the code should do exactly
+	 * what it did before, but I wanted to make copying of the ATS
+	 * field very explicit.
+	 */
+	if (genwqe_get_slu_id(cd) <= 0x2) {
+		memcpy(&pddcb->__asiv[0],	/* destination */
+		       &req->cmd.__asiv[0],	/* source */
+		       DDCB_ASIV_LENGTH);	/* req->cmd.asiv_length */
+	} else {
+		pddcb->n.ats_64 = cpu_to_be64(req->cmd.ats);
+		memcpy(&pddcb->n.asiv[0],	/* destination */
+			&req->cmd.asiv[0],	/* source */
+			DDCB_ASIV_LENGTH_ATS);	/* req->cmd.asiv_length */
+	}
+
+	pddcb->icrc_hsi_shi_32 = cpu_to_be32(0x00000000); /* for crc */
+
+	/*
+	 * Calculate CRC_16 for corresponding range PSP(7:4). Include
+	 * empty 4 bytes prior to the data.
+	 */
+	icrc = genwqe_crc16((const u8 *)pddcb,
+			   ICRC_LENGTH(req->cmd.asiv_length), 0xffff);
+	pddcb->icrc_hsi_shi_32 = cpu_to_be32((u32)icrc << 16);
+
+	/* enable DDCB completion irq */
+	if (!genwqe_polling_enabled)
+		pddcb->icrc_hsi_shi_32 |= DDCB_INTR_BE32;
+
+	dev_dbg(&pci_dev->dev, "INPUT DDCB#%d\n", req->num);
+	genwqe_hexdump(pci_dev, pddcb, sizeof(*pddcb));
+
+	if (ddcb_requ_collect_debug_data(req)) {
+		/* use the kernel copy of debug data. copying back to
+		   user buffer happens later */
+
+		genwqe_init_debug_data(cd, &req->debug_data);
+		memcpy(&req->debug_data.ddcb_before, pddcb,
+		       sizeof(req->debug_data.ddcb_before));
+	}
+
+	enqueue_ddcb(cd, queue, pddcb, req->num);
+	queue->ddcbs_in_flight++;
+
+	if (queue->ddcbs_in_flight > queue->ddcbs_max_in_flight)
+		queue->ddcbs_max_in_flight = queue->ddcbs_in_flight;
+
+	ddcb_requ_set_state(req, GENWQE_REQU_TAPPED);
+	spin_unlock_irqrestore(&queue->ddcb_lock, flags);
+	wake_up_interruptible(&cd->queue_waitq);
+
+	return 0;
+}
+
+/**
+ * __genwqe_execute_raw_ddcb() - Setup and execute DDCB
+ * @cd:         pointer to genwqe device descriptor
+ * @req:        user provided DDCB request
+ */
+int __genwqe_execute_raw_ddcb(struct genwqe_dev *cd,
+			     struct genwqe_ddcb_cmd *cmd)
+{
+	int rc = 0;
+	struct pci_dev *pci_dev = cd->pci_dev;
+	struct ddcb_requ *req = container_of(cmd, struct ddcb_requ, cmd);
+
+	if (cmd->asiv_length > DDCB_ASIV_LENGTH) {
+		dev_err(&pci_dev->dev, "[%s] err: wrong asiv_length of %d\n",
+			__func__, cmd->asiv_length);
+		return -EINVAL;
+	}
+	if (cmd->asv_length > DDCB_ASV_LENGTH) {
+		dev_err(&pci_dev->dev, "[%s] err: wrong asv_length of %d\n",
+			__func__, cmd->asiv_length);
+		return -EINVAL;
+	}
+	rc = __genwqe_enqueue_ddcb(cd, req);
+	if (rc != 0)
+		return rc;
+
+	rc = __genwqe_wait_ddcb(cd, req);
+	if (rc < 0)		/* error or signal interrupt */
+		goto err_exit;
+
+	if (ddcb_requ_collect_debug_data(req)) {
+		if (copy_to_user((struct genwqe_debug_data __user *)
+				 (unsigned long)cmd->ddata_addr,
+				 &req->debug_data,
+				 sizeof(struct genwqe_debug_data)))
+			return -EFAULT;
+	}
+
+	/*
+	 * Higher values than 0x102 indicate completion with faults,
+	 * lower values than 0x102 indicate processing faults. Note
+	 * that DDCB might have been purged. E.g. Cntl+C.
+	 */
+	if (cmd->retc != DDCB_RETC_COMPLETE) {
+		/* This might happen e.g. flash read, and needs to be
+		   handled by the upper layer code. */
+		rc = -EBADMSG;	/* not processed/error retc */
+	}
+
+	return rc;
+
+ err_exit:
+	__genwqe_purge_ddcb(cd, req);
+
+	if (ddcb_requ_collect_debug_data(req)) {
+		if (copy_to_user((struct genwqe_debug_data __user *)
+				 (unsigned long)cmd->ddata_addr,
+				 &req->debug_data,
+				 sizeof(struct genwqe_debug_data)))
+			return -EFAULT;
+	}
+	return rc;
+}
+
+/**
+ * genwqe_next_ddcb_ready() - Figure out if the next DDCB is already finished
+ *
+ * We use this as condition for our wait-queue code.
+ */
+static int genwqe_next_ddcb_ready(struct genwqe_dev *cd)
+{
+	unsigned long flags;
+	struct ddcb *pddcb;
+	struct ddcb_queue *queue = &cd->queue;
+
+	spin_lock_irqsave(&queue->ddcb_lock, flags);
+
+	if (queue_empty(queue)) { /* emtpy queue */
+		spin_unlock_irqrestore(&queue->ddcb_lock, flags);
+		return 0;
+	}
+
+	pddcb = &queue->ddcb_vaddr[queue->ddcb_act];
+	if (pddcb->icrc_hsi_shi_32 & DDCB_COMPLETED_BE32) { /* ddcb ready */
+		spin_unlock_irqrestore(&queue->ddcb_lock, flags);
+		return 1;
+	}
+
+	spin_unlock_irqrestore(&queue->ddcb_lock, flags);
+	return 0;
+}
+
+/**
+ * genwqe_ddcbs_in_flight() - Check how many DDCBs are in flight
+ *
+ * Keep track on the number of DDCBs which ware currently in the
+ * queue. This is needed for statistics as well as conditon if we want
+ * to wait or better do polling in case of no interrupts available.
+ */
+int genwqe_ddcbs_in_flight(struct genwqe_dev *cd)
+{
+	unsigned long flags;
+	int ddcbs_in_flight = 0;
+	struct ddcb_queue *queue = &cd->queue;
+
+	spin_lock_irqsave(&queue->ddcb_lock, flags);
+	ddcbs_in_flight += queue->ddcbs_in_flight;
+	spin_unlock_irqrestore(&queue->ddcb_lock, flags);
+
+	return ddcbs_in_flight;
+}
+
+static int setup_ddcb_queue(struct genwqe_dev *cd, struct ddcb_queue *queue)
+{
+	int rc, i;
+	struct ddcb *pddcb;
+	u64 val64;
+	unsigned int queue_size;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	if (genwqe_ddcb_max < 2)
+		return -EINVAL;
+
+	queue_size = roundup(genwqe_ddcb_max * sizeof(struct ddcb), PAGE_SIZE);
+
+	queue->ddcbs_in_flight = 0;  /* statistics */
+	queue->ddcbs_max_in_flight = 0;
+	queue->ddcbs_completed = 0;
+	queue->busy = 0;
+
+	queue->ddcb_seq	  = 0x100; /* start sequence number */
+	queue->ddcb_max	  = genwqe_ddcb_max; /* module parameter */
+	queue->ddcb_vaddr = __genwqe_alloc_consistent(cd, queue_size,
+						&queue->ddcb_daddr);
+	if (queue->ddcb_vaddr == NULL) {
+		dev_err(&pci_dev->dev,
+			"[%s] **err: could not allocate DDCB **\n", __func__);
+		return -ENOMEM;
+	}
+	memset(queue->ddcb_vaddr, 0, queue_size);
+
+	queue->ddcb_req = kzalloc(sizeof(struct ddcb_requ *) *
+				  queue->ddcb_max, GFP_KERNEL);
+	if (!queue->ddcb_req) {
+		rc = -ENOMEM;
+		goto free_ddcbs;
+	}
+
+	queue->ddcb_waitqs = kzalloc(sizeof(wait_queue_head_t) *
+				     queue->ddcb_max, GFP_KERNEL);
+	if (!queue->ddcb_waitqs) {
+		rc = -ENOMEM;
+		goto free_requs;
+	}
+
+	for (i = 0; i < queue->ddcb_max; i++) {
+		pddcb = &queue->ddcb_vaddr[i];		     /* DDCBs */
+		pddcb->icrc_hsi_shi_32 = DDCB_COMPLETED_BE32;
+		pddcb->retc_16 = cpu_to_be16(0xfff);
+
+		queue->ddcb_req[i] = NULL;		     /* requests */
+		init_waitqueue_head(&queue->ddcb_waitqs[i]); /* waitqueues */
+	}
+
+	queue->ddcb_act  = 0;
+	queue->ddcb_next = 0;	/* queue is empty */
+
+	spin_lock_init(&queue->ddcb_lock);
+	init_waitqueue_head(&queue->ddcb_waitq);
+
+	val64 = ((u64)(queue->ddcb_max - 1) <<  8); /* lastptr */
+	__genwqe_writeq(cd, queue->IO_QUEUE_CONFIG,  0x07);  /* iCRC/vCRC */
+	__genwqe_writeq(cd, queue->IO_QUEUE_SEGMENT, queue->ddcb_daddr);
+	__genwqe_writeq(cd, queue->IO_QUEUE_INITSQN, queue->ddcb_seq);
+	__genwqe_writeq(cd, queue->IO_QUEUE_WRAP,    val64);
+	return 0;
+
+ free_requs:
+	kfree(queue->ddcb_req);
+	queue->ddcb_req = NULL;
+ free_ddcbs:
+	__genwqe_free_consistent(cd, queue_size, queue->ddcb_vaddr,
+				queue->ddcb_daddr);
+	queue->ddcb_vaddr = NULL;
+	queue->ddcb_daddr = 0ull;
+	return -ENODEV;
+
+}
+
+static int ddcb_queue_initialized(struct ddcb_queue *queue)
+{
+	return queue->ddcb_vaddr != NULL;
+}
+
+static void free_ddcb_queue(struct genwqe_dev *cd, struct ddcb_queue *queue)
+{
+	unsigned int queue_size;
+
+	queue_size = roundup(queue->ddcb_max * sizeof(struct ddcb), PAGE_SIZE);
+
+	kfree(queue->ddcb_req);
+	queue->ddcb_req = NULL;
+
+	if (queue->ddcb_vaddr) {
+		__genwqe_free_consistent(cd, queue_size, queue->ddcb_vaddr,
+					queue->ddcb_daddr);
+		queue->ddcb_vaddr = NULL;
+		queue->ddcb_daddr = 0ull;
+	}
+}
+
+static irqreturn_t genwqe_pf_isr(int irq, void *dev_id)
+{
+	u64 gfir;
+	struct genwqe_dev *cd = (struct genwqe_dev *)dev_id;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	/*
+	 * In case of fatal FIR error the queue is stopped, such that
+	 * we can safely check it without risking anything.
+	 */
+	cd->irqs_processed++;
+	wake_up_interruptible(&cd->queue_waitq);
+
+	/*
+	 * Checking for errors before kicking the queue might be
+	 * safer, but slower for the good-case ... See above.
+	 */
+	gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR);
+	if ((gfir & GFIR_ERR_TRIGGER) != 0x0) {
+
+		wake_up_interruptible(&cd->health_waitq);
+
+		/*
+		 * By default GFIRs causes recovery actions. This
+		 * count is just for debug when recovery is masked.
+		 */
+		printk_ratelimited(KERN_ERR
+				   "%s %s: [%s] GFIR=%016llx\n",
+				   GENWQE_DEVNAME, dev_name(&pci_dev->dev),
+				   __func__, gfir);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t genwqe_vf_isr(int irq, void *dev_id)
+{
+	struct genwqe_dev *cd = (struct genwqe_dev *)dev_id;
+
+	cd->irqs_processed++;
+	wake_up_interruptible(&cd->queue_waitq);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * genwqe_card_thread() - Work thread for the DDCB queue
+ *
+ * The idea is to check if there are DDCBs in processing. If there are
+ * some finished DDCBs, we process them and wakeup the
+ * requestors. Otherwise we give other processes time using
+ * cond_resched().
+ */
+static int genwqe_card_thread(void *data)
+{
+	int should_stop = 0, rc = 0;
+	struct genwqe_dev *cd = (struct genwqe_dev *)data;
+
+	while (!kthread_should_stop()) {
+
+		genwqe_check_ddcb_queue(cd, &cd->queue);
+
+		if (genwqe_polling_enabled) {
+			rc = wait_event_interruptible_timeout(
+				cd->queue_waitq,
+				genwqe_ddcbs_in_flight(cd) ||
+				(should_stop = kthread_should_stop()), 1);
+		} else {
+			rc = wait_event_interruptible_timeout(
+				cd->queue_waitq,
+				genwqe_next_ddcb_ready(cd) ||
+				(should_stop = kthread_should_stop()), HZ);
+		}
+		if (should_stop)
+			break;
+
+		/*
+		 * Avoid soft lockups on heavy loads; we do not want
+		 * to disable our interrupts.
+		 */
+		cond_resched();
+	}
+	return 0;
+}
+
+/**
+ * genwqe_setup_service_layer() - Setup DDCB queue
+ * @cd:         pointer to genwqe device descriptor
+ *
+ * Allocate DDCBs. Configure Service Layer Controller (SLC).
+ *
+ * Return: 0 success
+ */
+int genwqe_setup_service_layer(struct genwqe_dev *cd)
+{
+	int rc;
+	struct ddcb_queue *queue;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	if (genwqe_is_privileged(cd)) {
+		rc = genwqe_card_reset(cd);
+		if (rc < 0) {
+			dev_err(&pci_dev->dev,
+				"[%s] err: reset failed.\n", __func__);
+			return rc;
+		}
+		genwqe_read_softreset(cd);
+	}
+
+	queue = &cd->queue;
+	queue->IO_QUEUE_CONFIG  = IO_SLC_QUEUE_CONFIG;
+	queue->IO_QUEUE_STATUS  = IO_SLC_QUEUE_STATUS;
+	queue->IO_QUEUE_SEGMENT = IO_SLC_QUEUE_SEGMENT;
+	queue->IO_QUEUE_INITSQN = IO_SLC_QUEUE_INITSQN;
+	queue->IO_QUEUE_OFFSET  = IO_SLC_QUEUE_OFFSET;
+	queue->IO_QUEUE_WRAP    = IO_SLC_QUEUE_WRAP;
+	queue->IO_QUEUE_WTIME   = IO_SLC_QUEUE_WTIME;
+	queue->IO_QUEUE_ERRCNTS = IO_SLC_QUEUE_ERRCNTS;
+	queue->IO_QUEUE_LRW     = IO_SLC_QUEUE_LRW;
+
+	rc = setup_ddcb_queue(cd, queue);
+	if (rc != 0) {
+		rc = -ENODEV;
+		goto err_out;
+	}
+
+	init_waitqueue_head(&cd->queue_waitq);
+	cd->card_thread = kthread_run(genwqe_card_thread, cd,
+				      GENWQE_DEVNAME "%d_thread",
+				      cd->card_idx);
+	if (IS_ERR(cd->card_thread)) {
+		rc = PTR_ERR(cd->card_thread);
+		cd->card_thread = NULL;
+		goto stop_free_queue;
+	}
+
+	rc = genwqe_set_interrupt_capability(cd, GENWQE_MSI_IRQS);
+	if (rc > 0)
+		rc = genwqe_set_interrupt_capability(cd, rc);
+	if (rc != 0) {
+		rc = -ENODEV;
+		goto stop_kthread;
+	}
+
+	/*
+	 * We must have all wait-queues initialized when we enable the
+	 * interrupts. Otherwise we might crash if we get an early
+	 * irq.
+	 */
+	init_waitqueue_head(&cd->health_waitq);
+
+	if (genwqe_is_privileged(cd)) {
+		rc = request_irq(pci_dev->irq, genwqe_pf_isr, IRQF_SHARED,
+				 GENWQE_DEVNAME, cd);
+	} else {
+		rc = request_irq(pci_dev->irq, genwqe_vf_isr, IRQF_SHARED,
+				 GENWQE_DEVNAME, cd);
+	}
+	if (rc < 0) {
+		dev_err(&pci_dev->dev, "irq %d not free.\n", pci_dev->irq);
+		goto stop_irq_cap;
+	}
+
+	cd->card_state = GENWQE_CARD_USED;
+	return 0;
+
+ stop_irq_cap:
+	genwqe_reset_interrupt_capability(cd);
+ stop_kthread:
+	kthread_stop(cd->card_thread);
+	cd->card_thread = NULL;
+ stop_free_queue:
+	free_ddcb_queue(cd, queue);
+ err_out:
+	return rc;
+}
+
+/**
+ * queue_wake_up_all() - Handles fatal error case
+ *
+ * The PCI device got unusable and we have to stop all pending
+ * requests as fast as we can. The code after this must purge the
+ * DDCBs in question and ensure that all mappings are freed.
+ */
+static int queue_wake_up_all(struct genwqe_dev *cd)
+{
+	unsigned int i;
+	unsigned long flags;
+	struct ddcb_queue *queue = &cd->queue;
+
+	spin_lock_irqsave(&queue->ddcb_lock, flags);
+
+	for (i = 0; i < queue->ddcb_max; i++)
+		wake_up_interruptible(&queue->ddcb_waitqs[queue->ddcb_act]);
+
+	spin_unlock_irqrestore(&queue->ddcb_lock, flags);
+
+	return 0;
+}
+
+/**
+ * genwqe_finish_queue() - Remove any genwqe devices and user-interfaces
+ *
+ * Relies on the pre-condition that there are no users of the card
+ * device anymore e.g. with open file-descriptors.
+ *
+ * This function must be robust enough to be called twice.
+ */
+int genwqe_finish_queue(struct genwqe_dev *cd)
+{
+	int i, rc = 0, in_flight;
+	int waitmax = genwqe_ddcb_software_timeout;
+	struct pci_dev *pci_dev = cd->pci_dev;
+	struct ddcb_queue *queue = &cd->queue;
+
+	if (!ddcb_queue_initialized(queue))
+		return 0;
+
+	/* Do not wipe out the error state. */
+	if (cd->card_state == GENWQE_CARD_USED)
+		cd->card_state = GENWQE_CARD_UNUSED;
+
+	/* Wake up all requests in the DDCB queue such that they
+	   should be removed nicely. */
+	queue_wake_up_all(cd);
+
+	/* We must wait to get rid of the DDCBs in flight */
+	for (i = 0; i < waitmax; i++) {
+		in_flight = genwqe_ddcbs_in_flight(cd);
+
+		if (in_flight == 0)
+			break;
+
+		dev_dbg(&pci_dev->dev,
+			"  DEBUG [%d/%d] waiting for queue to get empty: "
+			"%d requests!\n", i, waitmax, in_flight);
+
+		/*
+		 * Severe severe error situation: The card itself has
+		 * 16 DDCB queues, each queue has e.g. 32 entries,
+		 * each DDBC has a hardware timeout of currently 250
+		 * msec but the PFs have a hardware timeout of 8 sec
+		 * ... so I take something large.
+		 */
+		msleep(1000);
+	}
+	if (i == waitmax) {
+		dev_err(&pci_dev->dev, "  [%s] err: queue is not empty!!\n",
+			__func__);
+		rc = -EIO;
+	}
+	return rc;
+}
+
+/**
+ * genwqe_release_service_layer() - Shutdown DDCB queue
+ * @cd:       genwqe device descriptor
+ *
+ * This function must be robust enough to be called twice.
+ */
+int genwqe_release_service_layer(struct genwqe_dev *cd)
+{
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	if (!ddcb_queue_initialized(&cd->queue))
+		return 1;
+
+	free_irq(pci_dev->irq, cd);
+	genwqe_reset_interrupt_capability(cd);
+
+	if (cd->card_thread != NULL) {
+		kthread_stop(cd->card_thread);
+		cd->card_thread = NULL;
+	}
+
+	free_ddcb_queue(cd, &cd->queue);
+	return 0;
+}
diff --git a/drivers/misc/genwqe/card_ddcb.h b/drivers/misc/genwqe/card_ddcb.h
new file mode 100644
index 00000000000..c4f26720753
--- /dev/null
+++ b/drivers/misc/genwqe/card_ddcb.h
@@ -0,0 +1,188 @@
+#ifndef __CARD_DDCB_H__
+#define __CARD_DDCB_H__
+
+/**
+ * IBM Accelerator Family 'GenWQE'
+ *
+ * (C) Copyright IBM Corp. 2013
+ *
+ * Author: Frank Haverkamp <haver@linux.vnet.ibm.com>
+ * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com>
+ * Author: Michael Jung <mijung@de.ibm.com>
+ * Author: Michael Ruettger <michael@ibmra.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/types.h>
+#include <asm/byteorder.h>
+
+#include "genwqe_driver.h"
+#include "card_base.h"
+
+/**
+ * struct ddcb - Device Driver Control Block DDCB
+ * @hsi:        Hardware software interlock
+ * @shi:        Software hardware interlock. Hsi and shi are used to interlock
+ *              software and hardware activities. We are using a compare and
+ *              swap operation to ensure that there are no races when
+ *              activating new DDCBs on the queue, or when we need to
+ *              purge a DDCB from a running queue.
+ * @acfunc:     Accelerator function addresses a unit within the chip
+ * @cmd:        Command to work on
+ * @cmdopts_16: Options for the command
+ * @asiv:       Input data
+ * @asv:        Output data
+ *
+ * The DDCB data format is big endian. Multiple consequtive DDBCs form
+ * a DDCB queue.
+ */
+#define ASIV_LENGTH		104 /* Old specification without ATS field */
+#define ASIV_LENGTH_ATS		96  /* New specification with ATS field */
+#define ASV_LENGTH		64
+
+struct ddcb {
+	union {
+		__be32 icrc_hsi_shi_32;	/* iCRC, Hardware/SW interlock */
+		struct {
+			__be16	icrc_16;
+			u8	hsi;
+			u8	shi;
+		};
+	};
+	u8  pre;		/* Preamble */
+	u8  xdir;		/* Execution Directives */
+	__be16 seqnum_16;	/* Sequence Number */
+
+	u8  acfunc;		/* Accelerator Function.. */
+	u8  cmd;		/* Command. */
+	__be16 cmdopts_16;	/* Command Options */
+	u8  sur;		/* Status Update Rate */
+	u8  psp;		/* Protection Section Pointer */
+	__be16 rsvd_0e_16;	/* Reserved invariant */
+
+	__be64 fwiv_64;		/* Firmware Invariant. */
+
+	union {
+		struct {
+			__be64 ats_64;  /* Address Translation Spec */
+			u8     asiv[ASIV_LENGTH_ATS]; /* New ASIV */
+		} n;
+		u8  __asiv[ASIV_LENGTH];	/* obsolete */
+	};
+	u8     asv[ASV_LENGTH];	/* Appl Spec Variant */
+
+	__be16 rsvd_c0_16;	/* Reserved Variant */
+	__be16 vcrc_16;		/* Variant CRC */
+	__be32 rsvd_32;		/* Reserved unprotected */
+
+	__be64 deque_ts_64;	/* Deque Time Stamp. */
+
+	__be16 retc_16;		/* Return Code */
+	__be16 attn_16;		/* Attention/Extended Error Codes */
+	__be32 progress_32;	/* Progress indicator. */
+
+	__be64 cmplt_ts_64;	/* Completion Time Stamp. */
+
+	/* The following layout matches the new service layer format */
+	__be32 ibdc_32;		/* Inbound Data Count  (* 256) */
+	__be32 obdc_32;		/* Outbound Data Count (* 256) */
+
+	__be64 rsvd_SLH_64;	/* Reserved for hardware */
+	union {			/* private data for driver */
+		u8	priv[8];
+		__be64	priv_64;
+	};
+	__be64 disp_ts_64;	/* Dispatch TimeStamp */
+} __attribute__((__packed__));
+
+/* CRC polynomials for DDCB */
+#define CRC16_POLYNOMIAL	0x1021
+
+/*
+ * SHI: Software to Hardware Interlock
+ *   This 1 byte field is written by software to interlock the
+ *   movement of one queue entry to another with the hardware in the
+ *   chip.
+ */
+#define DDCB_SHI_INTR		0x04 /* Bit 2 */
+#define DDCB_SHI_PURGE		0x02 /* Bit 1 */
+#define DDCB_SHI_NEXT		0x01 /* Bit 0 */
+
+/*
+ * HSI: Hardware to Software interlock
+ * This 1 byte field is written by hardware to interlock the movement
+ * of one queue entry to another with the software in the chip.
+ */
+#define DDCB_HSI_COMPLETED	0x40 /* Bit 6 */
+#define DDCB_HSI_FETCHED	0x04 /* Bit 2 */
+
+/*
+ * Accessing HSI/SHI is done 32-bit wide
+ *   Normally 16-bit access would work too, but on some platforms the
+ *   16 compare and swap operation is not supported. Therefore
+ *   switching to 32-bit such that those platforms will work too.
+ *
+ *                                         iCRC HSI/SHI
+ */
+#define DDCB_INTR_BE32		cpu_to_be32(0x00000004)
+#define DDCB_PURGE_BE32		cpu_to_be32(0x00000002)
+#define DDCB_NEXT_BE32		cpu_to_be32(0x00000001)
+#define DDCB_COMPLETED_BE32	cpu_to_be32(0x00004000)
+#define DDCB_FETCHED_BE32	cpu_to_be32(0x00000400)
+
+/* Definitions of DDCB presets */
+#define DDCB_PRESET_PRE		0x80
+#define ICRC_LENGTH(n)		((n) + 8 + 8 + 8)  /* used ASIV + hdr fields */
+#define VCRC_LENGTH(n)		((n))		   /* used ASV */
+
+/*
+ * Genwqe Scatter Gather list
+ *   Each element has up to 8 entries.
+ *   The chaining element is element 0 cause of prefetching needs.
+ */
+
+/*
+ * 0b0110 Chained descriptor. The descriptor is describing the next
+ * descriptor list.
+ */
+#define SG_CHAINED		(0x6)
+
+/*
+ * 0b0010 First entry of a descriptor list. Start from a Buffer-Empty
+ * condition.
+ */
+#define SG_DATA			(0x2)
+
+/*
+ * 0b0000 Early terminator. This is the last entry on the list
+ * irregardless of the length indicated.
+ */
+#define SG_END_LIST		(0x0)
+
+/**
+ * struct sglist - Scatter gather list
+ * @target_addr:       Either a dma addr of memory to work on or a
+ *                     dma addr or a subsequent sglist block.
+ * @len:               Length of the data block.
+ * @flags:             See above.
+ *
+ * Depending on the command the GenWQE card can use a scatter gather
+ * list to describe the memory it works on. Always 8 sg_entry's form
+ * a block.
+ */
+struct sg_entry {
+	__be64 target_addr;
+	__be32 len;
+	__be32 flags;
+};
+
+#endif /* __CARD_DDCB_H__ */
diff --git a/drivers/misc/genwqe/card_debugfs.c b/drivers/misc/genwqe/card_debugfs.c
new file mode 100644
index 00000000000..0a33ade6410
--- /dev/null
+++ b/drivers/misc/genwqe/card_debugfs.c
@@ -0,0 +1,499 @@
+/**
+ * IBM Accelerator Family 'GenWQE'
+ *
+ * (C) Copyright IBM Corp. 2013
+ *
+ * Author: Frank Haverkamp <haver@linux.vnet.ibm.com>
+ * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com>
+ * Author: Michael Jung <mijung@de.ibm.com>
+ * Author: Michael Ruettger <michael@ibmra.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+/*
+ * Debugfs interfaces for the GenWQE card. Help to debug potential
+ * problems. Dump internal chip state for debugging and failure
+ * determination.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/uaccess.h>
+
+#include "card_base.h"
+#include "card_ddcb.h"
+
+#define GENWQE_DEBUGFS_RO(_name, _showfn)				\
+	static int genwqe_debugfs_##_name##_open(struct inode *inode,	\
+						 struct file *file)	\
+	{								\
+		return single_open(file, _showfn, inode->i_private);	\
+	}								\
+	static const struct file_operations genwqe_##_name##_fops = {	\
+		.open = genwqe_debugfs_##_name##_open,			\
+		.read = seq_read,					\
+		.llseek = seq_lseek,					\
+		.release = single_release,				\
+	}
+
+static void dbg_uidn_show(struct seq_file *s, struct genwqe_reg *regs,
+			  int entries)
+{
+	unsigned int i;
+	u32 v_hi, v_lo;
+
+	for (i = 0; i < entries; i++) {
+		v_hi = (regs[i].val >> 32) & 0xffffffff;
+		v_lo = (regs[i].val)       & 0xffffffff;
+
+		seq_printf(s, "  0x%08x 0x%08x 0x%08x 0x%08x EXT_ERR_REC\n",
+			   regs[i].addr, regs[i].idx, v_hi, v_lo);
+	}
+}
+
+static int curr_dbg_uidn_show(struct seq_file *s, void *unused, int uid)
+{
+	struct genwqe_dev *cd = s->private;
+	int entries;
+	struct genwqe_reg *regs;
+
+	entries = genwqe_ffdc_buff_size(cd, uid);
+	if (entries < 0)
+		return -EINVAL;
+
+	if (entries == 0)
+		return 0;
+
+	regs = kcalloc(entries, sizeof(*regs), GFP_KERNEL);
+	if (regs == NULL)
+		return -ENOMEM;
+
+	genwqe_stop_traps(cd); /* halt the traps while dumping data */
+	genwqe_ffdc_buff_read(cd, uid, regs, entries);
+	genwqe_start_traps(cd);
+
+	dbg_uidn_show(s, regs, entries);
+	kfree(regs);
+	return 0;
+}
+
+static int genwqe_curr_dbg_uid0_show(struct seq_file *s, void *unused)
+{
+	return curr_dbg_uidn_show(s, unused, 0);
+}
+
+GENWQE_DEBUGFS_RO(curr_dbg_uid0, genwqe_curr_dbg_uid0_show);
+
+static int genwqe_curr_dbg_uid1_show(struct seq_file *s, void *unused)
+{
+	return curr_dbg_uidn_show(s, unused, 1);
+}
+
+GENWQE_DEBUGFS_RO(curr_dbg_uid1, genwqe_curr_dbg_uid1_show);
+
+static int genwqe_curr_dbg_uid2_show(struct seq_file *s, void *unused)
+{
+	return curr_dbg_uidn_show(s, unused, 2);
+}
+
+GENWQE_DEBUGFS_RO(curr_dbg_uid2, genwqe_curr_dbg_uid2_show);
+
+static int prev_dbg_uidn_show(struct seq_file *s, void *unused, int uid)
+{
+	struct genwqe_dev *cd = s->private;
+
+	dbg_uidn_show(s, cd->ffdc[uid].regs,  cd->ffdc[uid].entries);
+	return 0;
+}
+
+static int genwqe_prev_dbg_uid0_show(struct seq_file *s, void *unused)
+{
+	return prev_dbg_uidn_show(s, unused, 0);
+}
+
+GENWQE_DEBUGFS_RO(prev_dbg_uid0, genwqe_prev_dbg_uid0_show);
+
+static int genwqe_prev_dbg_uid1_show(struct seq_file *s, void *unused)
+{
+	return prev_dbg_uidn_show(s, unused, 1);
+}
+
+GENWQE_DEBUGFS_RO(prev_dbg_uid1, genwqe_prev_dbg_uid1_show);
+
+static int genwqe_prev_dbg_uid2_show(struct seq_file *s, void *unused)
+{
+	return prev_dbg_uidn_show(s, unused, 2);
+}
+
+GENWQE_DEBUGFS_RO(prev_dbg_uid2, genwqe_prev_dbg_uid2_show);
+
+static int genwqe_curr_regs_show(struct seq_file *s, void *unused)
+{
+	struct genwqe_dev *cd = s->private;
+	unsigned int i;
+	struct genwqe_reg *regs;
+
+	regs = kcalloc(GENWQE_FFDC_REGS, sizeof(*regs), GFP_KERNEL);
+	if (regs == NULL)
+		return -ENOMEM;
+
+	genwqe_stop_traps(cd);
+	genwqe_read_ffdc_regs(cd, regs, GENWQE_FFDC_REGS, 1);
+	genwqe_start_traps(cd);
+
+	for (i = 0; i < GENWQE_FFDC_REGS; i++) {
+		if (regs[i].addr == 0xffffffff)
+			break;  /* invalid entries */
+
+		if (regs[i].val == 0x0ull)
+			continue;  /* do not print 0x0 FIRs */
+
+		seq_printf(s, "  0x%08x 0x%016llx\n",
+			   regs[i].addr, regs[i].val);
+	}
+	return 0;
+}
+
+GENWQE_DEBUGFS_RO(curr_regs, genwqe_curr_regs_show);
+
+static int genwqe_prev_regs_show(struct seq_file *s, void *unused)
+{
+	struct genwqe_dev *cd = s->private;
+	unsigned int i;
+	struct genwqe_reg *regs = cd->ffdc[GENWQE_DBG_REGS].regs;
+
+	if (regs == NULL)
+		return -EINVAL;
+
+	for (i = 0; i < GENWQE_FFDC_REGS; i++) {
+		if (regs[i].addr == 0xffffffff)
+			break;  /* invalid entries */
+
+		if (regs[i].val == 0x0ull)
+			continue;  /* do not print 0x0 FIRs */
+
+		seq_printf(s, "  0x%08x 0x%016llx\n",
+			   regs[i].addr, regs[i].val);
+	}
+	return 0;
+}
+
+GENWQE_DEBUGFS_RO(prev_regs, genwqe_prev_regs_show);
+
+static int genwqe_jtimer_show(struct seq_file *s, void *unused)
+{
+	struct genwqe_dev *cd = s->private;
+	unsigned int vf_num;
+	u64 jtimer;
+
+	jtimer = genwqe_read_vreg(cd, IO_SLC_VF_APPJOB_TIMEOUT, 0);
+	seq_printf(s, "  PF   0x%016llx %d msec\n", jtimer,
+		   genwqe_pf_jobtimeout_msec);
+
+	for (vf_num = 0; vf_num < cd->num_vfs; vf_num++) {
+		jtimer = genwqe_read_vreg(cd, IO_SLC_VF_APPJOB_TIMEOUT,
+					  vf_num + 1);
+		seq_printf(s, "  VF%-2d 0x%016llx %d msec\n", vf_num, jtimer,
+			   cd->vf_jobtimeout_msec[vf_num]);
+	}
+	return 0;
+}
+
+GENWQE_DEBUGFS_RO(jtimer, genwqe_jtimer_show);
+
+static int genwqe_queue_working_time_show(struct seq_file *s, void *unused)
+{
+	struct genwqe_dev *cd = s->private;
+	unsigned int vf_num;
+	u64 t;
+
+	t = genwqe_read_vreg(cd, IO_SLC_VF_QUEUE_WTIME, 0);
+	seq_printf(s, "  PF   0x%016llx\n", t);
+
+	for (vf_num = 0; vf_num < cd->num_vfs; vf_num++) {
+		t = genwqe_read_vreg(cd, IO_SLC_VF_QUEUE_WTIME, vf_num + 1);
+		seq_printf(s, "  VF%-2d 0x%016llx\n", vf_num, t);
+	}
+	return 0;
+}
+
+GENWQE_DEBUGFS_RO(queue_working_time, genwqe_queue_working_time_show);
+
+static int genwqe_ddcb_info_show(struct seq_file *s, void *unused)
+{
+	struct genwqe_dev *cd = s->private;
+	unsigned int i;
+	struct ddcb_queue *queue;
+	struct ddcb *pddcb;
+
+	queue = &cd->queue;
+	seq_puts(s, "DDCB QUEUE:\n");
+	seq_printf(s, "  ddcb_max:            %d\n"
+		   "  ddcb_daddr:          %016llx - %016llx\n"
+		   "  ddcb_vaddr:          %016llx\n"
+		   "  ddcbs_in_flight:     %u\n"
+		   "  ddcbs_max_in_flight: %u\n"
+		   "  ddcbs_completed:     %u\n"
+		   "  busy:                %u\n"
+		   "  irqs_processed:      %u\n",
+		   queue->ddcb_max, (long long)queue->ddcb_daddr,
+		   (long long)queue->ddcb_daddr +
+		   (queue->ddcb_max * DDCB_LENGTH),
+		   (long long)queue->ddcb_vaddr, queue->ddcbs_in_flight,
+		   queue->ddcbs_max_in_flight, queue->ddcbs_completed,
+		   queue->busy, cd->irqs_processed);
+
+	/* Hardware State */
+	seq_printf(s, "  0x%08x 0x%016llx IO_QUEUE_CONFIG\n"
+		   "  0x%08x 0x%016llx IO_QUEUE_STATUS\n"
+		   "  0x%08x 0x%016llx IO_QUEUE_SEGMENT\n"
+		   "  0x%08x 0x%016llx IO_QUEUE_INITSQN\n"
+		   "  0x%08x 0x%016llx IO_QUEUE_WRAP\n"
+		   "  0x%08x 0x%016llx IO_QUEUE_OFFSET\n"
+		   "  0x%08x 0x%016llx IO_QUEUE_WTIME\n"
+		   "  0x%08x 0x%016llx IO_QUEUE_ERRCNTS\n"
+		   "  0x%08x 0x%016llx IO_QUEUE_LRW\n",
+		   queue->IO_QUEUE_CONFIG,
+		   __genwqe_readq(cd, queue->IO_QUEUE_CONFIG),
+		   queue->IO_QUEUE_STATUS,
+		   __genwqe_readq(cd, queue->IO_QUEUE_STATUS),
+		   queue->IO_QUEUE_SEGMENT,
+		   __genwqe_readq(cd, queue->IO_QUEUE_SEGMENT),
+		   queue->IO_QUEUE_INITSQN,
+		   __genwqe_readq(cd, queue->IO_QUEUE_INITSQN),
+		   queue->IO_QUEUE_WRAP,
+		   __genwqe_readq(cd, queue->IO_QUEUE_WRAP),
+		   queue->IO_QUEUE_OFFSET,
+		   __genwqe_readq(cd, queue->IO_QUEUE_OFFSET),
+		   queue->IO_QUEUE_WTIME,
+		   __genwqe_readq(cd, queue->IO_QUEUE_WTIME),
+		   queue->IO_QUEUE_ERRCNTS,
+		   __genwqe_readq(cd, queue->IO_QUEUE_ERRCNTS),
+		   queue->IO_QUEUE_LRW,
+		   __genwqe_readq(cd, queue->IO_QUEUE_LRW));
+
+	seq_printf(s, "DDCB list (ddcb_act=%d/ddcb_next=%d):\n",
+		   queue->ddcb_act, queue->ddcb_next);
+
+	pddcb = queue->ddcb_vaddr;
+	for (i = 0; i < queue->ddcb_max; i++) {
+		seq_printf(s, "  %-3d: RETC=%03x SEQ=%04x HSI/SHI=%02x/%02x ",
+			   i, be16_to_cpu(pddcb->retc_16),
+			   be16_to_cpu(pddcb->seqnum_16),
+			   pddcb->hsi, pddcb->shi);
+		seq_printf(s, "PRIV=%06llx CMD=%02x\n",
+			   be64_to_cpu(pddcb->priv_64), pddcb->cmd);
+		pddcb++;
+	}
+	return 0;
+}
+
+GENWQE_DEBUGFS_RO(ddcb_info, genwqe_ddcb_info_show);
+
+static int genwqe_info_show(struct seq_file *s, void *unused)
+{
+	struct genwqe_dev *cd = s->private;
+	u16 val16, type;
+	u64 app_id, slu_id, bitstream = -1;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	slu_id = __genwqe_readq(cd, IO_SLU_UNITCFG);
+	app_id = __genwqe_readq(cd, IO_APP_UNITCFG);
+
+	if (genwqe_is_privileged(cd))
+		bitstream = __genwqe_readq(cd, IO_SLU_BITSTREAM);
+
+	val16 = (u16)(slu_id & 0x0fLLU);
+	type  = (u16)((slu_id >> 20) & 0xffLLU);
+
+	seq_printf(s, "%s driver version: %s\n"
+		   "    Device Name/Type: %s %s CardIdx: %d\n"
+		   "    SLU/APP Config  : 0x%016llx/0x%016llx\n"
+		   "    Build Date      : %u/%x/%u\n"
+		   "    Base Clock      : %u MHz\n"
+		   "    Arch/SVN Release: %u/%llx\n"
+		   "    Bitstream       : %llx\n",
+		   GENWQE_DEVNAME, DRV_VERS_STRING, dev_name(&pci_dev->dev),
+		   genwqe_is_privileged(cd) ?
+		   "Physical" : "Virtual or no SR-IOV",
+		   cd->card_idx, slu_id, app_id,
+		   (u16)((slu_id >> 12) & 0x0fLLU),	   /* month */
+		   (u16)((slu_id >>  4) & 0xffLLU),	   /* day */
+		   (u16)((slu_id >> 16) & 0x0fLLU) + 2010, /* year */
+		   genwqe_base_clock_frequency(cd),
+		   (u16)((slu_id >> 32) & 0xffLLU), slu_id >> 40,
+		   bitstream);
+
+	return 0;
+}
+
+GENWQE_DEBUGFS_RO(info, genwqe_info_show);
+
+int genwqe_init_debugfs(struct genwqe_dev *cd)
+{
+	struct dentry *root;
+	struct dentry *file;
+	int ret;
+	char card_name[64];
+	char name[64];
+	unsigned int i;
+
+	sprintf(card_name, "%s%d_card", GENWQE_DEVNAME, cd->card_idx);
+
+	root = debugfs_create_dir(card_name, cd->debugfs_genwqe);
+	if (!root) {
+		ret = -ENOMEM;
+		goto err0;
+	}
+
+	/* non privileged interfaces are done here */
+	file = debugfs_create_file("ddcb_info", S_IRUGO, root, cd,
+				   &genwqe_ddcb_info_fops);
+	if (!file) {
+		ret = -ENOMEM;
+		goto err1;
+	}
+
+	file = debugfs_create_file("info", S_IRUGO, root, cd,
+				   &genwqe_info_fops);
+	if (!file) {
+		ret = -ENOMEM;
+		goto err1;
+	}
+
+	file = debugfs_create_x64("err_inject", 0666, root, &cd->err_inject);
+	if (!file) {
+		ret = -ENOMEM;
+		goto err1;
+	}
+
+	file = debugfs_create_u32("ddcb_software_timeout", 0666, root,
+				  &cd->ddcb_software_timeout);
+	if (!file) {
+		ret = -ENOMEM;
+		goto err1;
+	}
+
+	file = debugfs_create_u32("kill_timeout", 0666, root,
+				  &cd->kill_timeout);
+	if (!file) {
+		ret = -ENOMEM;
+		goto err1;
+	}
+
+	/* privileged interfaces follow here */
+	if (!genwqe_is_privileged(cd)) {
+		cd->debugfs_root = root;
+		return 0;
+	}
+
+	file = debugfs_create_file("curr_regs", S_IRUGO, root, cd,
+				   &genwqe_curr_regs_fops);
+	if (!file) {
+		ret = -ENOMEM;
+		goto err1;
+	}
+
+	file = debugfs_create_file("curr_dbg_uid0", S_IRUGO, root, cd,
+				   &genwqe_curr_dbg_uid0_fops);
+	if (!file) {
+		ret = -ENOMEM;
+		goto err1;
+	}
+
+	file = debugfs_create_file("curr_dbg_uid1", S_IRUGO, root, cd,
+				   &genwqe_curr_dbg_uid1_fops);
+	if (!file) {
+		ret = -ENOMEM;
+		goto err1;
+	}
+
+	file = debugfs_create_file("curr_dbg_uid2", S_IRUGO, root, cd,
+				   &genwqe_curr_dbg_uid2_fops);
+	if (!file) {
+		ret = -ENOMEM;
+		goto err1;
+	}
+
+	file = debugfs_create_file("prev_regs", S_IRUGO, root, cd,
+				   &genwqe_prev_regs_fops);
+	if (!file) {
+		ret = -ENOMEM;
+		goto err1;
+	}
+
+	file = debugfs_create_file("prev_dbg_uid0", S_IRUGO, root, cd,
+				   &genwqe_prev_dbg_uid0_fops);
+	if (!file) {
+		ret = -ENOMEM;
+		goto err1;
+	}
+
+	file = debugfs_create_file("prev_dbg_uid1", S_IRUGO, root, cd,
+				   &genwqe_prev_dbg_uid1_fops);
+	if (!file) {
+		ret = -ENOMEM;
+		goto err1;
+	}
+
+	file = debugfs_create_file("prev_dbg_uid2", S_IRUGO, root, cd,
+				   &genwqe_prev_dbg_uid2_fops);
+	if (!file) {
+		ret = -ENOMEM;
+		goto err1;
+	}
+
+	for (i = 0; i <  GENWQE_MAX_VFS; i++) {
+		sprintf(name, "vf%u_jobtimeout_msec", i);
+
+		file = debugfs_create_u32(name, 0666, root,
+					  &cd->vf_jobtimeout_msec[i]);
+		if (!file) {
+			ret = -ENOMEM;
+			goto err1;
+		}
+	}
+
+	file = debugfs_create_file("jobtimer", S_IRUGO, root, cd,
+				   &genwqe_jtimer_fops);
+	if (!file) {
+		ret = -ENOMEM;
+		goto err1;
+	}
+
+	file = debugfs_create_file("queue_working_time", S_IRUGO, root, cd,
+				   &genwqe_queue_working_time_fops);
+	if (!file) {
+		ret = -ENOMEM;
+		goto err1;
+	}
+
+	file = debugfs_create_u32("skip_recovery", 0666, root,
+				  &cd->skip_recovery);
+	if (!file) {
+		ret = -ENOMEM;
+		goto err1;
+	}
+
+	cd->debugfs_root = root;
+	return 0;
+err1:
+	debugfs_remove_recursive(root);
+err0:
+	return ret;
+}
+
+void genqwe_exit_debugfs(struct genwqe_dev *cd)
+{
+	debugfs_remove_recursive(cd->debugfs_root);
+}
diff --git a/drivers/misc/genwqe/card_dev.c b/drivers/misc/genwqe/card_dev.c
new file mode 100644
index 00000000000..1d2f163a190
--- /dev/null
+++ b/drivers/misc/genwqe/card_dev.c
@@ -0,0 +1,1403 @@
+/**
+ * IBM Accelerator Family 'GenWQE'
+ *
+ * (C) Copyright IBM Corp. 2013
+ *
+ * Author: Frank Haverkamp <haver@linux.vnet.ibm.com>
+ * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com>
+ * Author: Michael Jung <mijung@de.ibm.com>
+ * Author: Michael Ruettger <michael@ibmra.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+/*
+ * Character device representation of the GenWQE device. This allows
+ * user-space applications to communicate with the card.
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+#include <linux/fs.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/delay.h>
+#include <linux/atomic.h>
+
+#include "card_base.h"
+#include "card_ddcb.h"
+
+static int genwqe_open_files(struct genwqe_dev *cd)
+{
+	int rc;
+	unsigned long flags;
+
+	spin_lock_irqsave(&cd->file_lock, flags);
+	rc = list_empty(&cd->file_list);
+	spin_unlock_irqrestore(&cd->file_lock, flags);
+	return !rc;
+}
+
+static void genwqe_add_file(struct genwqe_dev *cd, struct genwqe_file *cfile)
+{
+	unsigned long flags;
+
+	cfile->owner = current;
+	spin_lock_irqsave(&cd->file_lock, flags);
+	list_add(&cfile->list, &cd->file_list);
+	spin_unlock_irqrestore(&cd->file_lock, flags);
+}
+
+static int genwqe_del_file(struct genwqe_dev *cd, struct genwqe_file *cfile)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&cd->file_lock, flags);
+	list_del(&cfile->list);
+	spin_unlock_irqrestore(&cd->file_lock, flags);
+
+	return 0;
+}
+
+static void genwqe_add_pin(struct genwqe_file *cfile, struct dma_mapping *m)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&cfile->pin_lock, flags);
+	list_add(&m->pin_list, &cfile->pin_list);
+	spin_unlock_irqrestore(&cfile->pin_lock, flags);
+}
+
+static int genwqe_del_pin(struct genwqe_file *cfile, struct dma_mapping *m)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&cfile->pin_lock, flags);
+	list_del(&m->pin_list);
+	spin_unlock_irqrestore(&cfile->pin_lock, flags);
+
+	return 0;
+}
+
+/**
+ * genwqe_search_pin() - Search for the mapping for a userspace address
+ * @cfile:	Descriptor of opened file
+ * @u_addr:	User virtual address
+ * @size:	Size of buffer
+ * @dma_addr:	DMA address to be updated
+ *
+ * Return: Pointer to the corresponding mapping	NULL if not found
+ */
+static struct dma_mapping *genwqe_search_pin(struct genwqe_file *cfile,
+					    unsigned long u_addr,
+					    unsigned int size,
+					    void **virt_addr)
+{
+	unsigned long flags;
+	struct dma_mapping *m;
+
+	spin_lock_irqsave(&cfile->pin_lock, flags);
+
+	list_for_each_entry(m, &cfile->pin_list, pin_list) {
+		if ((((u64)m->u_vaddr) <= (u_addr)) &&
+		    (((u64)m->u_vaddr + m->size) >= (u_addr + size))) {
+
+			if (virt_addr)
+				*virt_addr = m->k_vaddr +
+					(u_addr - (u64)m->u_vaddr);
+
+			spin_unlock_irqrestore(&cfile->pin_lock, flags);
+			return m;
+		}
+	}
+	spin_unlock_irqrestore(&cfile->pin_lock, flags);
+	return NULL;
+}
+
+static void __genwqe_add_mapping(struct genwqe_file *cfile,
+			      struct dma_mapping *dma_map)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&cfile->map_lock, flags);
+	list_add(&dma_map->card_list, &cfile->map_list);
+	spin_unlock_irqrestore(&cfile->map_lock, flags);
+}
+
+static void __genwqe_del_mapping(struct genwqe_file *cfile,
+			      struct dma_mapping *dma_map)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&cfile->map_lock, flags);
+	list_del(&dma_map->card_list);
+	spin_unlock_irqrestore(&cfile->map_lock, flags);
+}
+
+
+/**
+ * __genwqe_search_mapping() - Search for the mapping for a userspace address
+ * @cfile:	descriptor of opened file
+ * @u_addr:	user virtual address
+ * @size:	size of buffer
+ * @dma_addr:	DMA address to be updated
+ * Return: Pointer to the corresponding mapping	NULL if not found
+ */
+static struct dma_mapping *__genwqe_search_mapping(struct genwqe_file *cfile,
+						   unsigned long u_addr,
+						   unsigned int size,
+						   dma_addr_t *dma_addr,
+						   void **virt_addr)
+{
+	unsigned long flags;
+	struct dma_mapping *m;
+	struct pci_dev *pci_dev = cfile->cd->pci_dev;
+
+	spin_lock_irqsave(&cfile->map_lock, flags);
+	list_for_each_entry(m, &cfile->map_list, card_list) {
+
+		if ((((u64)m->u_vaddr) <= (u_addr)) &&
+		    (((u64)m->u_vaddr + m->size) >= (u_addr + size))) {
+
+			/* match found: current is as expected and
+			   addr is in range */
+			if (dma_addr)
+				*dma_addr = m->dma_addr +
+					(u_addr - (u64)m->u_vaddr);
+
+			if (virt_addr)
+				*virt_addr = m->k_vaddr +
+					(u_addr - (u64)m->u_vaddr);
+
+			spin_unlock_irqrestore(&cfile->map_lock, flags);
+			return m;
+		}
+	}
+	spin_unlock_irqrestore(&cfile->map_lock, flags);
+
+	dev_err(&pci_dev->dev,
+		"[%s] Entry not found: u_addr=%lx, size=%x\n",
+		__func__, u_addr, size);
+
+	return NULL;
+}
+
+static void genwqe_remove_mappings(struct genwqe_file *cfile)
+{
+	int i = 0;
+	struct list_head *node, *next;
+	struct dma_mapping *dma_map;
+	struct genwqe_dev *cd = cfile->cd;
+	struct pci_dev *pci_dev = cfile->cd->pci_dev;
+
+	list_for_each_safe(node, next, &cfile->map_list) {
+		dma_map = list_entry(node, struct dma_mapping, card_list);
+
+		list_del_init(&dma_map->card_list);
+
+		/*
+		 * This is really a bug, because those things should
+		 * have been already tidied up.
+		 *
+		 * GENWQE_MAPPING_RAW should have been removed via mmunmap().
+		 * GENWQE_MAPPING_SGL_TEMP should be removed by tidy up code.
+		 */
+		dev_err(&pci_dev->dev,
+			"[%s] %d. cleanup mapping: u_vaddr=%p "
+			"u_kaddr=%016lx dma_addr=%lx\n", __func__, i++,
+			dma_map->u_vaddr, (unsigned long)dma_map->k_vaddr,
+			(unsigned long)dma_map->dma_addr);
+
+		if (dma_map->type == GENWQE_MAPPING_RAW) {
+			/* we allocated this dynamically */
+			__genwqe_free_consistent(cd, dma_map->size,
+						dma_map->k_vaddr,
+						dma_map->dma_addr);
+			kfree(dma_map);
+		} else if (dma_map->type == GENWQE_MAPPING_SGL_TEMP) {
+			/* we use dma_map statically from the request */
+			genwqe_user_vunmap(cd, dma_map, NULL);
+		}
+	}
+}
+
+static void genwqe_remove_pinnings(struct genwqe_file *cfile)
+{
+	struct list_head *node, *next;
+	struct dma_mapping *dma_map;
+	struct genwqe_dev *cd = cfile->cd;
+
+	list_for_each_safe(node, next, &cfile->pin_list) {
+		dma_map = list_entry(node, struct dma_mapping, pin_list);
+
+		/*
+		 * This is not a bug, because a killed processed might
+		 * not call the unpin ioctl, which is supposed to free
+		 * the resources.
+		 *
+		 * Pinnings are dymically allocated and need to be
+		 * deleted.
+		 */
+		list_del_init(&dma_map->pin_list);
+		genwqe_user_vunmap(cd, dma_map, NULL);
+		kfree(dma_map);
+	}
+}
+
+/**
+ * genwqe_kill_fasync() - Send signal to all processes with open GenWQE files
+ *
+ * E.g. genwqe_send_signal(cd, SIGIO);
+ */
+static int genwqe_kill_fasync(struct genwqe_dev *cd, int sig)
+{
+	unsigned int files = 0;
+	unsigned long flags;
+	struct genwqe_file *cfile;
+
+	spin_lock_irqsave(&cd->file_lock, flags);
+	list_for_each_entry(cfile, &cd->file_list, list) {
+		if (cfile->async_queue)
+			kill_fasync(&cfile->async_queue, sig, POLL_HUP);
+		files++;
+	}
+	spin_unlock_irqrestore(&cd->file_lock, flags);
+	return files;
+}
+
+static int genwqe_force_sig(struct genwqe_dev *cd, int sig)
+{
+	unsigned int files = 0;
+	unsigned long flags;
+	struct genwqe_file *cfile;
+
+	spin_lock_irqsave(&cd->file_lock, flags);
+	list_for_each_entry(cfile, &cd->file_list, list) {
+		force_sig(sig, cfile->owner);
+		files++;
+	}
+	spin_unlock_irqrestore(&cd->file_lock, flags);
+	return files;
+}
+
+/**
+ * genwqe_open() - file open
+ * @inode:      file system information
+ * @filp:	file handle
+ *
+ * This function is executed whenever an application calls
+ * open("/dev/genwqe",..).
+ *
+ * Return: 0 if successful or <0 if errors
+ */
+static int genwqe_open(struct inode *inode, struct file *filp)
+{
+	struct genwqe_dev *cd;
+	struct genwqe_file *cfile;
+	struct pci_dev *pci_dev;
+
+	cfile = kzalloc(sizeof(*cfile), GFP_KERNEL);
+	if (cfile == NULL)
+		return -ENOMEM;
+
+	cd = container_of(inode->i_cdev, struct genwqe_dev, cdev_genwqe);
+	pci_dev = cd->pci_dev;
+	cfile->cd = cd;
+	cfile->filp = filp;
+	cfile->client = NULL;
+
+	spin_lock_init(&cfile->map_lock);  /* list of raw memory allocations */
+	INIT_LIST_HEAD(&cfile->map_list);
+
+	spin_lock_init(&cfile->pin_lock);  /* list of user pinned memory */
+	INIT_LIST_HEAD(&cfile->pin_list);
+
+	filp->private_data = cfile;
+
+	genwqe_add_file(cd, cfile);
+	return 0;
+}
+
+/**
+ * genwqe_fasync() - Setup process to receive SIGIO.
+ * @fd:        file descriptor
+ * @filp:      file handle
+ * @mode:      file mode
+ *
+ * Sending a signal is working as following:
+ *
+ * if (cdev->async_queue)
+ *         kill_fasync(&cdev->async_queue, SIGIO, POLL_IN);
+ *
+ * Some devices also implement asynchronous notification to indicate
+ * when the device can be written; in this case, of course,
+ * kill_fasync must be called with a mode of POLL_OUT.
+ */
+static int genwqe_fasync(int fd, struct file *filp, int mode)
+{
+	struct genwqe_file *cdev = (struct genwqe_file *)filp->private_data;
+	return fasync_helper(fd, filp, mode, &cdev->async_queue);
+}
+
+
+/**
+ * genwqe_release() - file close
+ * @inode:      file system information
+ * @filp:       file handle
+ *
+ * This function is executed whenever an application calls 'close(fd_genwqe)'
+ *
+ * Return: always 0
+ */
+static int genwqe_release(struct inode *inode, struct file *filp)
+{
+	struct genwqe_file *cfile = (struct genwqe_file *)filp->private_data;
+	struct genwqe_dev *cd = cfile->cd;
+
+	/* there must be no entries in these lists! */
+	genwqe_remove_mappings(cfile);
+	genwqe_remove_pinnings(cfile);
+
+	/* remove this filp from the asynchronously notified filp's */
+	genwqe_fasync(-1, filp, 0);
+
+	/*
+	 * For this to work we must not release cd when this cfile is
+	 * not yet released, otherwise the list entry is invalid,
+	 * because the list itself gets reinstantiated!
+	 */
+	genwqe_del_file(cd, cfile);
+	kfree(cfile);
+	return 0;
+}
+
+static void genwqe_vma_open(struct vm_area_struct *vma)
+{
+	/* nothing ... */
+}
+
+/**
+ * genwqe_vma_close() - Called each time when vma is unmapped
+ *
+ * Free memory which got allocated by GenWQE mmap().
+ */
+static void genwqe_vma_close(struct vm_area_struct *vma)
+{
+	unsigned long vsize = vma->vm_end - vma->vm_start;
+	struct inode *inode = vma->vm_file->f_dentry->d_inode;
+	struct dma_mapping *dma_map;
+	struct genwqe_dev *cd = container_of(inode->i_cdev, struct genwqe_dev,
+					    cdev_genwqe);
+	struct pci_dev *pci_dev = cd->pci_dev;
+	dma_addr_t d_addr = 0;
+	struct genwqe_file *cfile = vma->vm_private_data;
+
+	dma_map = __genwqe_search_mapping(cfile, vma->vm_start, vsize,
+					 &d_addr, NULL);
+	if (dma_map == NULL) {
+		dev_err(&pci_dev->dev,
+			"  [%s] err: mapping not found: v=%lx, p=%lx s=%lx\n",
+			__func__, vma->vm_start, vma->vm_pgoff << PAGE_SHIFT,
+			vsize);
+		return;
+	}
+	__genwqe_del_mapping(cfile, dma_map);
+	__genwqe_free_consistent(cd, dma_map->size, dma_map->k_vaddr,
+				 dma_map->dma_addr);
+	kfree(dma_map);
+}
+
+static struct vm_operations_struct genwqe_vma_ops = {
+	.open   = genwqe_vma_open,
+	.close  = genwqe_vma_close,
+};
+
+/**
+ * genwqe_mmap() - Provide contignous buffers to userspace
+ *
+ * We use mmap() to allocate contignous buffers used for DMA
+ * transfers. After the buffer is allocated we remap it to user-space
+ * and remember a reference to our dma_mapping data structure, where
+ * we store the associated DMA address and allocated size.
+ *
+ * When we receive a DDCB execution request with the ATS bits set to
+ * plain buffer, we lookup our dma_mapping list to find the
+ * corresponding DMA address for the associated user-space address.
+ */
+static int genwqe_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	int rc;
+	unsigned long pfn, vsize = vma->vm_end - vma->vm_start;
+	struct genwqe_file *cfile = (struct genwqe_file *)filp->private_data;
+	struct genwqe_dev *cd = cfile->cd;
+	struct dma_mapping *dma_map;
+
+	if (vsize == 0)
+		return -EINVAL;
+
+	if (get_order(vsize) > MAX_ORDER)
+		return -ENOMEM;
+
+	dma_map = kzalloc(sizeof(struct dma_mapping), GFP_ATOMIC);
+	if (dma_map == NULL)
+		return -ENOMEM;
+
+	genwqe_mapping_init(dma_map, GENWQE_MAPPING_RAW);
+	dma_map->u_vaddr = (void *)vma->vm_start;
+	dma_map->size = vsize;
+	dma_map->nr_pages = DIV_ROUND_UP(vsize, PAGE_SIZE);
+	dma_map->k_vaddr = __genwqe_alloc_consistent(cd, vsize,
+						     &dma_map->dma_addr);
+	if (dma_map->k_vaddr == NULL) {
+		rc = -ENOMEM;
+		goto free_dma_map;
+	}
+
+	if (capable(CAP_SYS_ADMIN) && (vsize > sizeof(dma_addr_t)))
+		*(dma_addr_t *)dma_map->k_vaddr = dma_map->dma_addr;
+
+	pfn = virt_to_phys(dma_map->k_vaddr) >> PAGE_SHIFT;
+	rc = remap_pfn_range(vma,
+			     vma->vm_start,
+			     pfn,
+			     vsize,
+			     vma->vm_page_prot);
+	if (rc != 0) {
+		rc = -EFAULT;
+		goto free_dma_mem;
+	}
+
+	vma->vm_private_data = cfile;
+	vma->vm_ops = &genwqe_vma_ops;
+	__genwqe_add_mapping(cfile, dma_map);
+
+	return 0;
+
+ free_dma_mem:
+	__genwqe_free_consistent(cd, dma_map->size,
+				dma_map->k_vaddr,
+				dma_map->dma_addr);
+ free_dma_map:
+	kfree(dma_map);
+	return rc;
+}
+
+/**
+ * do_flash_update() - Excute flash update (write image or CVPD)
+ * @cd:        genwqe device
+ * @load:      details about image load
+ *
+ * Return: 0 if successful
+ */
+
+#define	FLASH_BLOCK	0x40000	/* we use 256k blocks */
+
+static int do_flash_update(struct genwqe_file *cfile,
+			   struct genwqe_bitstream *load)
+{
+	int rc = 0;
+	int blocks_to_flash;
+	dma_addr_t dma_addr;
+	u64 flash = 0;
+	size_t tocopy = 0;
+	u8 __user *buf;
+	u8 *xbuf;
+	u32 crc;
+	u8 cmdopts;
+	struct genwqe_dev *cd = cfile->cd;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	if ((load->size & 0x3) != 0)
+		return -EINVAL;
+
+	if (((unsigned long)(load->data_addr) & ~PAGE_MASK) != 0)
+		return -EINVAL;
+
+	/* FIXME Bits have changed for new service layer! */
+	switch ((char)load->partition) {
+	case '0':
+		cmdopts = 0x14;
+		break;		/* download/erase_first/part_0 */
+	case '1':
+		cmdopts = 0x1C;
+		break;		/* download/erase_first/part_1 */
+	case 'v':
+		cmdopts = 0x0C;
+		break;		/* download/erase_first/vpd */
+	default:
+		return -EINVAL;
+	}
+
+	buf = (u8 __user *)load->data_addr;
+	xbuf = __genwqe_alloc_consistent(cd, FLASH_BLOCK, &dma_addr);
+	if (xbuf == NULL)
+		return -ENOMEM;
+
+	blocks_to_flash = load->size / FLASH_BLOCK;
+	while (load->size) {
+		struct genwqe_ddcb_cmd *req;
+
+		/*
+		 * We must be 4 byte aligned. Buffer must be 0 appened
+		 * to have defined values when calculating CRC.
+		 */
+		tocopy = min_t(size_t, load->size, FLASH_BLOCK);
+
+		rc = copy_from_user(xbuf, buf, tocopy);
+		if (rc) {
+			rc = -EFAULT;
+			goto free_buffer;
+		}
+		crc = genwqe_crc32(xbuf, tocopy, 0xffffffff);
+
+		dev_dbg(&pci_dev->dev,
+			"[%s] DMA: %lx CRC: %08x SZ: %ld %d\n",
+			__func__, (unsigned long)dma_addr, crc, tocopy,
+			blocks_to_flash);
+
+		/* prepare DDCB for SLU process */
+		req = ddcb_requ_alloc();
+		if (req == NULL) {
+			rc = -ENOMEM;
+			goto free_buffer;
+		}
+
+		req->cmd = SLCMD_MOVE_FLASH;
+		req->cmdopts = cmdopts;
+
+		/* prepare invariant values */
+		if (genwqe_get_slu_id(cd) <= 0x2) {
+			*(__be64 *)&req->__asiv[0]  = cpu_to_be64(dma_addr);
+			*(__be64 *)&req->__asiv[8]  = cpu_to_be64(tocopy);
+			*(__be64 *)&req->__asiv[16] = cpu_to_be64(flash);
+			*(__be32 *)&req->__asiv[24] = cpu_to_be32(0);
+			req->__asiv[24]	       = load->uid;
+			*(__be32 *)&req->__asiv[28] = cpu_to_be32(crc);
+
+			/* for simulation only */
+			*(__be64 *)&req->__asiv[88] = cpu_to_be64(load->slu_id);
+			*(__be64 *)&req->__asiv[96] = cpu_to_be64(load->app_id);
+			req->asiv_length = 32; /* bytes included in crc calc */
+		} else {	/* setup DDCB for ATS architecture */
+			*(__be64 *)&req->asiv[0]  = cpu_to_be64(dma_addr);
+			*(__be32 *)&req->asiv[8]  = cpu_to_be32(tocopy);
+			*(__be32 *)&req->asiv[12] = cpu_to_be32(0); /* resvd */
+			*(__be64 *)&req->asiv[16] = cpu_to_be64(flash);
+			*(__be32 *)&req->asiv[24] = cpu_to_be32(load->uid<<24);
+			*(__be32 *)&req->asiv[28] = cpu_to_be32(crc);
+
+			/* for simulation only */
+			*(__be64 *)&req->asiv[80] = cpu_to_be64(load->slu_id);
+			*(__be64 *)&req->asiv[88] = cpu_to_be64(load->app_id);
+
+			/* Rd only */
+			req->ats = 0x4ULL << 44;
+			req->asiv_length = 40; /* bytes included in crc calc */
+		}
+		req->asv_length  = 8;
+
+		/* For Genwqe5 we get back the calculated CRC */
+		*(u64 *)&req->asv[0] = 0ULL;			/* 0x80 */
+
+		rc = __genwqe_execute_raw_ddcb(cd, req);
+
+		load->retc = req->retc;
+		load->attn = req->attn;
+		load->progress = req->progress;
+
+		if (rc < 0) {
+			ddcb_requ_free(req);
+			goto free_buffer;
+		}
+
+		if (req->retc != DDCB_RETC_COMPLETE) {
+			rc = -EIO;
+			ddcb_requ_free(req);
+			goto free_buffer;
+		}
+
+		load->size  -= tocopy;
+		flash += tocopy;
+		buf += tocopy;
+		blocks_to_flash--;
+		ddcb_requ_free(req);
+	}
+
+ free_buffer:
+	__genwqe_free_consistent(cd, FLASH_BLOCK, xbuf, dma_addr);
+	return rc;
+}
+
+static int do_flash_read(struct genwqe_file *cfile,
+			 struct genwqe_bitstream *load)
+{
+	int rc, blocks_to_flash;
+	dma_addr_t dma_addr;
+	u64 flash = 0;
+	size_t tocopy = 0;
+	u8 __user *buf;
+	u8 *xbuf;
+	u8 cmdopts;
+	struct genwqe_dev *cd = cfile->cd;
+	struct pci_dev *pci_dev = cd->pci_dev;
+	struct genwqe_ddcb_cmd *cmd;
+
+	if ((load->size & 0x3) != 0)
+		return -EINVAL;
+
+	if (((unsigned long)(load->data_addr) & ~PAGE_MASK) != 0)
+		return -EINVAL;
+
+	/* FIXME Bits have changed for new service layer! */
+	switch ((char)load->partition) {
+	case '0':
+		cmdopts = 0x12;
+		break;		/* upload/part_0 */
+	case '1':
+		cmdopts = 0x1A;
+		break;		/* upload/part_1 */
+	case 'v':
+		cmdopts = 0x0A;
+		break;		/* upload/vpd */
+	default:
+		return -EINVAL;
+	}
+
+	buf = (u8 __user *)load->data_addr;
+	xbuf = __genwqe_alloc_consistent(cd, FLASH_BLOCK, &dma_addr);
+	if (xbuf == NULL)
+		return -ENOMEM;
+
+	blocks_to_flash = load->size / FLASH_BLOCK;
+	while (load->size) {
+		/*
+		 * We must be 4 byte aligned. Buffer must be 0 appened
+		 * to have defined values when calculating CRC.
+		 */
+		tocopy = min_t(size_t, load->size, FLASH_BLOCK);
+
+		dev_dbg(&pci_dev->dev,
+			"[%s] DMA: %lx SZ: %ld %d\n",
+			__func__, (unsigned long)dma_addr, tocopy,
+			blocks_to_flash);
+
+		/* prepare DDCB for SLU process */
+		cmd = ddcb_requ_alloc();
+		if (cmd == NULL) {
+			rc = -ENOMEM;
+			goto free_buffer;
+		}
+		cmd->cmd = SLCMD_MOVE_FLASH;
+		cmd->cmdopts = cmdopts;
+
+		/* prepare invariant values */
+		if (genwqe_get_slu_id(cd) <= 0x2) {
+			*(__be64 *)&cmd->__asiv[0]  = cpu_to_be64(dma_addr);
+			*(__be64 *)&cmd->__asiv[8]  = cpu_to_be64(tocopy);
+			*(__be64 *)&cmd->__asiv[16] = cpu_to_be64(flash);
+			*(__be32 *)&cmd->__asiv[24] = cpu_to_be32(0);
+			cmd->__asiv[24] = load->uid;
+			*(__be32 *)&cmd->__asiv[28] = cpu_to_be32(0) /* CRC */;
+			cmd->asiv_length = 32; /* bytes included in crc calc */
+		} else {	/* setup DDCB for ATS architecture */
+			*(__be64 *)&cmd->asiv[0]  = cpu_to_be64(dma_addr);
+			*(__be32 *)&cmd->asiv[8]  = cpu_to_be32(tocopy);
+			*(__be32 *)&cmd->asiv[12] = cpu_to_be32(0); /* resvd */
+			*(__be64 *)&cmd->asiv[16] = cpu_to_be64(flash);
+			*(__be32 *)&cmd->asiv[24] = cpu_to_be32(load->uid<<24);
+			*(__be32 *)&cmd->asiv[28] = cpu_to_be32(0); /* CRC */
+
+			/* rd/wr */
+			cmd->ats = 0x5ULL << 44;
+			cmd->asiv_length = 40; /* bytes included in crc calc */
+		}
+		cmd->asv_length  = 8;
+
+		/* we only get back the calculated CRC */
+		*(u64 *)&cmd->asv[0] = 0ULL;	/* 0x80 */
+
+		rc = __genwqe_execute_raw_ddcb(cd, cmd);
+
+		load->retc = cmd->retc;
+		load->attn = cmd->attn;
+		load->progress = cmd->progress;
+
+		if ((rc < 0) && (rc != -EBADMSG)) {
+			ddcb_requ_free(cmd);
+			goto free_buffer;
+		}
+
+		rc = copy_to_user(buf, xbuf, tocopy);
+		if (rc) {
+			rc = -EFAULT;
+			ddcb_requ_free(cmd);
+			goto free_buffer;
+		}
+
+		/* We know that we can get retc 0x104 with CRC err */
+		if (((cmd->retc == DDCB_RETC_FAULT) &&
+		     (cmd->attn != 0x02)) ||  /* Normally ignore CRC error */
+		    ((cmd->retc == DDCB_RETC_COMPLETE) &&
+		     (cmd->attn != 0x00))) {  /* Everything was fine */
+			rc = -EIO;
+			ddcb_requ_free(cmd);
+			goto free_buffer;
+		}
+
+		load->size  -= tocopy;
+		flash += tocopy;
+		buf += tocopy;
+		blocks_to_flash--;
+		ddcb_requ_free(cmd);
+	}
+	rc = 0;
+
+ free_buffer:
+	__genwqe_free_consistent(cd, FLASH_BLOCK, xbuf, dma_addr);
+	return rc;
+}
+
+static int genwqe_pin_mem(struct genwqe_file *cfile, struct genwqe_mem *m)
+{
+	int rc;
+	struct genwqe_dev *cd = cfile->cd;
+	struct pci_dev *pci_dev = cfile->cd->pci_dev;
+	struct dma_mapping *dma_map;
+	unsigned long map_addr;
+	unsigned long map_size;
+
+	if ((m->addr == 0x0) || (m->size == 0))
+		return -EINVAL;
+
+	map_addr = (m->addr & PAGE_MASK);
+	map_size = round_up(m->size + (m->addr & ~PAGE_MASK), PAGE_SIZE);
+
+	dma_map = kzalloc(sizeof(struct dma_mapping), GFP_ATOMIC);
+	if (dma_map == NULL)
+		return -ENOMEM;
+
+	genwqe_mapping_init(dma_map, GENWQE_MAPPING_SGL_PINNED);
+	rc = genwqe_user_vmap(cd, dma_map, (void *)map_addr, map_size, NULL);
+	if (rc != 0) {
+		dev_err(&pci_dev->dev,
+			"[%s] genwqe_user_vmap rc=%d\n", __func__, rc);
+		kfree(dma_map);
+		return rc;
+	}
+
+	genwqe_add_pin(cfile, dma_map);
+	return 0;
+}
+
+static int genwqe_unpin_mem(struct genwqe_file *cfile, struct genwqe_mem *m)
+{
+	struct genwqe_dev *cd = cfile->cd;
+	struct dma_mapping *dma_map;
+	unsigned long map_addr;
+	unsigned long map_size;
+
+	if (m->addr == 0x0)
+		return -EINVAL;
+
+	map_addr = (m->addr & PAGE_MASK);
+	map_size = round_up(m->size + (m->addr & ~PAGE_MASK), PAGE_SIZE);
+
+	dma_map = genwqe_search_pin(cfile, map_addr, map_size, NULL);
+	if (dma_map == NULL)
+		return -ENOENT;
+
+	genwqe_del_pin(cfile, dma_map);
+	genwqe_user_vunmap(cd, dma_map, NULL);
+	kfree(dma_map);
+	return 0;
+}
+
+/**
+ * ddcb_cmd_cleanup() - Remove dynamically created fixup entries
+ *
+ * Only if there are any. Pinnings are not removed.
+ */
+static int ddcb_cmd_cleanup(struct genwqe_file *cfile, struct ddcb_requ *req)
+{
+	unsigned int i;
+	struct dma_mapping *dma_map;
+	struct genwqe_dev *cd = cfile->cd;
+
+	for (i = 0; i < DDCB_FIXUPS; i++) {
+		dma_map = &req->dma_mappings[i];
+
+		if (dma_mapping_used(dma_map)) {
+			__genwqe_del_mapping(cfile, dma_map);
+			genwqe_user_vunmap(cd, dma_map, req);
+		}
+		if (req->sgls[i].sgl != NULL)
+			genwqe_free_sync_sgl(cd, &req->sgls[i]);
+	}
+	return 0;
+}
+
+/**
+ * ddcb_cmd_fixups() - Establish DMA fixups/sglists for user memory references
+ *
+ * Before the DDCB gets executed we need to handle the fixups. We
+ * replace the user-space addresses with DMA addresses or do
+ * additional setup work e.g. generating a scatter-gather list which
+ * is used to describe the memory referred to in the fixup.
+ */
+static int ddcb_cmd_fixups(struct genwqe_file *cfile, struct ddcb_requ *req)
+{
+	int rc;
+	unsigned int asiv_offs, i;
+	struct genwqe_dev *cd = cfile->cd;
+	struct genwqe_ddcb_cmd *cmd = &req->cmd;
+	struct dma_mapping *m;
+	const char *type = "UNKNOWN";
+
+	for (i = 0, asiv_offs = 0x00; asiv_offs <= 0x58;
+	     i++, asiv_offs += 0x08) {
+
+		u64 u_addr;
+		dma_addr_t d_addr;
+		u32 u_size = 0;
+		u64 ats_flags;
+
+		ats_flags = ATS_GET_FLAGS(cmd->ats, asiv_offs);
+
+		switch (ats_flags) {
+
+		case ATS_TYPE_DATA:
+			break;	/* nothing to do here */
+
+		case ATS_TYPE_FLAT_RDWR:
+		case ATS_TYPE_FLAT_RD: {
+			u_addr = be64_to_cpu(*((__be64 *)&cmd->
+					       asiv[asiv_offs]));
+			u_size = be32_to_cpu(*((__be32 *)&cmd->
+					       asiv[asiv_offs + 0x08]));
+
+			/*
+			 * No data available. Ignore u_addr in this
+			 * case and set addr to 0. Hardware must not
+			 * fetch the buffer.
+			 */
+			if (u_size == 0x0) {
+				*((__be64 *)&cmd->asiv[asiv_offs]) =
+					cpu_to_be64(0x0);
+				break;
+			}
+
+			m = __genwqe_search_mapping(cfile, u_addr, u_size,
+						   &d_addr, NULL);
+			if (m == NULL) {
+				rc = -EFAULT;
+				goto err_out;
+			}
+
+			*((__be64 *)&cmd->asiv[asiv_offs]) =
+				cpu_to_be64(d_addr);
+			break;
+		}
+
+		case ATS_TYPE_SGL_RDWR:
+		case ATS_TYPE_SGL_RD: {
+			int page_offs;
+
+			u_addr = be64_to_cpu(*((__be64 *)
+					       &cmd->asiv[asiv_offs]));
+			u_size = be32_to_cpu(*((__be32 *)
+					       &cmd->asiv[asiv_offs + 0x08]));
+
+			/*
+			 * No data available. Ignore u_addr in this
+			 * case and set addr to 0. Hardware must not
+			 * fetch the empty sgl.
+			 */
+			if (u_size == 0x0) {
+				*((__be64 *)&cmd->asiv[asiv_offs]) =
+					cpu_to_be64(0x0);
+				break;
+			}
+
+			m = genwqe_search_pin(cfile, u_addr, u_size, NULL);
+			if (m != NULL) {
+				type = "PINNING";
+				page_offs = (u_addr -
+					     (u64)m->u_vaddr)/PAGE_SIZE;
+			} else {
+				type = "MAPPING";
+				m = &req->dma_mappings[i];
+
+				genwqe_mapping_init(m,
+						    GENWQE_MAPPING_SGL_TEMP);
+				rc = genwqe_user_vmap(cd, m, (void *)u_addr,
+						      u_size, req);
+				if (rc != 0)
+					goto err_out;
+
+				__genwqe_add_mapping(cfile, m);
+				page_offs = 0;
+			}
+
+			/* create genwqe style scatter gather list */
+			rc = genwqe_alloc_sync_sgl(cd, &req->sgls[i],
+						   (void __user *)u_addr,
+						   u_size);
+			if (rc != 0)
+				goto err_out;
+
+			genwqe_setup_sgl(cd, &req->sgls[i],
+					 &m->dma_list[page_offs]);
+
+			*((__be64 *)&cmd->asiv[asiv_offs]) =
+				cpu_to_be64(req->sgls[i].sgl_dma_addr);
+
+			break;
+		}
+		default:
+			rc = -EINVAL;
+			goto err_out;
+		}
+	}
+	return 0;
+
+ err_out:
+	ddcb_cmd_cleanup(cfile, req);
+	return rc;
+}
+
+/**
+ * genwqe_execute_ddcb() - Execute DDCB using userspace address fixups
+ *
+ * The code will build up the translation tables or lookup the
+ * contignous memory allocation table to find the right translations
+ * and DMA addresses.
+ */
+static int genwqe_execute_ddcb(struct genwqe_file *cfile,
+			       struct genwqe_ddcb_cmd *cmd)
+{
+	int rc;
+	struct genwqe_dev *cd = cfile->cd;
+	struct ddcb_requ *req = container_of(cmd, struct ddcb_requ, cmd);
+
+	rc = ddcb_cmd_fixups(cfile, req);
+	if (rc != 0)
+		return rc;
+
+	rc = __genwqe_execute_raw_ddcb(cd, cmd);
+	ddcb_cmd_cleanup(cfile, req);
+	return rc;
+}
+
+static int do_execute_ddcb(struct genwqe_file *cfile,
+			   unsigned long arg, int raw)
+{
+	int rc;
+	struct genwqe_ddcb_cmd *cmd;
+	struct ddcb_requ *req;
+	struct genwqe_dev *cd = cfile->cd;
+
+	cmd = ddcb_requ_alloc();
+	if (cmd == NULL)
+		return -ENOMEM;
+
+	req = container_of(cmd, struct ddcb_requ, cmd);
+
+	if (copy_from_user(cmd, (void __user *)arg, sizeof(*cmd))) {
+		ddcb_requ_free(cmd);
+		return -EFAULT;
+	}
+
+	if (!raw)
+		rc = genwqe_execute_ddcb(cfile, cmd);
+	else
+		rc = __genwqe_execute_raw_ddcb(cd, cmd);
+
+	/* Copy back only the modifed fields. Do not copy ASIV
+	   back since the copy got modified by the driver. */
+	if (copy_to_user((void __user *)arg, cmd,
+			 sizeof(*cmd) - DDCB_ASIV_LENGTH)) {
+		ddcb_requ_free(cmd);
+		return -EFAULT;
+	}
+
+	ddcb_requ_free(cmd);
+	return rc;
+}
+
+/**
+ * genwqe_ioctl() - IO control
+ * @filp:       file handle
+ * @cmd:        command identifier (passed from user)
+ * @arg:        argument (passed from user)
+ *
+ * Return: 0 success
+ */
+static long genwqe_ioctl(struct file *filp, unsigned int cmd,
+			 unsigned long arg)
+{
+	int rc = 0;
+	struct genwqe_file *cfile = (struct genwqe_file *)filp->private_data;
+	struct genwqe_dev *cd = cfile->cd;
+	struct genwqe_reg_io __user *io;
+	u64 val;
+	u32 reg_offs;
+
+	if (_IOC_TYPE(cmd) != GENWQE_IOC_CODE)
+		return -EINVAL;
+
+	switch (cmd) {
+
+	case GENWQE_GET_CARD_STATE:
+		put_user(cd->card_state, (enum genwqe_card_state __user *)arg);
+		return 0;
+
+		/* Register access */
+	case GENWQE_READ_REG64: {
+		io = (struct genwqe_reg_io __user *)arg;
+
+		if (get_user(reg_offs, &io->num))
+			return -EFAULT;
+
+		if ((reg_offs >= cd->mmio_len) || (reg_offs & 0x7))
+			return -EINVAL;
+
+		val = __genwqe_readq(cd, reg_offs);
+		put_user(val, &io->val64);
+		return 0;
+	}
+
+	case GENWQE_WRITE_REG64: {
+		io = (struct genwqe_reg_io __user *)arg;
+
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+
+		if ((filp->f_flags & O_ACCMODE) == O_RDONLY)
+			return -EPERM;
+
+		if (get_user(reg_offs, &io->num))
+			return -EFAULT;
+
+		if ((reg_offs >= cd->mmio_len) || (reg_offs & 0x7))
+			return -EINVAL;
+
+		if (get_user(val, &io->val64))
+			return -EFAULT;
+
+		__genwqe_writeq(cd, reg_offs, val);
+		return 0;
+	}
+
+	case GENWQE_READ_REG32: {
+		io = (struct genwqe_reg_io __user *)arg;
+
+		if (get_user(reg_offs, &io->num))
+			return -EFAULT;
+
+		if ((reg_offs >= cd->mmio_len) || (reg_offs & 0x3))
+			return -EINVAL;
+
+		val = __genwqe_readl(cd, reg_offs);
+		put_user(val, &io->val64);
+		return 0;
+	}
+
+	case GENWQE_WRITE_REG32: {
+		io = (struct genwqe_reg_io __user *)arg;
+
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+
+		if ((filp->f_flags & O_ACCMODE) == O_RDONLY)
+			return -EPERM;
+
+		if (get_user(reg_offs, &io->num))
+			return -EFAULT;
+
+		if ((reg_offs >= cd->mmio_len) || (reg_offs & 0x3))
+			return -EINVAL;
+
+		if (get_user(val, &io->val64))
+			return -EFAULT;
+
+		__genwqe_writel(cd, reg_offs, val);
+		return 0;
+	}
+
+		/* Flash update/reading */
+	case GENWQE_SLU_UPDATE: {
+		struct genwqe_bitstream load;
+
+		if (!genwqe_is_privileged(cd))
+			return -EPERM;
+
+		if ((filp->f_flags & O_ACCMODE) == O_RDONLY)
+			return -EPERM;
+
+		if (copy_from_user(&load, (void __user *)arg,
+				   sizeof(load)))
+			return -EFAULT;
+
+		rc = do_flash_update(cfile, &load);
+
+		if (copy_to_user((void __user *)arg, &load, sizeof(load)))
+			return -EFAULT;
+
+		return rc;
+	}
+
+	case GENWQE_SLU_READ: {
+		struct genwqe_bitstream load;
+
+		if (!genwqe_is_privileged(cd))
+			return -EPERM;
+
+		if (genwqe_flash_readback_fails(cd))
+			return -ENOSPC;	 /* known to fail for old versions */
+
+		if (copy_from_user(&load, (void __user *)arg, sizeof(load)))
+			return -EFAULT;
+
+		rc = do_flash_read(cfile, &load);
+
+		if (copy_to_user((void __user *)arg, &load, sizeof(load)))
+			return -EFAULT;
+
+		return rc;
+	}
+
+		/* memory pinning and unpinning */
+	case GENWQE_PIN_MEM: {
+		struct genwqe_mem m;
+
+		if (copy_from_user(&m, (void __user *)arg, sizeof(m)))
+			return -EFAULT;
+
+		return genwqe_pin_mem(cfile, &m);
+	}
+
+	case GENWQE_UNPIN_MEM: {
+		struct genwqe_mem m;
+
+		if (copy_from_user(&m, (void __user *)arg, sizeof(m)))
+			return -EFAULT;
+
+		return genwqe_unpin_mem(cfile, &m);
+	}
+
+		/* launch an DDCB and wait for completion */
+	case GENWQE_EXECUTE_DDCB:
+		return do_execute_ddcb(cfile, arg, 0);
+
+	case GENWQE_EXECUTE_RAW_DDCB: {
+
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+
+		return do_execute_ddcb(cfile, arg, 1);
+	}
+
+	default:
+		return -EINVAL;
+	}
+
+	return rc;
+}
+
+#if defined(CONFIG_COMPAT)
+/**
+ * genwqe_compat_ioctl() - Compatibility ioctl
+ *
+ * Called whenever a 32-bit process running under a 64-bit kernel
+ * performs an ioctl on /dev/genwqe<n>_card.
+ *
+ * @filp:        file pointer.
+ * @cmd:         command.
+ * @arg:         user argument.
+ * Return:       zero on success or negative number on failure.
+ */
+static long genwqe_compat_ioctl(struct file *filp, unsigned int cmd,
+				unsigned long arg)
+{
+	return genwqe_ioctl(filp, cmd, arg);
+}
+#endif /* defined(CONFIG_COMPAT) */
+
+static const struct file_operations genwqe_fops = {
+	.owner		= THIS_MODULE,
+	.open		= genwqe_open,
+	.fasync		= genwqe_fasync,
+	.mmap		= genwqe_mmap,
+	.unlocked_ioctl	= genwqe_ioctl,
+#if defined(CONFIG_COMPAT)
+	.compat_ioctl   = genwqe_compat_ioctl,
+#endif
+	.release	= genwqe_release,
+};
+
+static int genwqe_device_initialized(struct genwqe_dev *cd)
+{
+	return cd->dev != NULL;
+}
+
+/**
+ * genwqe_device_create() - Create and configure genwqe char device
+ * @cd:      genwqe device descriptor
+ *
+ * This function must be called before we create any more genwqe
+ * character devices, because it is allocating the major and minor
+ * number which are supposed to be used by the client drivers.
+ */
+int genwqe_device_create(struct genwqe_dev *cd)
+{
+	int rc;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	/*
+	 * Here starts the individual setup per client. It must
+	 * initialize its own cdev data structure with its own fops.
+	 * The appropriate devnum needs to be created. The ranges must
+	 * not overlap.
+	 */
+	rc = alloc_chrdev_region(&cd->devnum_genwqe, 0,
+				 GENWQE_MAX_MINOR, GENWQE_DEVNAME);
+	if (rc < 0) {
+		dev_err(&pci_dev->dev, "err: alloc_chrdev_region failed\n");
+		goto err_dev;
+	}
+
+	cdev_init(&cd->cdev_genwqe, &genwqe_fops);
+	cd->cdev_genwqe.owner = THIS_MODULE;
+
+	rc = cdev_add(&cd->cdev_genwqe, cd->devnum_genwqe, 1);
+	if (rc < 0) {
+		dev_err(&pci_dev->dev, "err: cdev_add failed\n");
+		goto err_add;
+	}
+
+	/*
+	 * Finally the device in /dev/... must be created. The rule is
+	 * to use card%d_clientname for each created device.
+	 */
+	cd->dev = device_create_with_groups(cd->class_genwqe,
+					    &cd->pci_dev->dev,
+					    cd->devnum_genwqe, cd,
+					    genwqe_attribute_groups,
+					    GENWQE_DEVNAME "%u_card",
+					    cd->card_idx);
+	if (IS_ERR(cd->dev)) {
+		rc = PTR_ERR(cd->dev);
+		goto err_cdev;
+	}
+
+	rc = genwqe_init_debugfs(cd);
+	if (rc != 0)
+		goto err_debugfs;
+
+	return 0;
+
+ err_debugfs:
+	device_destroy(cd->class_genwqe, cd->devnum_genwqe);
+ err_cdev:
+	cdev_del(&cd->cdev_genwqe);
+ err_add:
+	unregister_chrdev_region(cd->devnum_genwqe, GENWQE_MAX_MINOR);
+ err_dev:
+	cd->dev = NULL;
+	return rc;
+}
+
+static int genwqe_inform_and_stop_processes(struct genwqe_dev *cd)
+{
+	int rc;
+	unsigned int i;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	if (!genwqe_open_files(cd))
+		return 0;
+
+	dev_warn(&pci_dev->dev, "[%s] send SIGIO and wait ...\n", __func__);
+
+	rc = genwqe_kill_fasync(cd, SIGIO);
+	if (rc > 0) {
+		/* give kill_timeout seconds to close file descriptors ... */
+		for (i = 0; (i < genwqe_kill_timeout) &&
+			     genwqe_open_files(cd); i++) {
+			dev_info(&pci_dev->dev, "  %d sec ...", i);
+
+			cond_resched();
+			msleep(1000);
+		}
+
+		/* if no open files we can safely continue, else ... */
+		if (!genwqe_open_files(cd))
+			return 0;
+
+		dev_warn(&pci_dev->dev,
+			 "[%s] send SIGKILL and wait ...\n", __func__);
+
+		rc = genwqe_force_sig(cd, SIGKILL); /* force terminate */
+		if (rc) {
+			/* Give kill_timout more seconds to end processes */
+			for (i = 0; (i < genwqe_kill_timeout) &&
+				     genwqe_open_files(cd); i++) {
+				dev_warn(&pci_dev->dev, "  %d sec ...", i);
+
+				cond_resched();
+				msleep(1000);
+			}
+		}
+	}
+	return 0;
+}
+
+/**
+ * genwqe_device_remove() - Remove genwqe's char device
+ *
+ * This function must be called after the client devices are removed
+ * because it will free the major/minor number range for the genwqe
+ * drivers.
+ *
+ * This function must be robust enough to be called twice.
+ */
+int genwqe_device_remove(struct genwqe_dev *cd)
+{
+	int rc;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	if (!genwqe_device_initialized(cd))
+		return 1;
+
+	genwqe_inform_and_stop_processes(cd);
+
+	/*
+	 * We currently do wait until all filedescriptors are
+	 * closed. This leads to a problem when we abort the
+	 * application which will decrease this reference from
+	 * 1/unused to 0/illegal and not from 2/used 1/empty.
+	 */
+	rc = atomic_read(&cd->cdev_genwqe.kobj.kref.refcount);
+	if (rc != 1) {
+		dev_err(&pci_dev->dev,
+			"[%s] err: cdev_genwqe...refcount=%d\n", __func__, rc);
+		panic("Fatal err: cannot free resources with pending references!");
+	}
+
+	genqwe_exit_debugfs(cd);
+	device_destroy(cd->class_genwqe, cd->devnum_genwqe);
+	cdev_del(&cd->cdev_genwqe);
+	unregister_chrdev_region(cd->devnum_genwqe, GENWQE_MAX_MINOR);
+	cd->dev = NULL;
+
+	return 0;
+}
diff --git a/drivers/misc/genwqe/card_sysfs.c b/drivers/misc/genwqe/card_sysfs.c
new file mode 100644
index 00000000000..a72a99266c3
--- /dev/null
+++ b/drivers/misc/genwqe/card_sysfs.c
@@ -0,0 +1,288 @@
+/**
+ * IBM Accelerator Family 'GenWQE'
+ *
+ * (C) Copyright IBM Corp. 2013
+ *
+ * Author: Frank Haverkamp <haver@linux.vnet.ibm.com>
+ * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com>
+ * Author: Michael Jung <mijung@de.ibm.com>
+ * Author: Michael Ruettger <michael@ibmra.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+/*
+ * Sysfs interfaces for the GenWQE card. There are attributes to query
+ * the version of the bitstream as well as some for the driver. For
+ * debugging, please also see the debugfs interfaces of this driver.
+ */
+
+#include <linux/version.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+#include <linux/fs.h>
+#include <linux/sysfs.h>
+#include <linux/ctype.h>
+#include <linux/device.h>
+
+#include "card_base.h"
+#include "card_ddcb.h"
+
+static const char * const genwqe_types[] = {
+	[GENWQE_TYPE_ALTERA_230] = "GenWQE4-230",
+	[GENWQE_TYPE_ALTERA_530] = "GenWQE4-530",
+	[GENWQE_TYPE_ALTERA_A4]  = "GenWQE5-A4",
+	[GENWQE_TYPE_ALTERA_A7]  = "GenWQE5-A7",
+};
+
+static ssize_t status_show(struct device *dev, struct device_attribute *attr,
+			   char *buf)
+{
+	struct genwqe_dev *cd = dev_get_drvdata(dev);
+	const char *cs[GENWQE_CARD_STATE_MAX] = { "unused", "used", "error" };
+
+	return sprintf(buf, "%s\n", cs[cd->card_state]);
+}
+static DEVICE_ATTR_RO(status);
+
+static ssize_t appid_show(struct device *dev, struct device_attribute *attr,
+			  char *buf)
+{
+	char app_name[5];
+	struct genwqe_dev *cd = dev_get_drvdata(dev);
+
+	genwqe_read_app_id(cd, app_name, sizeof(app_name));
+	return sprintf(buf, "%s\n", app_name);
+}
+static DEVICE_ATTR_RO(appid);
+
+static ssize_t version_show(struct device *dev, struct device_attribute *attr,
+			    char *buf)
+{
+	u64 slu_id, app_id;
+	struct genwqe_dev *cd = dev_get_drvdata(dev);
+
+	slu_id = __genwqe_readq(cd, IO_SLU_UNITCFG);
+	app_id = __genwqe_readq(cd, IO_APP_UNITCFG);
+
+	return sprintf(buf, "%016llx.%016llx\n", slu_id, app_id);
+}
+static DEVICE_ATTR_RO(version);
+
+static ssize_t type_show(struct device *dev, struct device_attribute *attr,
+			 char *buf)
+{
+	u8 card_type;
+	struct genwqe_dev *cd = dev_get_drvdata(dev);
+
+	card_type = genwqe_card_type(cd);
+	return sprintf(buf, "%s\n", (card_type >= ARRAY_SIZE(genwqe_types)) ?
+		       "invalid" : genwqe_types[card_type]);
+}
+static DEVICE_ATTR_RO(type);
+
+static ssize_t driver_show(struct device *dev, struct device_attribute *attr,
+			   char *buf)
+{
+	return sprintf(buf, "%s\n", DRV_VERS_STRING);
+}
+static DEVICE_ATTR_RO(driver);
+
+static ssize_t tempsens_show(struct device *dev, struct device_attribute *attr,
+			     char *buf)
+{
+	u64 tempsens;
+	struct genwqe_dev *cd = dev_get_drvdata(dev);
+
+	tempsens = __genwqe_readq(cd, IO_SLU_TEMPERATURE_SENSOR);
+	return sprintf(buf, "%016llx\n", tempsens);
+}
+static DEVICE_ATTR_RO(tempsens);
+
+static ssize_t freerunning_timer_show(struct device *dev,
+				      struct device_attribute *attr,
+				      char *buf)
+{
+	u64 t;
+	struct genwqe_dev *cd = dev_get_drvdata(dev);
+
+	t = __genwqe_readq(cd, IO_SLC_FREE_RUNNING_TIMER);
+	return sprintf(buf, "%016llx\n", t);
+}
+static DEVICE_ATTR_RO(freerunning_timer);
+
+static ssize_t queue_working_time_show(struct device *dev,
+				       struct device_attribute *attr,
+				       char *buf)
+{
+	u64 t;
+	struct genwqe_dev *cd = dev_get_drvdata(dev);
+
+	t = __genwqe_readq(cd, IO_SLC_QUEUE_WTIME);
+	return sprintf(buf, "%016llx\n", t);
+}
+static DEVICE_ATTR_RO(queue_working_time);
+
+static ssize_t base_clock_show(struct device *dev,
+			       struct device_attribute *attr,
+			       char *buf)
+{
+	u64 base_clock;
+	struct genwqe_dev *cd = dev_get_drvdata(dev);
+
+	base_clock = genwqe_base_clock_frequency(cd);
+	return sprintf(buf, "%lld\n", base_clock);
+}
+static DEVICE_ATTR_RO(base_clock);
+
+/**
+ * curr_bitstream_show() - Show the current bitstream id
+ *
+ * There is a bug in some old versions of the CPLD which selects the
+ * bitstream, which causes the IO_SLU_BITSTREAM register to report
+ * unreliable data in very rare cases. This makes this sysfs
+ * unreliable up to the point were a new CPLD version is being used.
+ *
+ * Unfortunately there is no automatic way yet to query the CPLD
+ * version, such that you need to manually ensure via programming
+ * tools that you have a recent version of the CPLD software.
+ *
+ * The proposed circumvention is to use a special recovery bitstream
+ * on the backup partition (0) to identify problems while loading the
+ * image.
+ */
+static ssize_t curr_bitstream_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	int curr_bitstream;
+	struct genwqe_dev *cd = dev_get_drvdata(dev);
+
+	curr_bitstream = __genwqe_readq(cd, IO_SLU_BITSTREAM) & 0x1;
+	return sprintf(buf, "%d\n", curr_bitstream);
+}
+static DEVICE_ATTR_RO(curr_bitstream);
+
+/**
+ * next_bitstream_show() - Show the next activated bitstream
+ *
+ * IO_SLC_CFGREG_SOFTRESET: This register can only be accessed by the PF.
+ */
+static ssize_t next_bitstream_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	int next_bitstream;
+	struct genwqe_dev *cd = dev_get_drvdata(dev);
+
+	switch ((cd->softreset & 0xc) >> 2) {
+	case 0x2:
+		next_bitstream =  0;
+		break;
+	case 0x3:
+		next_bitstream =  1;
+		break;
+	default:
+		next_bitstream = -1;
+		break;		/* error */
+	}
+	return sprintf(buf, "%d\n", next_bitstream);
+}
+
+static ssize_t next_bitstream_store(struct device *dev,
+				    struct device_attribute *attr,
+				    const char *buf, size_t count)
+{
+	int partition;
+	struct genwqe_dev *cd = dev_get_drvdata(dev);
+
+	if (kstrtoint(buf, 0, &partition) < 0)
+		return -EINVAL;
+
+	switch (partition) {
+	case 0x0:
+		cd->softreset = 0x78;
+		break;
+	case 0x1:
+		cd->softreset = 0x7c;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	__genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET, cd->softreset);
+	return count;
+}
+static DEVICE_ATTR_RW(next_bitstream);
+
+/*
+ * Create device_attribute structures / params: name, mode, show, store
+ * additional flag if valid in VF
+ */
+static struct attribute *genwqe_attributes[] = {
+	&dev_attr_tempsens.attr,
+	&dev_attr_next_bitstream.attr,
+	&dev_attr_curr_bitstream.attr,
+	&dev_attr_base_clock.attr,
+	&dev_attr_driver.attr,
+	&dev_attr_type.attr,
+	&dev_attr_version.attr,
+	&dev_attr_appid.attr,
+	&dev_attr_status.attr,
+	&dev_attr_freerunning_timer.attr,
+	&dev_attr_queue_working_time.attr,
+	NULL,
+};
+
+static struct attribute *genwqe_normal_attributes[] = {
+	&dev_attr_driver.attr,
+	&dev_attr_type.attr,
+	&dev_attr_version.attr,
+	&dev_attr_appid.attr,
+	&dev_attr_status.attr,
+	&dev_attr_freerunning_timer.attr,
+	&dev_attr_queue_working_time.attr,
+	NULL,
+};
+
+/**
+ * genwqe_is_visible() - Determine if sysfs attribute should be visible or not
+ *
+ * VFs have restricted mmio capabilities, so not all sysfs entries
+ * are allowed in VFs.
+ */
+static umode_t genwqe_is_visible(struct kobject *kobj,
+				 struct attribute *attr, int n)
+{
+	unsigned int j;
+	struct device *dev = container_of(kobj, struct device, kobj);
+	struct genwqe_dev *cd = dev_get_drvdata(dev);
+	umode_t mode = attr->mode;
+
+	if (genwqe_is_privileged(cd))
+		return mode;
+
+	for (j = 0; genwqe_normal_attributes[j] != NULL;  j++)
+		if (genwqe_normal_attributes[j] == attr)
+			return mode;
+
+	return 0;
+}
+
+static struct attribute_group genwqe_attribute_group = {
+	.is_visible = genwqe_is_visible,
+	.attrs      = genwqe_attributes,
+};
+
+const struct attribute_group *genwqe_attribute_groups[] = {
+	&genwqe_attribute_group,
+	NULL,
+};
diff --git a/drivers/misc/genwqe/card_utils.c b/drivers/misc/genwqe/card_utils.c
new file mode 100644
index 00000000000..62cc6bb3f62
--- /dev/null
+++ b/drivers/misc/genwqe/card_utils.c
@@ -0,0 +1,1034 @@
+/**
+ * IBM Accelerator Family 'GenWQE'
+ *
+ * (C) Copyright IBM Corp. 2013
+ *
+ * Author: Frank Haverkamp <haver@linux.vnet.ibm.com>
+ * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com>
+ * Author: Michael Jung <mijung@de.ibm.com>
+ * Author: Michael Ruettger <michael@ibmra.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+/*
+ * Miscelanous functionality used in the other GenWQE driver parts.
+ */
+
+#include <linux/kernel.h>
+#include <linux/dma-mapping.h>
+#include <linux/sched.h>
+#include <linux/vmalloc.h>
+#include <linux/page-flags.h>
+#include <linux/scatterlist.h>
+#include <linux/hugetlb.h>
+#include <linux/iommu.h>
+#include <linux/delay.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+#include <linux/ctype.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/delay.h>
+#include <asm/pgtable.h>
+
+#include "genwqe_driver.h"
+#include "card_base.h"
+#include "card_ddcb.h"
+
+/**
+ * __genwqe_writeq() - Write 64-bit register
+ * @cd:	        genwqe device descriptor
+ * @byte_offs:  byte offset within BAR
+ * @val:        64-bit value
+ *
+ * Return: 0 if success; < 0 if error
+ */
+int __genwqe_writeq(struct genwqe_dev *cd, u64 byte_offs, u64 val)
+{
+	if (cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE)
+		return -EIO;
+
+	if (cd->mmio == NULL)
+		return -EIO;
+
+	__raw_writeq((__force u64)cpu_to_be64(val), cd->mmio + byte_offs);
+	return 0;
+}
+
+/**
+ * __genwqe_readq() - Read 64-bit register
+ * @cd:         genwqe device descriptor
+ * @byte_offs:  offset within BAR
+ *
+ * Return: value from register
+ */
+u64 __genwqe_readq(struct genwqe_dev *cd, u64 byte_offs)
+{
+	if (cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE)
+		return 0xffffffffffffffffull;
+
+	if ((cd->err_inject & GENWQE_INJECT_GFIR_FATAL) &&
+	    (byte_offs == IO_SLC_CFGREG_GFIR))
+		return 0x000000000000ffffull;
+
+	if ((cd->err_inject & GENWQE_INJECT_GFIR_INFO) &&
+	    (byte_offs == IO_SLC_CFGREG_GFIR))
+		return 0x00000000ffff0000ull;
+
+	if (cd->mmio == NULL)
+		return 0xffffffffffffffffull;
+
+	return be64_to_cpu((__force __be64)__raw_readq(cd->mmio + byte_offs));
+}
+
+/**
+ * __genwqe_writel() - Write 32-bit register
+ * @cd:	        genwqe device descriptor
+ * @byte_offs:  byte offset within BAR
+ * @val:        32-bit value
+ *
+ * Return: 0 if success; < 0 if error
+ */
+int __genwqe_writel(struct genwqe_dev *cd, u64 byte_offs, u32 val)
+{
+	if (cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE)
+		return -EIO;
+
+	if (cd->mmio == NULL)
+		return -EIO;
+
+	__raw_writel((__force u32)cpu_to_be32(val), cd->mmio + byte_offs);
+	return 0;
+}
+
+/**
+ * __genwqe_readl() - Read 32-bit register
+ * @cd:         genwqe device descriptor
+ * @byte_offs:  offset within BAR
+ *
+ * Return: Value from register
+ */
+u32 __genwqe_readl(struct genwqe_dev *cd, u64 byte_offs)
+{
+	if (cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE)
+		return 0xffffffff;
+
+	if (cd->mmio == NULL)
+		return 0xffffffff;
+
+	return be32_to_cpu((__force __be32)__raw_readl(cd->mmio + byte_offs));
+}
+
+/**
+ * genwqe_read_app_id() - Extract app_id
+ *
+ * app_unitcfg need to be filled with valid data first
+ */
+int genwqe_read_app_id(struct genwqe_dev *cd, char *app_name, int len)
+{
+	int i, j;
+	u32 app_id = (u32)cd->app_unitcfg;
+
+	memset(app_name, 0, len);
+	for (i = 0, j = 0; j < min(len, 4); j++) {
+		char ch = (char)((app_id >> (24 - j*8)) & 0xff);
+		if (ch == ' ')
+			continue;
+		app_name[i++] = isprint(ch) ? ch : 'X';
+	}
+	return i;
+}
+
+/**
+ * genwqe_init_crc32() - Prepare a lookup table for fast crc32 calculations
+ *
+ * Existing kernel functions seem to use a different polynom,
+ * therefore we could not use them here.
+ *
+ * Genwqe's Polynomial = 0x20044009
+ */
+#define CRC32_POLYNOMIAL	0x20044009
+static u32 crc32_tab[256];	/* crc32 lookup table */
+
+void genwqe_init_crc32(void)
+{
+	int i, j;
+	u32 crc;
+
+	for (i = 0;  i < 256;  i++) {
+		crc = i << 24;
+		for (j = 0;  j < 8;  j++) {
+			if (crc & 0x80000000)
+				crc = (crc << 1) ^ CRC32_POLYNOMIAL;
+			else
+				crc = (crc << 1);
+		}
+		crc32_tab[i] = crc;
+	}
+}
+
+/**
+ * genwqe_crc32() - Generate 32-bit crc as required for DDCBs
+ * @buff:       pointer to data buffer
+ * @len:        length of data for calculation
+ * @init:       initial crc (0xffffffff at start)
+ *
+ * polynomial = x^32 * + x^29 + x^18 + x^14 + x^3 + 1 (0x20044009)
+
+ * Example: 4 bytes 0x01 0x02 0x03 0x04 with init=0xffffffff should
+ * result in a crc32 of 0xf33cb7d3.
+ *
+ * The existing kernel crc functions did not cover this polynom yet.
+ *
+ * Return: crc32 checksum.
+ */
+u32 genwqe_crc32(u8 *buff, size_t len, u32 init)
+{
+	int i;
+	u32 crc;
+
+	crc = init;
+	while (len--) {
+		i = ((crc >> 24) ^ *buff++) & 0xFF;
+		crc = (crc << 8) ^ crc32_tab[i];
+	}
+	return crc;
+}
+
+void *__genwqe_alloc_consistent(struct genwqe_dev *cd, size_t size,
+			       dma_addr_t *dma_handle)
+{
+	if (get_order(size) > MAX_ORDER)
+		return NULL;
+
+	return pci_alloc_consistent(cd->pci_dev, size, dma_handle);
+}
+
+void __genwqe_free_consistent(struct genwqe_dev *cd, size_t size,
+			     void *vaddr, dma_addr_t dma_handle)
+{
+	if (vaddr == NULL)
+		return;
+
+	pci_free_consistent(cd->pci_dev, size, vaddr, dma_handle);
+}
+
+static void genwqe_unmap_pages(struct genwqe_dev *cd, dma_addr_t *dma_list,
+			      int num_pages)
+{
+	int i;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	for (i = 0; (i < num_pages) && (dma_list[i] != 0x0); i++) {
+		pci_unmap_page(pci_dev, dma_list[i],
+			       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+		dma_list[i] = 0x0;
+	}
+}
+
+static int genwqe_map_pages(struct genwqe_dev *cd,
+			   struct page **page_list, int num_pages,
+			   dma_addr_t *dma_list)
+{
+	int i;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	/* establish DMA mapping for requested pages */
+	for (i = 0; i < num_pages; i++) {
+		dma_addr_t daddr;
+
+		dma_list[i] = 0x0;
+		daddr = pci_map_page(pci_dev, page_list[i],
+				     0,	 /* map_offs */
+				     PAGE_SIZE,
+				     PCI_DMA_BIDIRECTIONAL);  /* FIXME rd/rw */
+
+		if (pci_dma_mapping_error(pci_dev, daddr)) {
+			dev_err(&pci_dev->dev,
+				"[%s] err: no dma addr daddr=%016llx!\n",
+				__func__, (long long)daddr);
+			goto err;
+		}
+
+		dma_list[i] = daddr;
+	}
+	return 0;
+
+ err:
+	genwqe_unmap_pages(cd, dma_list, num_pages);
+	return -EIO;
+}
+
+static int genwqe_sgl_size(int num_pages)
+{
+	int len, num_tlb = num_pages / 7;
+
+	len = sizeof(struct sg_entry) * (num_pages+num_tlb + 1);
+	return roundup(len, PAGE_SIZE);
+}
+
+/**
+ * genwqe_alloc_sync_sgl() - Allocate memory for sgl and overlapping pages
+ *
+ * Allocates memory for sgl and overlapping pages. Pages which might
+ * overlap other user-space memory blocks are being cached for DMAs,
+ * such that we do not run into syncronization issues. Data is copied
+ * from user-space into the cached pages.
+ */
+int genwqe_alloc_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl,
+			  void __user *user_addr, size_t user_size)
+{
+	int rc;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	sgl->fpage_offs = offset_in_page((unsigned long)user_addr);
+	sgl->fpage_size = min_t(size_t, PAGE_SIZE-sgl->fpage_offs, user_size);
+	sgl->nr_pages = DIV_ROUND_UP(sgl->fpage_offs + user_size, PAGE_SIZE);
+	sgl->lpage_size = (user_size - sgl->fpage_size) % PAGE_SIZE;
+
+	dev_dbg(&pci_dev->dev, "[%s] uaddr=%p usize=%8ld nr_pages=%ld "
+		"fpage_offs=%lx fpage_size=%ld lpage_size=%ld\n",
+		__func__, user_addr, user_size, sgl->nr_pages,
+		sgl->fpage_offs, sgl->fpage_size, sgl->lpage_size);
+
+	sgl->user_addr = user_addr;
+	sgl->user_size = user_size;
+	sgl->sgl_size = genwqe_sgl_size(sgl->nr_pages);
+
+	if (get_order(sgl->sgl_size) > MAX_ORDER) {
+		dev_err(&pci_dev->dev,
+			"[%s] err: too much memory requested!\n", __func__);
+		return -ENOMEM;
+	}
+
+	sgl->sgl = __genwqe_alloc_consistent(cd, sgl->sgl_size,
+					     &sgl->sgl_dma_addr);
+	if (sgl->sgl == NULL) {
+		dev_err(&pci_dev->dev,
+			"[%s] err: no memory available!\n", __func__);
+		return -ENOMEM;
+	}
+
+	/* Only use buffering on incomplete pages */
+	if ((sgl->fpage_size != 0) && (sgl->fpage_size != PAGE_SIZE)) {
+		sgl->fpage = __genwqe_alloc_consistent(cd, PAGE_SIZE,
+						       &sgl->fpage_dma_addr);
+		if (sgl->fpage == NULL)
+			goto err_out;
+
+		/* Sync with user memory */
+		if (copy_from_user(sgl->fpage + sgl->fpage_offs,
+				   user_addr, sgl->fpage_size)) {
+			rc = -EFAULT;
+			goto err_out;
+		}
+	}
+	if (sgl->lpage_size != 0) {
+		sgl->lpage = __genwqe_alloc_consistent(cd, PAGE_SIZE,
+						       &sgl->lpage_dma_addr);
+		if (sgl->lpage == NULL)
+			goto err_out1;
+
+		/* Sync with user memory */
+		if (copy_from_user(sgl->lpage, user_addr + user_size -
+				   sgl->lpage_size, sgl->lpage_size)) {
+			rc = -EFAULT;
+			goto err_out1;
+		}
+	}
+	return 0;
+
+ err_out1:
+	__genwqe_free_consistent(cd, PAGE_SIZE, sgl->fpage,
+				 sgl->fpage_dma_addr);
+ err_out:
+	__genwqe_free_consistent(cd, sgl->sgl_size, sgl->sgl,
+				 sgl->sgl_dma_addr);
+	return -ENOMEM;
+}
+
+int genwqe_setup_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl,
+		     dma_addr_t *dma_list)
+{
+	int i = 0, j = 0, p;
+	unsigned long dma_offs, map_offs;
+	dma_addr_t prev_daddr = 0;
+	struct sg_entry *s, *last_s = NULL;
+	size_t size = sgl->user_size;
+
+	dma_offs = 128;		/* next block if needed/dma_offset */
+	map_offs = sgl->fpage_offs; /* offset in first page */
+
+	s = &sgl->sgl[0];	/* first set of 8 entries */
+	p = 0;			/* page */
+	while (p < sgl->nr_pages) {
+		dma_addr_t daddr;
+		unsigned int size_to_map;
+
+		/* always write the chaining entry, cleanup is done later */
+		j = 0;
+		s[j].target_addr = cpu_to_be64(sgl->sgl_dma_addr + dma_offs);
+		s[j].len	 = cpu_to_be32(128);
+		s[j].flags	 = cpu_to_be32(SG_CHAINED);
+		j++;
+
+		while (j < 8) {
+			/* DMA mapping for requested page, offs, size */
+			size_to_map = min(size, PAGE_SIZE - map_offs);
+
+			if ((p == 0) && (sgl->fpage != NULL)) {
+				daddr = sgl->fpage_dma_addr + map_offs;
+
+			} else if ((p == sgl->nr_pages - 1) &&
+				   (sgl->lpage != NULL)) {
+				daddr = sgl->lpage_dma_addr;
+			} else {
+				daddr = dma_list[p] + map_offs;
+			}
+
+			size -= size_to_map;
+			map_offs = 0;
+
+			if (prev_daddr == daddr) {
+				u32 prev_len = be32_to_cpu(last_s->len);
+
+				/* pr_info("daddr combining: "
+					"%016llx/%08x -> %016llx\n",
+					prev_daddr, prev_len, daddr); */
+
+				last_s->len = cpu_to_be32(prev_len +
+							  size_to_map);
+
+				p++; /* process next page */
+				if (p == sgl->nr_pages)
+					goto fixup;  /* nothing to do */
+
+				prev_daddr = daddr + size_to_map;
+				continue;
+			}
+
+			/* start new entry */
+			s[j].target_addr = cpu_to_be64(daddr);
+			s[j].len	 = cpu_to_be32(size_to_map);
+			s[j].flags	 = cpu_to_be32(SG_DATA);
+			prev_daddr = daddr + size_to_map;
+			last_s = &s[j];
+			j++;
+
+			p++;	/* process next page */
+			if (p == sgl->nr_pages)
+				goto fixup;  /* nothing to do */
+		}
+		dma_offs += 128;
+		s += 8;		/* continue 8 elements further */
+	}
+ fixup:
+	if (j == 1) {		/* combining happend on last entry! */
+		s -= 8;		/* full shift needed on previous sgl block */
+		j =  7;		/* shift all elements */
+	}
+
+	for (i = 0; i < j; i++)	/* move elements 1 up */
+		s[i] = s[i + 1];
+
+	s[i].target_addr = cpu_to_be64(0);
+	s[i].len	 = cpu_to_be32(0);
+	s[i].flags	 = cpu_to_be32(SG_END_LIST);
+	return 0;
+}
+
+/**
+ * genwqe_free_sync_sgl() - Free memory for sgl and overlapping pages
+ *
+ * After the DMA transfer has been completed we free the memory for
+ * the sgl and the cached pages. Data is being transfered from cached
+ * pages into user-space buffers.
+ */
+int genwqe_free_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl)
+{
+	int rc = 0;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	if (sgl->fpage) {
+		if (copy_to_user(sgl->user_addr, sgl->fpage + sgl->fpage_offs,
+				 sgl->fpage_size)) {
+			dev_err(&pci_dev->dev, "[%s] err: copying fpage!\n",
+				__func__);
+			rc = -EFAULT;
+		}
+		__genwqe_free_consistent(cd, PAGE_SIZE, sgl->fpage,
+					 sgl->fpage_dma_addr);
+		sgl->fpage = NULL;
+		sgl->fpage_dma_addr = 0;
+	}
+	if (sgl->lpage) {
+		if (copy_to_user(sgl->user_addr + sgl->user_size -
+				 sgl->lpage_size, sgl->lpage,
+				 sgl->lpage_size)) {
+			dev_err(&pci_dev->dev, "[%s] err: copying lpage!\n",
+				__func__);
+			rc = -EFAULT;
+		}
+		__genwqe_free_consistent(cd, PAGE_SIZE, sgl->lpage,
+					 sgl->lpage_dma_addr);
+		sgl->lpage = NULL;
+		sgl->lpage_dma_addr = 0;
+	}
+	__genwqe_free_consistent(cd, sgl->sgl_size, sgl->sgl,
+				 sgl->sgl_dma_addr);
+
+	sgl->sgl = NULL;
+	sgl->sgl_dma_addr = 0x0;
+	sgl->sgl_size = 0;
+	return rc;
+}
+
+/**
+ * free_user_pages() - Give pinned pages back
+ *
+ * Documentation of get_user_pages is in mm/memory.c:
+ *
+ * If the page is written to, set_page_dirty (or set_page_dirty_lock,
+ * as appropriate) must be called after the page is finished with, and
+ * before put_page is called.
+ *
+ * FIXME Could be of use to others and might belong in the generic
+ * code, if others agree. E.g.
+ *    ll_free_user_pages in drivers/staging/lustre/lustre/llite/rw26.c
+ *    ceph_put_page_vector in net/ceph/pagevec.c
+ *    maybe more?
+ */
+static int free_user_pages(struct page **page_list, unsigned int nr_pages,
+			   int dirty)
+{
+	unsigned int i;
+
+	for (i = 0; i < nr_pages; i++) {
+		if (page_list[i] != NULL) {
+			if (dirty)
+				set_page_dirty_lock(page_list[i]);
+			put_page(page_list[i]);
+		}
+	}
+	return 0;
+}
+
+/**
+ * genwqe_user_vmap() - Map user-space memory to virtual kernel memory
+ * @cd:         pointer to genwqe device
+ * @m:          mapping params
+ * @uaddr:      user virtual address
+ * @size:       size of memory to be mapped
+ *
+ * We need to think about how we could speed this up. Of course it is
+ * not a good idea to do this over and over again, like we are
+ * currently doing it. Nevertheless, I am curious where on the path
+ * the performance is spend. Most probably within the memory
+ * allocation functions, but maybe also in the DMA mapping code.
+ *
+ * Restrictions: The maximum size of the possible mapping currently depends
+ *               on the amount of memory we can get using kzalloc() for the
+ *               page_list and pci_alloc_consistent for the sg_list.
+ *               The sg_list is currently itself not scattered, which could
+ *               be fixed with some effort. The page_list must be split into
+ *               PAGE_SIZE chunks too. All that will make the complicated
+ *               code more complicated.
+ *
+ * Return: 0 if success
+ */
+int genwqe_user_vmap(struct genwqe_dev *cd, struct dma_mapping *m, void *uaddr,
+		     unsigned long size, struct ddcb_requ *req)
+{
+	int rc = -EINVAL;
+	unsigned long data, offs;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	if ((uaddr == NULL) || (size == 0)) {
+		m->size = 0;	/* mark unused and not added */
+		return -EINVAL;
+	}
+	m->u_vaddr = uaddr;
+	m->size    = size;
+
+	/* determine space needed for page_list. */
+	data = (unsigned long)uaddr;
+	offs = offset_in_page(data);
+	m->nr_pages = DIV_ROUND_UP(offs + size, PAGE_SIZE);
+
+	m->page_list = kcalloc(m->nr_pages,
+			       sizeof(struct page *) + sizeof(dma_addr_t),
+			       GFP_KERNEL);
+	if (!m->page_list) {
+		dev_err(&pci_dev->dev, "err: alloc page_list failed\n");
+		m->nr_pages = 0;
+		m->u_vaddr = NULL;
+		m->size = 0;	/* mark unused and not added */
+		return -ENOMEM;
+	}
+	m->dma_list = (dma_addr_t *)(m->page_list + m->nr_pages);
+
+	/* pin user pages in memory */
+	rc = get_user_pages_fast(data & PAGE_MASK, /* page aligned addr */
+				 m->nr_pages,
+				 1,		/* write by caller */
+				 m->page_list);	/* ptrs to pages */
+
+	/* assumption: get_user_pages can be killed by signals. */
+	if (rc < m->nr_pages) {
+		free_user_pages(m->page_list, rc, 0);
+		rc = -EFAULT;
+		goto fail_get_user_pages;
+	}
+
+	rc = genwqe_map_pages(cd, m->page_list, m->nr_pages, m->dma_list);
+	if (rc != 0)
+		goto fail_free_user_pages;
+
+	return 0;
+
+ fail_free_user_pages:
+	free_user_pages(m->page_list, m->nr_pages, 0);
+
+ fail_get_user_pages:
+	kfree(m->page_list);
+	m->page_list = NULL;
+	m->dma_list = NULL;
+	m->nr_pages = 0;
+	m->u_vaddr = NULL;
+	m->size = 0;		/* mark unused and not added */
+	return rc;
+}
+
+/**
+ * genwqe_user_vunmap() - Undo mapping of user-space mem to virtual kernel
+ *                        memory
+ * @cd:         pointer to genwqe device
+ * @m:          mapping params
+ */
+int genwqe_user_vunmap(struct genwqe_dev *cd, struct dma_mapping *m,
+		       struct ddcb_requ *req)
+{
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	if (!dma_mapping_used(m)) {
+		dev_err(&pci_dev->dev, "[%s] err: mapping %p not used!\n",
+			__func__, m);
+		return -EINVAL;
+	}
+
+	if (m->dma_list)
+		genwqe_unmap_pages(cd, m->dma_list, m->nr_pages);
+
+	if (m->page_list) {
+		free_user_pages(m->page_list, m->nr_pages, 1);
+
+		kfree(m->page_list);
+		m->page_list = NULL;
+		m->dma_list = NULL;
+		m->nr_pages = 0;
+	}
+
+	m->u_vaddr = NULL;
+	m->size = 0;		/* mark as unused and not added */
+	return 0;
+}
+
+/**
+ * genwqe_card_type() - Get chip type SLU Configuration Register
+ * @cd:         pointer to the genwqe device descriptor
+ * Return: 0: Altera Stratix-IV 230
+ *         1: Altera Stratix-IV 530
+ *         2: Altera Stratix-V A4
+ *         3: Altera Stratix-V A7
+ */
+u8 genwqe_card_type(struct genwqe_dev *cd)
+{
+	u64 card_type = cd->slu_unitcfg;
+	return (u8)((card_type & IO_SLU_UNITCFG_TYPE_MASK) >> 20);
+}
+
+/**
+ * genwqe_card_reset() - Reset the card
+ * @cd:         pointer to the genwqe device descriptor
+ */
+int genwqe_card_reset(struct genwqe_dev *cd)
+{
+	u64 softrst;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	if (!genwqe_is_privileged(cd))
+		return -ENODEV;
+
+	/* new SL */
+	__genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET, 0x1ull);
+	msleep(1000);
+	__genwqe_readq(cd, IO_HSU_FIR_CLR);
+	__genwqe_readq(cd, IO_APP_FIR_CLR);
+	__genwqe_readq(cd, IO_SLU_FIR_CLR);
+
+	/*
+	 * Read-modify-write to preserve the stealth bits
+	 *
+	 * For SL >= 039, Stealth WE bit allows removing
+	 * the read-modify-wrote.
+	 * r-m-w may require a mask 0x3C to avoid hitting hard
+	 * reset again for error reset (should be 0, chicken).
+	 */
+	softrst = __genwqe_readq(cd, IO_SLC_CFGREG_SOFTRESET) & 0x3cull;
+	__genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET, softrst | 0x2ull);
+
+	/* give ERRORRESET some time to finish */
+	msleep(50);
+
+	if (genwqe_need_err_masking(cd)) {
+		dev_info(&pci_dev->dev,
+			 "[%s] masking errors for old bitstreams\n", __func__);
+		__genwqe_writeq(cd, IO_SLC_MISC_DEBUG, 0x0aull);
+	}
+	return 0;
+}
+
+int genwqe_read_softreset(struct genwqe_dev *cd)
+{
+	u64 bitstream;
+
+	if (!genwqe_is_privileged(cd))
+		return -ENODEV;
+
+	bitstream = __genwqe_readq(cd, IO_SLU_BITSTREAM) & 0x1;
+	cd->softreset = (bitstream == 0) ? 0x8ull : 0xcull;
+	return 0;
+}
+
+/**
+ * genwqe_set_interrupt_capability() - Configure MSI capability structure
+ * @cd:         pointer to the device
+ * Return: 0 if no error
+ */
+int genwqe_set_interrupt_capability(struct genwqe_dev *cd, int count)
+{
+	int rc;
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	rc = pci_enable_msi_exact(pci_dev, count);
+	if (rc == 0)
+		cd->flags |= GENWQE_FLAG_MSI_ENABLED;
+	return rc;
+}
+
+/**
+ * genwqe_reset_interrupt_capability() - Undo genwqe_set_interrupt_capability()
+ * @cd:         pointer to the device
+ */
+void genwqe_reset_interrupt_capability(struct genwqe_dev *cd)
+{
+	struct pci_dev *pci_dev = cd->pci_dev;
+
+	if (cd->flags & GENWQE_FLAG_MSI_ENABLED) {
+		pci_disable_msi(pci_dev);
+		cd->flags &= ~GENWQE_FLAG_MSI_ENABLED;
+	}
+}
+
+/**
+ * set_reg_idx() - Fill array with data. Ignore illegal offsets.
+ * @cd:         card device
+ * @r:          debug register array
+ * @i:          index to desired entry
+ * @m:          maximum possible entries
+ * @addr:       addr which is read
+ * @index:      index in debug array
+ * @val:        read value
+ */
+static int set_reg_idx(struct genwqe_dev *cd, struct genwqe_reg *r,
+		       unsigned int *i, unsigned int m, u32 addr, u32 idx,
+		       u64 val)
+{
+	if (WARN_ON_ONCE(*i >= m))
+		return -EFAULT;
+
+	r[*i].addr = addr;
+	r[*i].idx = idx;
+	r[*i].val = val;
+	++*i;
+	return 0;
+}
+
+static int set_reg(struct genwqe_dev *cd, struct genwqe_reg *r,
+		   unsigned int *i, unsigned int m, u32 addr, u64 val)
+{
+	return set_reg_idx(cd, r, i, m, addr, 0, val);
+}
+
+int genwqe_read_ffdc_regs(struct genwqe_dev *cd, struct genwqe_reg *regs,
+			 unsigned int max_regs, int all)
+{
+	unsigned int i, j, idx = 0;
+	u32 ufir_addr, ufec_addr, sfir_addr, sfec_addr;
+	u64 gfir, sluid, appid, ufir, ufec, sfir, sfec;
+
+	/* Global FIR */
+	gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR);
+	set_reg(cd, regs, &idx, max_regs, IO_SLC_CFGREG_GFIR, gfir);
+
+	/* UnitCfg for SLU */
+	sluid = __genwqe_readq(cd, IO_SLU_UNITCFG); /* 0x00000000 */
+	set_reg(cd, regs, &idx, max_regs, IO_SLU_UNITCFG, sluid);
+
+	/* UnitCfg for APP */
+	appid = __genwqe_readq(cd, IO_APP_UNITCFG); /* 0x02000000 */
+	set_reg(cd, regs, &idx, max_regs, IO_APP_UNITCFG, appid);
+
+	/* Check all chip Units */
+	for (i = 0; i < GENWQE_MAX_UNITS; i++) {
+
+		/* Unit FIR */
+		ufir_addr = (i << 24) | 0x008;
+		ufir = __genwqe_readq(cd, ufir_addr);
+		set_reg(cd, regs, &idx, max_regs, ufir_addr, ufir);
+
+		/* Unit FEC */
+		ufec_addr = (i << 24) | 0x018;
+		ufec = __genwqe_readq(cd, ufec_addr);
+		set_reg(cd, regs, &idx, max_regs, ufec_addr, ufec);
+
+		for (j = 0; j < 64; j++) {
+			/* wherever there is a primary 1, read the 2ndary */
+			if (!all && (!(ufir & (1ull << j))))
+				continue;
+
+			sfir_addr = (i << 24) | (0x100 + 8 * j);
+			sfir = __genwqe_readq(cd, sfir_addr);
+			set_reg(cd, regs, &idx, max_regs, sfir_addr, sfir);
+
+			sfec_addr = (i << 24) | (0x300 + 8 * j);
+			sfec = __genwqe_readq(cd, sfec_addr);
+			set_reg(cd, regs, &idx, max_regs, sfec_addr, sfec);
+		}
+	}
+
+	/* fill with invalid data until end */
+	for (i = idx; i < max_regs; i++) {
+		regs[i].addr = 0xffffffff;
+		regs[i].val = 0xffffffffffffffffull;
+	}
+	return idx;
+}
+
+/**
+ * genwqe_ffdc_buff_size() - Calculates the number of dump registers
+ */
+int genwqe_ffdc_buff_size(struct genwqe_dev *cd, int uid)
+{
+	int entries = 0, ring, traps, traces, trace_entries;
+	u32 eevptr_addr, l_addr, d_len, d_type;
+	u64 eevptr, val, addr;
+
+	eevptr_addr = GENWQE_UID_OFFS(uid) | IO_EXTENDED_ERROR_POINTER;
+	eevptr = __genwqe_readq(cd, eevptr_addr);
+
+	if ((eevptr != 0x0) && (eevptr != -1ull)) {
+		l_addr = GENWQE_UID_OFFS(uid) | eevptr;
+
+		while (1) {
+			val = __genwqe_readq(cd, l_addr);
+
+			if ((val == 0x0) || (val == -1ull))
+				break;
+
+			/* 38:24 */
+			d_len  = (val & 0x0000007fff000000ull) >> 24;
+
+			/* 39 */
+			d_type = (val & 0x0000008000000000ull) >> 36;
+
+			if (d_type) {	/* repeat */
+				entries += d_len;
+			} else {	/* size in bytes! */
+				entries += d_len >> 3;
+			}
+
+			l_addr += 8;
+		}
+	}
+
+	for (ring = 0; ring < 8; ring++) {
+		addr = GENWQE_UID_OFFS(uid) | IO_EXTENDED_DIAG_MAP(ring);
+		val = __genwqe_readq(cd, addr);
+
+		if ((val == 0x0ull) || (val == -1ull))
+			continue;
+
+		traps = (val >> 24) & 0xff;
+		traces = (val >> 16) & 0xff;
+		trace_entries = val & 0xffff;
+
+		entries += traps + (traces * trace_entries);
+	}
+	return entries;
+}
+
+/**
+ * genwqe_ffdc_buff_read() - Implements LogoutExtendedErrorRegisters procedure
+ */
+int genwqe_ffdc_buff_read(struct genwqe_dev *cd, int uid,
+			  struct genwqe_reg *regs, unsigned int max_regs)
+{
+	int i, traps, traces, trace, trace_entries, trace_entry, ring;
+	unsigned int idx = 0;
+	u32 eevptr_addr, l_addr, d_addr, d_len, d_type;
+	u64 eevptr, e, val, addr;
+
+	eevptr_addr = GENWQE_UID_OFFS(uid) | IO_EXTENDED_ERROR_POINTER;
+	eevptr = __genwqe_readq(cd, eevptr_addr);
+
+	if ((eevptr != 0x0) && (eevptr != 0xffffffffffffffffull)) {
+		l_addr = GENWQE_UID_OFFS(uid) | eevptr;
+		while (1) {
+			e = __genwqe_readq(cd, l_addr);
+			if ((e == 0x0) || (e == 0xffffffffffffffffull))
+				break;
+
+			d_addr = (e & 0x0000000000ffffffull);	    /* 23:0 */
+			d_len  = (e & 0x0000007fff000000ull) >> 24; /* 38:24 */
+			d_type = (e & 0x0000008000000000ull) >> 36; /* 39 */
+			d_addr |= GENWQE_UID_OFFS(uid);
+
+			if (d_type) {
+				for (i = 0; i < (int)d_len; i++) {
+					val = __genwqe_readq(cd, d_addr);
+					set_reg_idx(cd, regs, &idx, max_regs,
+						    d_addr, i, val);
+				}
+			} else {
+				d_len >>= 3; /* Size in bytes! */
+				for (i = 0; i < (int)d_len; i++, d_addr += 8) {
+					val = __genwqe_readq(cd, d_addr);
+					set_reg_idx(cd, regs, &idx, max_regs,
+						    d_addr, 0, val);
+				}
+			}
+			l_addr += 8;
+		}
+	}
+
+	/*
+	 * To save time, there are only 6 traces poplulated on Uid=2,
+	 * Ring=1. each with iters=512.
+	 */
+	for (ring = 0; ring < 8; ring++) { /* 0 is fls, 1 is fds,
+					      2...7 are ASI rings */
+		addr = GENWQE_UID_OFFS(uid) | IO_EXTENDED_DIAG_MAP(ring);
+		val = __genwqe_readq(cd, addr);
+
+		if ((val == 0x0ull) || (val == -1ull))
+			continue;
+
+		traps = (val >> 24) & 0xff;	/* Number of Traps	*/
+		traces = (val >> 16) & 0xff;	/* Number of Traces	*/
+		trace_entries = val & 0xffff;	/* Entries per trace	*/
+
+		/* Note: This is a combined loop that dumps both the traps */
+		/* (for the trace == 0 case) as well as the traces 1 to    */
+		/* 'traces'.						   */
+		for (trace = 0; trace <= traces; trace++) {
+			u32 diag_sel =
+				GENWQE_EXTENDED_DIAG_SELECTOR(ring, trace);
+
+			addr = (GENWQE_UID_OFFS(uid) |
+				IO_EXTENDED_DIAG_SELECTOR);
+			__genwqe_writeq(cd, addr, diag_sel);
+
+			for (trace_entry = 0;
+			     trace_entry < (trace ? trace_entries : traps);
+			     trace_entry++) {
+				addr = (GENWQE_UID_OFFS(uid) |
+					IO_EXTENDED_DIAG_READ_MBX);
+				val = __genwqe_readq(cd, addr);
+				set_reg_idx(cd, regs, &idx, max_regs, addr,
+					    (diag_sel<<16) | trace_entry, val);
+			}
+		}
+	}
+	return 0;
+}
+
+/**
+ * genwqe_write_vreg() - Write register in virtual window
+ *
+ * Note, these registers are only accessible to the PF through the
+ * VF-window. It is not intended for the VF to access.
+ */
+int genwqe_write_vreg(struct genwqe_dev *cd, u32 reg, u64 val, int func)
+{
+	__genwqe_writeq(cd, IO_PF_SLC_VIRTUAL_WINDOW, func & 0xf);
+	__genwqe_writeq(cd, reg, val);
+	return 0;
+}
+
+/**
+ * genwqe_read_vreg() - Read register in virtual window
+ *
+ * Note, these registers are only accessible to the PF through the
+ * VF-window. It is not intended for the VF to access.
+ */
+u64 genwqe_read_vreg(struct genwqe_dev *cd, u32 reg, int func)
+{
+	__genwqe_writeq(cd, IO_PF_SLC_VIRTUAL_WINDOW, func & 0xf);
+	return __genwqe_readq(cd, reg);
+}
+
+/**
+ * genwqe_base_clock_frequency() - Deteremine base clock frequency of the card
+ *
+ * Note: From a design perspective it turned out to be a bad idea to
+ * use codes here to specifiy the frequency/speed values. An old
+ * driver cannot understand new codes and is therefore always a
+ * problem. Better is to measure out the value or put the
+ * speed/frequency directly into a register which is always a valid
+ * value for old as well as for new software.
+ *
+ * Return: Card clock in MHz
+ */
+int genwqe_base_clock_frequency(struct genwqe_dev *cd)
+{
+	u16 speed;		/*         MHz  MHz  MHz  MHz */
+	static const int speed_grade[] = { 250, 200, 166, 175 };
+
+	speed = (u16)((cd->slu_unitcfg >> 28) & 0x0full);
+	if (speed >= ARRAY_SIZE(speed_grade))
+		return 0;	/* illegal value */
+
+	return speed_grade[speed];
+}
+
+/**
+ * genwqe_stop_traps() - Stop traps
+ *
+ * Before reading out the analysis data, we need to stop the traps.
+ */
+void genwqe_stop_traps(struct genwqe_dev *cd)
+{
+	__genwqe_writeq(cd, IO_SLC_MISC_DEBUG_SET, 0xcull);
+}
+
+/**
+ * genwqe_start_traps() - Start traps
+ *
+ * After having read the data, we can/must enable the traps again.
+ */
+void genwqe_start_traps(struct genwqe_dev *cd)
+{
+	__genwqe_writeq(cd, IO_SLC_MISC_DEBUG_CLR, 0xcull);
+
+	if (genwqe_need_err_masking(cd))
+		__genwqe_writeq(cd, IO_SLC_MISC_DEBUG, 0x0aull);
+}
diff --git a/drivers/misc/genwqe/genwqe_driver.h b/drivers/misc/genwqe/genwqe_driver.h
new file mode 100644
index 00000000000..cd5263163a6
--- /dev/null
+++ b/drivers/misc/genwqe/genwqe_driver.h
@@ -0,0 +1,77 @@
+#ifndef __GENWQE_DRIVER_H__
+#define __GENWQE_DRIVER_H__
+
+/**
+ * IBM Accelerator Family 'GenWQE'
+ *
+ * (C) Copyright IBM Corp. 2013
+ *
+ * Author: Frank Haverkamp <haver@linux.vnet.ibm.com>
+ * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com>
+ * Author: Michael Jung <mijung@de.ibm.com>
+ * Author: Michael Ruettger <michael@ibmra.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/types.h>
+#include <linux/stddef.h>
+#include <linux/cdev.h>
+#include <linux/list.h>
+#include <linux/kthread.h>
+#include <linux/scatterlist.h>
+#include <linux/iommu.h>
+#include <linux/spinlock.h>
+#include <linux/mutex.h>
+#include <linux/platform_device.h>
+#include <linux/printk.h>
+
+#include <asm/byteorder.h>
+#include <linux/genwqe/genwqe_card.h>
+
+#define DRV_VERS_STRING		"2.0.15"
+
+/*
+ * Static minor number assignement, until we decide/implement
+ * something dynamic.
+ */
+#define GENWQE_MAX_MINOR	128 /* up to 128 possible genwqe devices */
+
+/**
+ * genwqe_requ_alloc() - Allocate a new DDCB execution request
+ *
+ * This data structure contains the user visiable fields of the DDCB
+ * to be executed.
+ *
+ * Return: ptr to genwqe_ddcb_cmd data structure
+ */
+struct genwqe_ddcb_cmd *ddcb_requ_alloc(void);
+
+/**
+ * ddcb_requ_free() - Free DDCB execution request.
+ * @req:       ptr to genwqe_ddcb_cmd data structure.
+ */
+void ddcb_requ_free(struct genwqe_ddcb_cmd *req);
+
+u32  genwqe_crc32(u8 *buff, size_t len, u32 init);
+
+static inline void genwqe_hexdump(struct pci_dev *pci_dev,
+				  const void *buff, unsigned int size)
+{
+	char prefix[32];
+
+	scnprintf(prefix, sizeof(prefix), "%s %s: ",
+		  GENWQE_DEVNAME, pci_name(pci_dev));
+
+	print_hex_dump_debug(prefix, DUMP_PREFIX_OFFSET, 16, 1, buff,
+			     size, true);
+}
+
+#endif	/* __GENWQE_DRIVER_H__ */
diff --git a/drivers/misc/hmc6352.c b/drivers/misc/hmc6352.c
index 170bd3daf33..90520d76633 100644
--- a/drivers/misc/hmc6352.c
+++ b/drivers/misc/hmc6352.c
@@ -22,7 +22,6 @@
  */
 
 #include <linux/module.h>
-#include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/i2c.h>
 #include <linux/err.h>
diff --git a/drivers/misc/hpilo.c b/drivers/misc/hpilo.c
index 621c7a37339..b83e3ca12a4 100644
--- a/drivers/misc/hpilo.c
+++ b/drivers/misc/hpilo.c
@@ -759,7 +759,7 @@ static int ilo_probe(struct pci_dev *pdev,
 
 	/* Ignore subsystem_device = 0x1979 (set by BIOS)  */
 	if (pdev->subsystem_device == 0x1979)
-		goto out;
+		return 0;
 
 	if (max_ccb > MAX_CCB)
 		max_ccb = MAX_CCB;
@@ -899,7 +899,7 @@ static void __exit ilo_exit(void)
 	class_destroy(ilo_class);
 }
 
-MODULE_VERSION("1.4");
+MODULE_VERSION("1.4.1");
 MODULE_ALIAS(ILO_NAME);
 MODULE_DESCRIPTION(ILO_NAME);
 MODULE_AUTHOR("David Altobelli <david.altobelli@hp.com>");
diff --git a/drivers/misc/ibmasm/ibmasmfs.c b/drivers/misc/ibmasm/ibmasmfs.c
index ce5b75616b4..e8b933111e0 100644
--- a/drivers/misc/ibmasm/ibmasmfs.c
+++ b/drivers/misc/ibmasm/ibmasmfs.c
@@ -149,8 +149,7 @@ static struct inode *ibmasmfs_make_inode(struct super_block *sb, int mode)
 	return ret;
 }
 
-static struct dentry *ibmasmfs_create_file (struct super_block *sb,
-			struct dentry *parent,
+static struct dentry *ibmasmfs_create_file(struct dentry *parent,
 			const char *name,
 			const struct file_operations *fops,
 			void *data,
@@ -163,7 +162,7 @@ static struct dentry *ibmasmfs_create_file (struct super_block *sb,
 	if (!dentry)
 		return NULL;
 
-	inode = ibmasmfs_make_inode(sb, S_IFREG | mode);
+	inode = ibmasmfs_make_inode(parent->d_sb, S_IFREG | mode);
 	if (!inode) {
 		dput(dentry);
 		return NULL;
@@ -176,8 +175,7 @@ static struct dentry *ibmasmfs_create_file (struct super_block *sb,
 	return dentry;
 }
 
-static struct dentry *ibmasmfs_create_dir (struct super_block *sb,
-				struct dentry *parent,
+static struct dentry *ibmasmfs_create_dir(struct dentry *parent,
 				const char *name)
 {
 	struct dentry *dentry;
@@ -187,7 +185,7 @@ static struct dentry *ibmasmfs_create_dir (struct super_block *sb,
 	if (!dentry)
 		return NULL;
 
-	inode = ibmasmfs_make_inode(sb, S_IFDIR | 0500);
+	inode = ibmasmfs_make_inode(parent->d_sb, S_IFDIR | 0500);
 	if (!inode) {
 		dput(dentry);
 		return NULL;
@@ -612,20 +610,20 @@ static void ibmasmfs_create_files (struct super_block *sb)
 		struct dentry *dir;
 		struct dentry *remote_dir;
 		sp = list_entry(entry, struct service_processor, node);
-		dir = ibmasmfs_create_dir(sb, sb->s_root, sp->dirname);
+		dir = ibmasmfs_create_dir(sb->s_root, sp->dirname);
 		if (!dir)
 			continue;
 
-		ibmasmfs_create_file(sb, dir, "command", &command_fops, sp, S_IRUSR|S_IWUSR);
-		ibmasmfs_create_file(sb, dir, "event", &event_fops, sp, S_IRUSR|S_IWUSR);
-		ibmasmfs_create_file(sb, dir, "reverse_heartbeat", &r_heartbeat_fops, sp, S_IRUSR|S_IWUSR);
+		ibmasmfs_create_file(dir, "command", &command_fops, sp, S_IRUSR|S_IWUSR);
+		ibmasmfs_create_file(dir, "event", &event_fops, sp, S_IRUSR|S_IWUSR);
+		ibmasmfs_create_file(dir, "reverse_heartbeat", &r_heartbeat_fops, sp, S_IRUSR|S_IWUSR);
 
-		remote_dir = ibmasmfs_create_dir(sb, dir, "remote_video");
+		remote_dir = ibmasmfs_create_dir(dir, "remote_video");
 		if (!remote_dir)
 			continue;
 
-		ibmasmfs_create_file(sb, remote_dir, "width", &remote_settings_fops, (void *)display_width(sp), S_IRUSR|S_IWUSR);
-		ibmasmfs_create_file(sb, remote_dir, "height", &remote_settings_fops, (void *)display_height(sp), S_IRUSR|S_IWUSR);
-		ibmasmfs_create_file(sb, remote_dir, "depth", &remote_settings_fops, (void *)display_depth(sp), S_IRUSR|S_IWUSR);
+		ibmasmfs_create_file(remote_dir, "width", &remote_settings_fops, (void *)display_width(sp), S_IRUSR|S_IWUSR);
+		ibmasmfs_create_file(remote_dir, "height", &remote_settings_fops, (void *)display_height(sp), S_IRUSR|S_IWUSR);
+		ibmasmfs_create_file(remote_dir, "depth", &remote_settings_fops, (void *)display_depth(sp), S_IRUSR|S_IWUSR);
 	}
 }
diff --git a/drivers/misc/ibmasm/module.c b/drivers/misc/ibmasm/module.c
author	Jesper Juhl <jj@chaosbits.net>	2011-11-13 23:11:50 +0100
committer	Takashi Iwai <tiwai@suse.de>	2011-11-14 10:41:46 +0100
commit	e53de8f00c80fd1a312c95bc5157fdb98d46e070 (patch)
tree	98c61d5bf0fd658de9e6c97cd621cfcdba9ecb0c /fs/jbd/commit.c
parent	afef2cfa0ecf45dec7e88db9fa312ee82d347111 (diff)