aboutsummaryrefslogtreecommitdiff
path: root/drivers/xen
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2011-07-22 13:45:15 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2011-07-22 13:45:15 -0700
commit111ad119d1765b1bbef2629a5f2bd825caeb7e74 (patch)
tree167ee4a4e6e9276bb7178ddcce85d6860543cfb4 /drivers/xen
parent997271cf5e12c1b38aec0764187094663501c984 (diff)
parent3a6d28b11a895d08b6b4fc6f16dd9ff995844b45 (diff)
Merge branch 'stable/drivers' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen
* 'stable/drivers' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen: xen/pciback: Have 'passthrough' option instead of XEN_PCIDEV_BACKEND_PASS and XEN_PCIDEV_BACKEND_VPCI xen/pciback: Remove the DEBUG option. xen/pciback: Drop two backends, squash and cleanup some code. xen/pciback: Print out the MSI/MSI-X (PIRQ) values xen/pciback: Don't setup an fake IRQ handler for SR-IOV devices. xen: rename pciback module to xen-pciback. xen/pciback: Fine-grain the spinlocks and fix BUG: scheduling while atomic cases. xen/pciback: Allocate IRQ handler for device that is shared with guest. xen/pciback: Disable MSI/MSI-X when reseting a device xen/pciback: guest SR-IOV support for PV guest xen/pciback: Register the owner (domain) of the PCI device. xen/pciback: Cleanup the driver based on checkpatch warnings and errors. xen/pciback: xen pci backend driver. xen: tmem: self-ballooning and frontswap-selfshrinking xen: Add module alias to autoload backend drivers xen: Populate xenbus device attributes xen: Add __attribute__((format(printf... where appropriate xen: prepare tmem shim to handle frontswap xen: allow enable use of VGA console on dom0
Diffstat (limited to 'drivers/xen')
-rw-r--r--drivers/xen/Kconfig46
-rw-r--r--drivers/xen/Makefile4
-rw-r--r--drivers/xen/tmem.c170
-rw-r--r--drivers/xen/xen-balloon.c2
-rw-r--r--drivers/xen/xen-pciback/Makefile7
-rw-r--r--drivers/xen/xen-pciback/conf_space.c438
-rw-r--r--drivers/xen/xen-pciback/conf_space.h126
-rw-r--r--drivers/xen/xen-pciback/conf_space_capability.c207
-rw-r--r--drivers/xen/xen-pciback/conf_space_header.c386
-rw-r--r--drivers/xen/xen-pciback/conf_space_quirks.c140
-rw-r--r--drivers/xen/xen-pciback/conf_space_quirks.h33
-rw-r--r--drivers/xen/xen-pciback/passthrough.c194
-rw-r--r--drivers/xen/xen-pciback/pci_stub.c1376
-rw-r--r--drivers/xen/xen-pciback/pciback.h183
-rw-r--r--drivers/xen/xen-pciback/pciback_ops.c384
-rw-r--r--drivers/xen/xen-pciback/vpci.c259
-rw-r--r--drivers/xen/xen-pciback/xenbus.c749
-rw-r--r--drivers/xen/xen-selfballoon.c485
-rw-r--r--drivers/xen/xenbus/xenbus_probe.c44
-rw-r--r--drivers/xen/xenbus/xenbus_probe.h2
-rw-r--r--drivers/xen/xenbus/xenbus_probe_backend.c9
-rw-r--r--drivers/xen/xenbus/xenbus_probe_frontend.c6
22 files changed, 5199 insertions, 51 deletions
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index a59638b37c1..03bc471c3ee 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -9,6 +9,23 @@ config XEN_BALLOON
the system to expand the domain's memory allocation, or alternatively
return unneeded memory to the system.
+config XEN_SELFBALLOONING
+ bool "Dynamically self-balloon kernel memory to target"
+ depends on XEN && XEN_BALLOON && CLEANCACHE && SWAP
+ default n
+ help
+ Self-ballooning dynamically balloons available kernel memory driven
+ by the current usage of anonymous memory ("committed AS") and
+ controlled by various sysfs-settable parameters. Configuring
+ FRONTSWAP is highly recommended; if it is not configured, self-
+ ballooning is disabled by default but can be enabled with the
+ 'selfballooning' kernel boot parameter. If FRONTSWAP is configured,
+ frontswap-selfshrinking is enabled by default but can be disabled
+ with the 'noselfshrink' kernel boot parameter; and self-ballooning
+ is enabled by default but can be disabled with the 'noselfballooning'
+ kernel boot parameter. Note that systems without a sufficiently
+ large swap device should not enable self-ballooning.
+
config XEN_SCRUB_PAGES
bool "Scrub pages before returning them to system"
depends on XEN_BALLOON
@@ -105,4 +122,33 @@ config SWIOTLB_XEN
depends on PCI
select SWIOTLB
+config XEN_TMEM
+ bool
+ default y if (CLEANCACHE || FRONTSWAP)
+ help
+ Shim to interface in-kernel Transcendent Memory hooks
+ (e.g. cleancache and frontswap) to Xen tmem hypercalls.
+
+config XEN_PCIDEV_BACKEND
+ tristate "Xen PCI-device backend driver"
+ depends on PCI && X86 && XEN
+ depends on XEN_BACKEND
+ default m
+ help
+ The PCI device backend driver allows the kernel to export arbitrary
+ PCI devices to other guests. If you select this to be a module, you
+ will need to make sure no other driver has bound to the device(s)
+ you want to make visible to other guests.
+
+ The parameter "passthrough" allows you to specify how you want the PCI
+ devices to appear in the guest. You can choose the default (0) where
+ PCI topology starts at 00.00.0, or (1) for passthrough if you want
+ the PCI device topology to appear the same as in the host.
+
+ The "hide" parameter (only applicable if the backend driver is compiled
+ into the kernel) allows you to bind the PCI devices to this module
+ instead of to their default device drivers. The argument is a list of
+ PCI BDFs, for example:
+ xen-pciback.hide=(03:00.0)(04:00.0)
+
+ If in doubt, say m.
endmenu
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index bbc18258ecc..72bbb27d7a6 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -1,6 +1,5 @@
obj-y += grant-table.o features.o events.o manage.o balloon.o
obj-y += xenbus/
-obj-y += tmem.o
nostackp := $(call cc-option, -fno-stack-protector)
CFLAGS_features.o := $(nostackp)
@@ -9,14 +8,17 @@ obj-$(CONFIG_BLOCK) += biomerge.o
obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
obj-$(CONFIG_XEN_XENCOMM) += xencomm.o
obj-$(CONFIG_XEN_BALLOON) += xen-balloon.o
+obj-$(CONFIG_XEN_SELFBALLOONING) += xen-selfballoon.o
obj-$(CONFIG_XEN_DEV_EVTCHN) += xen-evtchn.o
obj-$(CONFIG_XEN_GNTDEV) += xen-gntdev.o
obj-$(CONFIG_XEN_GRANT_DEV_ALLOC) += xen-gntalloc.o
obj-$(CONFIG_XENFS) += xenfs/
obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o
obj-$(CONFIG_XEN_PLATFORM_PCI) += xen-platform-pci.o
+obj-$(CONFIG_XEN_TMEM) += tmem.o
obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o
obj-$(CONFIG_XEN_DOM0) += pci.o
+obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback/
xen-evtchn-y := evtchn.o
xen-gntdev-y := gntdev.o
diff --git a/drivers/xen/tmem.c b/drivers/xen/tmem.c
index 816a44959ef..d369965e8f8 100644
--- a/drivers/xen/tmem.c
+++ b/drivers/xen/tmem.c
@@ -1,7 +1,7 @@
/*
* Xen implementation for transcendent memory (tmem)
*
- * Copyright (C) 2009-2010 Oracle Corp. All rights reserved.
+ * Copyright (C) 2009-2011 Oracle Corp. All rights reserved.
* Author: Dan Magenheimer
*/
@@ -9,8 +9,14 @@
#include <linux/types.h>
#include <linux/init.h>
#include <linux/pagemap.h>
+#include <linux/module.h>
#include <linux/cleancache.h>
+/* temporary ifdef until include/linux/frontswap.h is upstream */
+#ifdef CONFIG_FRONTSWAP
+#include <linux/frontswap.h>
+#endif
+
#include <xen/xen.h>
#include <xen/interface/xen.h>
#include <asm/xen/hypercall.h>
@@ -122,14 +128,8 @@ static int xen_tmem_flush_object(u32 pool_id, struct tmem_oid oid)
return xen_tmem_op(TMEM_FLUSH_OBJECT, pool_id, oid, 0, 0, 0, 0, 0);
}
-static int xen_tmem_destroy_pool(u32 pool_id)
-{
- struct tmem_oid oid = { { 0 } };
-
- return xen_tmem_op(TMEM_DESTROY_POOL, pool_id, oid, 0, 0, 0, 0, 0);
-}
-
-int tmem_enabled;
+int tmem_enabled __read_mostly;
+EXPORT_SYMBOL(tmem_enabled);
static int __init enable_tmem(char *s)
{
@@ -139,6 +139,14 @@ static int __init enable_tmem(char *s)
__setup("tmem", enable_tmem);
+#ifdef CONFIG_CLEANCACHE
+/*
+ * Issue TMEM_DESTROY_POOL for pool_id and hand back whatever xen_tmem_op()
+ * returns. The op is sent with a zeroed object id and all remaining
+ * arguments set to 0.
+ */
+static int xen_tmem_destroy_pool(u32 pool_id)
+{
+	const struct tmem_oid zero_oid = { { 0 } };
+	int rc;
+
+	rc = xen_tmem_op(TMEM_DESTROY_POOL, pool_id, zero_oid, 0, 0, 0, 0, 0);
+	return rc;
+}
+
/* cleancache ops */
static void tmem_cleancache_put_page(int pool, struct cleancache_filekey key,
@@ -240,18 +248,156 @@ static struct cleancache_ops tmem_cleancache_ops = {
.init_shared_fs = tmem_cleancache_init_shared_fs,
.init_fs = tmem_cleancache_init_fs
};
+#endif
-static int __init xen_tmem_init(void)
+#ifdef CONFIG_FRONTSWAP
+/* frontswap tmem operations */
+
+/* a single tmem poolid is used for all frontswap "types" (swapfiles) */
+static int tmem_frontswap_poolid;
+
+/*
+ * Swizzling increases objects per swaptype, increasing tmem concurrency
+ * for heavy swaploads. Later, larger nr_cpus -> larger SWIZ_BITS
+ *
+ * _oswiz() combines the swap type (shifted up by SWIZ_BITS) with the low
+ * SWIZ_BITS bits of the page index; iswiz() yields the index with those
+ * low bits shifted out. Every macro argument is parenthesized so that
+ * compound expressions (e.g. "a | b") expand with the intended precedence.
+ */
+#define SWIZ_BITS 4
+#define SWIZ_MASK ((1 << SWIZ_BITS) - 1)
+#define _oswiz(_type, _ind) (((_type) << SWIZ_BITS) | ((_ind) & SWIZ_MASK))
+#define iswiz(_ind) ((_ind) >> SWIZ_BITS)
+
+static inline struct tmem_oid oswiz(unsigned type, u32 ind)
{
- struct cleancache_ops old_ops;
+ struct tmem_oid oid = { .oid = { 0 } };
+ oid.oid[0] = _oswiz(type, ind);
+ return oid;
+}
+/*
+ * Store one swap page in the frontswap tmem pool.
+ * Returns 0 if the page was successfully put into frontswap, -1 if not.
+ */
+static int tmem_frontswap_put_page(unsigned type, pgoff_t offset,
+				   struct page *page)
+{
+	const u32 ind = (u32)offset;
+	const unsigned long pfn = page_to_pfn(page);
+	const int pool = tmem_frontswap_poolid;
+
+	/* no pool has been created yet */
+	if (pool < 0)
+		return -1;
+	/* the offset must survive truncation to 32 bits */
+	if ((u64)offset != ind)
+		return -1;
+	mb(); /* ensure page is quiescent; tmem may address it with an alias */
+	/* translate Xen tmem return values to linux semantics */
+	return xen_tmem_put_page(pool, oswiz(type, ind), iswiz(ind), pfn) == 1 ?
+		0 : -1;
+}
+
+/*
+ * Fetch one swap page back from the frontswap tmem pool.
+ * Returns 0 if the page was successfully gotten from frontswap, -1 if it
+ * was not present (should never happen!)
+ */
+static int tmem_frontswap_get_page(unsigned type, pgoff_t offset,
+				   struct page *page)
+{
+	const u32 ind = (u32)offset;
+	const unsigned long pfn = page_to_pfn(page);
+	const int pool = tmem_frontswap_poolid;
+
+	/* no pool has been created yet */
+	if (pool < 0)
+		return -1;
+	/* the offset must survive truncation to 32 bits */
+	if ((u64)offset != ind)
+		return -1;
+	/* translate Xen tmem return values to linux semantics */
+	return xen_tmem_get_page(pool, oswiz(type, ind), iswiz(ind), pfn) == 1 ?
+		0 : -1;
+}
+
+/*
+ * Flush a single page from frontswap. Nothing is done when no pool exists
+ * or when the offset does not fit in 32 bits.
+ */
+static void tmem_frontswap_flush_page(unsigned type, pgoff_t offset)
+{
+	const u32 ind = (u32)offset;
+	const int pool = tmem_frontswap_poolid;
+
+	if (pool < 0 || (u64)offset != ind)
+		return;
+	(void) xen_tmem_flush_page(pool, oswiz(type, ind), iswiz(ind));
+}
+
+/*
+ * Flush all pages from the passed swaptype: one flush-object call per
+ * swizzle value, issued from SWIZ_MASK down to 0.
+ */
+static void tmem_frontswap_flush_area(unsigned type)
+{
+	const int pool = tmem_frontswap_poolid;
+	int swiz = SWIZ_MASK;
+
+	if (pool < 0)
+		return;
+	while (swiz >= 0) {
+		(void)xen_tmem_flush_object(pool, oswiz(type, swiz));
+		swiz--;
+	}
+}
+
+/*
+ * Create the frontswap tmem pool on first use. The argument is ignored:
+ * a single tmem poolid is used for all frontswap "types" (swapfiles), so
+ * an already valid poolid is left untouched.
+ */
+static void tmem_frontswap_init(unsigned ignored)
+{
+	struct tmem_pool_uuid private = TMEM_POOL_PRIVATE_UUID;
+
+	if (tmem_frontswap_poolid >= 0)
+		return;
+
+	tmem_frontswap_poolid =
+		xen_tmem_new_pool(private, TMEM_POOL_PERSIST, PAGE_SIZE);
+}
+
+static int __initdata use_frontswap = 1; /* checked by xen_tmem_init(); cleared by no_frontswap() */
+
+static int __init no_frontswap(char *s) /* 's' is not used */
+{
+ use_frontswap = 0;
+ return 1;
+}
+
+__setup("nofrontswap", no_frontswap); /* NOTE(review): presumably run when "nofrontswap" is on the kernel command line */
+
+/* frontswap callbacks backed by the Xen tmem helpers above */
+static struct frontswap_ops tmem_frontswap_ops = {
+	.init		= tmem_frontswap_init,
+	.put_page	= tmem_frontswap_put_page,
+	.get_page	= tmem_frontswap_get_page,
+	.flush_page	= tmem_frontswap_flush_page,
+	.flush_area	= tmem_frontswap_flush_area,
+};
+#endif
+
+static int __init xen_tmem_init(void)
+{
if (!xen_domain())
return 0;
+#ifdef CONFIG_FRONTSWAP
+ if (tmem_enabled && use_frontswap) {
+ char *s = "";
+ struct frontswap_ops old_ops =
+ frontswap_register_ops(&tmem_frontswap_ops);
+
+ tmem_frontswap_poolid = -1;
+ if (old_ops.init != NULL)
+ s = " (WARNING: frontswap_ops overridden)";
+ printk(KERN_INFO "frontswap enabled, RAM provided by "
+ "Xen Transcendent Memory\n");
+ }
+#endif
#ifdef CONFIG_CLEANCACHE
BUG_ON(sizeof(struct cleancache_filekey) != sizeof(struct tmem_oid));
if (tmem_enabled && use_cleancache) {
char *s = "";
- old_ops = cleancache_register_ops(&tmem_cleancache_ops);
+ struct cleancache_ops old_ops =
+ cleancache_register_ops(&tmem_cleancache_ops);
if (old_ops.init_fs != NULL)
s = " (WARNING: cleancache_ops overridden)";
printk(KERN_INFO "cleancache enabled, RAM provided by "
diff --git a/drivers/xen/xen-balloon.c b/drivers/xen/xen-balloon.c
index a4ff225ee86..5c9dc43c1e9 100644
--- a/drivers/xen/xen-balloon.c
+++ b/drivers/xen/xen-balloon.c
@@ -98,6 +98,8 @@ static int __init balloon_init(void)
register_balloon(&balloon_sysdev);
+ register_xen_selfballooning(&balloon_sysdev);
+
target_watch.callback = watch_target;
xenstore_notifier.notifier_call = balloon_init_watcher;
diff --git a/drivers/xen/xen-pciback/Makefile b/drivers/xen/xen-pciback/Makefile
new file mode 100644
index 00000000000..ffe0ad3438b
--- /dev/null
+++ b/drivers/xen/xen-pciback/Makefile
@@ -0,0 +1,7 @@
+obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback.o
+
+xen-pciback-y := pci_stub.o pciback_ops.o xenbus.o
+xen-pciback-y += conf_space.o conf_space_header.o \
+ conf_space_capability.o \
+ conf_space_quirks.o vpci.o \
+ passthrough.o
diff --git a/drivers/xen/xen-pciback/conf_space.c b/drivers/xen/xen-pciback/conf_space.c
new file mode 100644
index 00000000000..a8031445d94
--- /dev/null
+++ b/drivers/xen/xen-pciback/conf_space.c
@@ -0,0 +1,438 @@
+/*
+ * PCI Backend - Functions for creating a virtual configuration space for
+ * exported PCI Devices.
+ * It's dangerous to allow PCI Driver Domains to change their
+ * device's resources (memory, i/o ports, interrupts). We need to
+ * restrict changes to certain PCI Configuration registers:
+ * BARs, INTERRUPT_PIN, most registers in the header...
+ *
+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include "pciback.h"
+#include "conf_space.h"
+#include "conf_space_quirks.h"
+
+#define DRV_NAME "xen-pciback"
+/*
+ * Module-wide "permissive" switch, exposed as a module parameter (mode 0644).
+ * When set, configuration-space writes that no virtual field handles are
+ * passed through to the device instead of being dropped (see
+ * xen_pcibk_config_write()). Declared bool so the variable's type matches
+ * the "bool" parameter type given to module_param().
+ */
+static bool permissive;
+module_param(permissive, bool, 0644);
+
+/* This is where xen_pcibk_read_config_byte, xen_pcibk_read_config_word,
+ * xen_pcibk_write_config_word, and xen_pcibk_write_config_byte are created.
+ * The dword variants are generated the same way. Each generated function
+ * forwards to the matching pci_<op>_config_<size>() accessor, returns its
+ * result, and ignores its 'data' argument. */
+#define DEFINE_PCI_CONFIG(op, size, type) \
+int xen_pcibk_##op##_config_##size \
+(struct pci_dev *dev, int offset, type value, void *data) \
+{ \
+ return pci_##op##_config_##size(dev, offset, value); \
+}
+
+DEFINE_PCI_CONFIG(read, byte, u8 *)
+DEFINE_PCI_CONFIG(read, word, u16 *)
+DEFINE_PCI_CONFIG(read, dword, u32 *)
+
+DEFINE_PCI_CONFIG(write, byte, u8)
+DEFINE_PCI_CONFIG(write, word, u16)
+DEFINE_PCI_CONFIG(write, dword, u32)
+
+/*
+ * Read one virtual field through its size-specific read handler.
+ *
+ * @dev:    device the field belongs to
+ * @entry:  field entry; entry->field->size selects the byte/word/dword
+ *          handler and entry->data is passed through to it
+ * @offset: offset handed to the handler
+ * @value:  receives the field value, zero-extended to 32 bits; it is 0
+ *          when the field has no read handler or an unexpected size
+ *
+ * Returns the handler's return value, or 0 if no handler was called.
+ *
+ * Byte and word handlers fill a correctly typed temporary which is then
+ * widened, instead of writing through a casted u32 pointer; the result no
+ * longer depends on byte order.
+ */
+static int conf_space_read(struct pci_dev *dev,
+			   const struct config_field_entry *entry,
+			   int offset, u32 *value)
+{
+	const struct config_field *field = entry->field;
+	int ret = 0;
+	u8 byte = 0;
+	u16 word = 0;
+
+	*value = 0;
+
+	switch (field->size) {
+	case 1:
+		if (field->u.b.read) {
+			ret = field->u.b.read(dev, offset, &byte,
+					      entry->data);
+			*value = byte;
+		}
+		break;
+	case 2:
+		if (field->u.w.read) {
+			ret = field->u.w.read(dev, offset, &word,
+					      entry->data);
+			*value = word;
+		}
+		break;
+	case 4:
+		if (field->u.dw.read)
+			ret = field->u.dw.read(dev, offset, value, entry->data);
+		break;
+	default:
+		/* unexpected size: leave *value at 0 and report no error */
+		break;
+	}
+	return ret;
+}
+
+/*
+ * Write one virtual field through its size-specific write handler.
+ * The value is truncated to the field width before it is handed over.
+ * Returns the handler's return value, or 0 when the field has no write
+ * handler or a size other than 1, 2 or 4.
+ */
+static int conf_space_write(struct pci_dev *dev,
+			    const struct config_field_entry *entry,
+			    int offset, u32 value)
+{
+	const struct config_field *fld = entry->field;
+
+	switch (fld->size) {
+	case 1:
+		if (!fld->u.b.write)
+			return 0;
+		return fld->u.b.write(dev, offset, (u8) value, entry->data);
+	case 2:
+		if (!fld->u.w.write)
+			return 0;
+		return fld->u.w.write(dev, offset, (u16) value, entry->data);
+	case 4:
+		if (!fld->u.dw.write)
+			return 0;
+		return fld->u.dw.write(dev, offset, value, entry->data);
+	}
+	return 0;
+}
+
+/*
+ * Bit mask covering an access of 'size' bytes: 0xff for 1, 0xffff for 2,
+ * and 0xffffffff for every other size.
+ */
+static inline u32 get_mask(int size)
+{
+	switch (size) {
+	case 1:
+		return 0xff;
+	case 2:
+		return 0xffff;
+	default:
+		return 0xffffffff;
+	}
+}
+
+/*
+ * Validate a configuration-space request.
+ *
+ * Returns 1 when 'size' is 1, 2 or 4 and 'offset' is non-negative and
+ * naturally aligned to 'size' (no un-aligned requests), 0 otherwise.
+ * Negative offsets are rejected explicitly: e.g. -4 would otherwise pass
+ * the alignment test for a 4-byte access.
+ */
+static inline int valid_request(int offset, int size)
+{
+	if (offset < 0)
+		return 0;
+	/* checked before the modulo so that size == 0 never divides */
+	if (size != 1 && size != 2 && size != 4)
+		return 0;
+	return (offset % size) == 0;
+}
+
+/*
+ * Merge new_val into val.
+ *
+ * Both new_val and new_val_mask are shifted by 'offset' bytes (left for a
+ * non-negative offset, right for a negative one); the bits selected by the
+ * shifted mask are then taken from new_val, all others from val.
+ */
+static inline u32 merge_value(u32 val, u32 new_val, u32 new_val_mask,
+			      int offset)
+{
+	const int bits = offset * 8;
+
+	if (bits >= 0) {
+		new_val_mask <<= bits;
+		new_val <<= bits;
+	} else {
+		new_val_mask >>= -bits;
+		new_val >>= -bits;
+	}
+
+	return (val & ~new_val_mask) | (new_val & new_val_mask);
+}
+
+/*
+ * Translate a PCIBIOS_* status into the matching XEN_PCI_ERR_* code.
+ * Any value that is not one of the handled PCIBIOS_* codes is returned
+ * unchanged.
+ */
+static int pcibios_err_to_errno(int err)
+{
+	int xen_err = err;
+
+	switch (err) {
+	case PCIBIOS_SUCCESSFUL:
+		xen_err = XEN_PCI_ERR_success;
+		break;
+	case PCIBIOS_DEVICE_NOT_FOUND:
+		xen_err = XEN_PCI_ERR_dev_not_found;
+		break;
+	case PCIBIOS_BAD_REGISTER_NUMBER:
+		xen_err = XEN_PCI_ERR_invalid_offset;
+		break;
+	case PCIBIOS_FUNC_NOT_SUPPORTED:
+		xen_err = XEN_PCI_ERR_not_implemented;
+		break;
+	case PCIBIOS_SET_FAILED:
+		xen_err = XEN_PCI_ERR_access_denied;
+		break;
+	}
+	return xen_err;
+}
+
+/*
+ * Read 'size' bytes at 'offset' from the device's virtual configuration
+ * space.
+ *
+ * The real register value is read first; every virtual field that overlaps
+ * the request then overrides its part of that value via merge_value().
+ *
+ * @dev:     device being accessed
+ * @offset:  start of the request
+ * @size:    1, 2 or 4; other sizes or unaligned offsets are rejected
+ * @ret_val: receives the (possibly filtered) value
+ *
+ * Returns the result of pcibios_err_to_errno() on the last error seen:
+ * XEN_PCI_ERR_invalid_offset for a request rejected by valid_request(),
+ * a translated PCIBIOS code, or whatever a field handler returned.
+ *
+ * A field overlaps the request when the two byte ranges intersect at all.
+ * This includes a field lying strictly inside the request (e.g. a 1-byte
+ * field at 0x05 during a 4-byte read at 0x04), which must not be skipped
+ * or the unfiltered hardware value would be returned for it.
+ */
+int xen_pcibk_config_read(struct pci_dev *dev, int offset, int size,
+			  u32 *ret_val)
+{
+	int err = 0;
+	struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);
+	const struct config_field_entry *cfg_entry;
+	const struct config_field *field;
+	int req_start, req_end, field_start, field_end;
+	/* if read fails for any reason, return 0
+	 * (as if device didn't respond) */
+	u32 value = 0, tmp_val;
+
+	if (unlikely(verbose_request))
+		printk(KERN_DEBUG DRV_NAME ": %s: read %d bytes at 0x%x\n",
+		       pci_name(dev), size, offset);
+
+	if (!valid_request(offset, size)) {
+		err = XEN_PCI_ERR_invalid_offset;
+		goto out;
+	}
+
+	/* Get the real value first, then modify as appropriate */
+	switch (size) {
+	case 1:
+		err = pci_read_config_byte(dev, offset, (u8 *) &value);
+		break;
+	case 2:
+		err = pci_read_config_word(dev, offset, (u16 *) &value);
+		break;
+	case 4:
+		err = pci_read_config_dword(dev, offset, &value);
+		break;
+	}
+
+	/* the request range does not change while walking the fields */
+	req_start = offset;
+	req_end = offset + size;
+
+	list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
+		field = cfg_entry->field;
+
+		field_start = OFFSET(cfg_entry);
+		field_end = OFFSET(cfg_entry) + field->size;
+
+		/* any intersection of [req_start, req_end) and the field */
+		if (req_end > field_start && field_end > req_start) {
+			err = conf_space_read(dev, cfg_entry, field_start,
+					      &tmp_val);
+			if (err)
+				goto out;
+
+			value = merge_value(value, tmp_val,
+					    get_mask(field->size),
+					    field_start - req_start);
+		}
+	}
+
+out:
+	if (unlikely(verbose_request))
+		printk(KERN_DEBUG DRV_NAME ": %s: read %d bytes at 0x%x = %x\n",
+		       pci_name(dev), size, offset, value);
+
+	*ret_val = value;
+	return pcibios_err_to_errno(err);
+}
+
+/*
+ * Write 'size' bytes of 'value' at 'offset' to the device's virtual
+ * configuration space.
+ *
+ * For every virtual field that overlaps the request, the field's current
+ * value is read, the written bytes are merged in, and the result is passed
+ * to the field's write handler. A request that no field handled is written
+ * to the device only in permissive mode (per device or module-wide);
+ * otherwise it is dropped, with a one-time warning per device.
+ *
+ * Returns XEN_PCI_ERR_invalid_offset for a request rejected by
+ * valid_request(), otherwise the result of pcibios_err_to_errno() on the
+ * last error seen.
+ *
+ * A field overlaps the request when the two byte ranges intersect at all,
+ * including a field lying strictly inside the request; such a field must
+ * go through its write handler like any other.
+ */
+int xen_pcibk_config_write(struct pci_dev *dev, int offset, int size, u32 value)
+{
+	int err = 0, handled = 0;
+	struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);
+	const struct config_field_entry *cfg_entry;
+	const struct config_field *field;
+	u32 tmp_val;
+	int req_start, req_end, field_start, field_end;
+
+	if (unlikely(verbose_request))
+		printk(KERN_DEBUG
+		       DRV_NAME ": %s: write request %d bytes at 0x%x = %x\n",
+		       pci_name(dev), size, offset, value);
+
+	if (!valid_request(offset, size))
+		return XEN_PCI_ERR_invalid_offset;
+
+	/* the request range does not change while walking the fields */
+	req_start = offset;
+	req_end = offset + size;
+
+	list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
+		field = cfg_entry->field;
+
+		field_start = OFFSET(cfg_entry);
+		field_end = OFFSET(cfg_entry) + field->size;
+
+		/* any intersection of [req_start, req_end) and the field */
+		if (req_end > field_start && field_end > req_start) {
+			tmp_val = 0;
+
+			err = xen_pcibk_config_read(dev, field_start,
+						    field->size, &tmp_val);
+			if (err)
+				break;
+
+			tmp_val = merge_value(tmp_val, value, get_mask(size),
+					      req_start - field_start);
+
+			err = conf_space_write(dev, cfg_entry, field_start,
+					       tmp_val);
+
+			/* handled is set true here, but not every byte
+			 * may have been written! Properly detecting if
+			 * every byte is handled is unnecessary as the
+			 * flag is used to detect devices that need
+			 * special helpers to work correctly.
+			 */
+			handled = 1;
+		}
+	}
+
+	if (!handled && !err) {
+		/* By default, anything not specifically handled above is
+		 * read-only. The permissive flag changes this behavior so
+		 * that anything not specifically handled above is writable.
+		 * This means that some fields may still be read-only because
+		 * they have entries in the config_field list that intercept
+		 * the write and do nothing. */
+		if (dev_data->permissive || permissive) {
+			switch (size) {
+			case 1:
+				err = pci_write_config_byte(dev, offset,
+							    (u8) value);
+				break;
+			case 2:
+				err = pci_write_config_word(dev, offset,
+							    (u16) value);
+				break;
+			case 4:
+				err = pci_write_config_dword(dev, offset,
+							     (u32) value);
+				break;
+			}
+		} else if (!dev_data->warned_on_write) {
+			dev_data->warned_on_write = 1;
+			dev_warn(&dev->dev, "Driver tried to write to a "
+				 "read-only configuration space field at offset"
+				 " 0x%x, size %d. This may be harmless, but if "
+				 "you have problems with your device:\n"
+				 "1) see permissive attribute in sysfs\n"
+				 "2) report problems to the xen-devel "
+				 "mailing list along with details of your "
+				 "device obtained from lspci.\n", offset, size);
+		}
+	}
+
+	return pcibios_err_to_errno(err);
+}
+
+/*
+ * Tear down every config field entry whose field provides a clean() hook:
+ * the hook is called, then the entry's data is freed, and the entry is
+ * unlinked and freed. Entries without a clean() hook stay on the list.
+ * Does nothing when the device has no driver data.
+ */
+void xen_pcibk_config_free_dyn_fields(struct pci_dev *dev)
+{
+	struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);
+	struct config_field_entry *entry, *next;
+
+	dev_dbg(&dev->dev, "free-ing dynamically allocated virtual "
+			   "configuration space fields\n");
+	if (!dev_data)
+		return;
+
+	list_for_each_entry_safe(entry, next, &dev_data->config_fields, list) {
+		const struct config_field *fld = entry->field;
+
+		if (!fld->clean)
+			continue;
+
+		/* clean() takes a non-const pointer to the field */
+		fld->clean((struct config_field *)fld);
+
+		kfree(entry->data);
+
+		list_del(&entry->list);
+		kfree(entry);
+	}
+}
+
+/*
+ * Call the reset() hook of every config field on the device that has one,
+ * passing the field's offset and per-entry data. Does nothing when the
+ * device has no driver data.
+ */
+void xen_pcibk_config_reset_dev(struct pci_dev *dev)
+{
+	struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);
+	const struct config_field_entry *entry;
+
+	dev_dbg(&dev->dev, "resetting virtual configuration space\n");
+	if (!dev_data)
+		return;
+
+	list_for_each_entry(entry, &dev_data->config_fields, list) {
+		const struct config_field *fld = entry->field;
+
+		if (!fld->reset)
+			continue;
+		fld->reset(dev, OFFSET(entry), entry->data);
+	}
+}
+
+/*
+ * Remove every config field entry from the device: each entry is unlinked,
+ * its field's release() hook (if any) is called with the entry's offset and
+ * data, and the entry itself is freed. Does nothing when the device has no
+ * driver data.
+ */
+void xen_pcibk_config_free_dev(struct pci_dev *dev)
+{
+	struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);
+	struct config_field_entry *entry, *next;
+
+	dev_dbg(&dev->dev, "free-ing virtual configuration space fields\n");
+	if (!dev_data)
+		return;
+
+	list_for_each_entry_safe(entry, next, &dev_data->config_fields, list) {
+		const struct config_field *fld;
+
+		list_del(&entry->list);
+
+		fld = entry->field;
+		if (fld->release)
+			fld->release(dev, OFFSET(entry), entry->data);
+
+		kfree(entry);
+	}
+}
+
+/*
+ * Attach one virtual config field to a device.
+ *
+ * @dev:         device to add the field to
+ * @field:       field descriptor; its init() hook, if any, supplies the
+ *               per-entry data pointer
+ * @base_offset: added to field->offset to form the entry's offset
+ *
+ * Returns 0 on success, -ENOMEM when the entry cannot be allocated, the
+ * non-zero result of xen_pcibk_field_is_dup(), or the error encoded in the
+ * pointer returned by init(). On any non-zero return the entry is freed
+ * and nothing is added to the list.
+ */
+int xen_pcibk_config_add_field_offset(struct pci_dev *dev,
+				    const struct config_field *field,
+				    unsigned int base_offset)
+{
+	struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);
+	struct config_field_entry *entry;
+	int err;
+
+	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry)
+		return -ENOMEM;
+
+	entry->data = NULL;
+	entry->field = field;
+	entry->base_offset = base_offset;
+
+	/* silently ignore duplicate fields */
+	err = xen_pcibk_field_is_dup(dev, OFFSET(entry));
+	if (err)
+		goto free_entry;
+
+	if (field->init) {
+		void *priv = field->init(dev, OFFSET(entry));
+
+		if (IS_ERR(priv)) {
+			err = PTR_ERR(priv);
+			goto free_entry;
+		}
+
+		entry->data = priv;
+	}
+
+	dev_dbg(&dev->dev, "added config field at offset 0x%02x\n",
+		OFFSET(entry));
+	list_add_tail(&entry->list, &dev_data->config_fields);
+
+	return 0;
+
+free_entry:
+	kfree(entry);
+	return err;
+}
+
+/* This sets up the device's virtual configuration space to keep track of
+ * certain registers (like the base address registers (BARs)) so that we can
+ * keep the client from manipulating them directly.
+ *
+ * The field list is initialised, then header fields, capability fields and
+ * quirks are added in that order; the first non-zero result stops the
+ * sequence and is returned.
+ */
+int xen_pcibk_config_init_dev(struct pci_dev *dev)
+{
+	struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);
+	int err;
+
+	dev_dbg(&dev->dev, "initializing virtual configuration space\n");
+
+	INIT_LIST_HEAD(&dev_data->config_fields);
+
+	err = xen_pcibk_config_header_add_fields(dev);
+	if (!err)
+		err = xen_pcibk_config_capability_add_fields(dev);
+	if (!err)
+		err = xen_pcibk_config_quirks_init(dev);
+
+	return err;
+}
+
+/*
+ * Initialisation entry point for the config-space code: forwards to
+ * xen_pcibk_config_capability_init() and returns its result.
+ */
+int xen_pcibk_config_init(void)
+{
+	int rc = xen_pcibk_config_capability_init();
+
+	return rc;
+}
diff --git a/drivers/xen/xen-pciback/conf_space.h b/drivers/xen/xen-pciback/conf_space.h
new file mode 100644
index 00000000000..e56c934ad13
--- /dev/null
+++ b/drivers/xen/xen-pciback/conf_space.h
@@ -0,0 +1,126 @@
+/*
+ * PCI Backend - Common data structures for overriding the configuration space
+ *
+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
+ */
+
+#ifndef __XEN_PCIBACK_CONF_SPACE_H__
+#define __XEN_PCIBACK_CONF_SPACE_H__
+
+#include <linux/list.h>
+#include <linux/err.h>
+
+/* Callback types used by struct config_field.
+ *
+ * conf_field_init can return an errno in a ptr with ERR_PTR(); any other
+ * return value is stored as the entry's 'data' pointer.
+ * conf_field_reset and conf_field_free are called with the field's offset
+ * and that 'data' pointer.
+ * The conf_{byte,word,dword}_{read,write} handlers access a field of the
+ * corresponding width, are also passed 'data', and return an error code
+ * (callers treat non-zero as failure). */
+typedef void *(*conf_field_init) (struct pci_dev *dev, int offset);
+typedef void (*conf_field_reset) (struct pci_dev *dev, int offset, void *data);
+typedef void (*conf_field_free) (struct pci_dev *dev, int offset, void *data);
+
+typedef int (*conf_dword_write) (struct pci_dev *dev, int offset, u32 value,
+ void *data);
+typedef int (*conf_word_write) (struct pci_dev *dev, int offset, u16 value,
+ void *data);
+typedef int (*conf_byte_write) (struct pci_dev *dev, int offset, u8 value,
+ void *data);
+typedef int (*conf_dword_read) (struct pci_dev *dev, int offset, u32 *value,
+ void *data);
+typedef int (*conf_word_read) (struct pci_dev *dev, int offset, u16 *value,
+ void *data);
+typedef int (*conf_byte_read) (struct pci_dev *dev, int offset, u8 *value,
+ void *data);
+
+/* These are the fields within the configuration space which we
+ * are interested in intercepting reads/writes to and changing their
+ * values.
+ *
+ * Arrays of these descriptors are terminated by an entry with size == 0.
+ */
+struct config_field {
+ unsigned int offset; /* field offset; OFFSET() adds the entry's base_offset */
+ unsigned int size; /* width in bytes: 1, 2 or 4 selects u.b, u.w or u.dw */
+ unsigned int mask; /* NOTE(review): no user visible in conf_space.c - confirm meaning */
+ conf_field_init init; /* optional; called when the field is added to a device */
+ conf_field_reset reset; /* optional; called by xen_pcibk_config_reset_dev() */
+ conf_field_free release; /* optional; called by xen_pcibk_config_free_dev() */
+ void (*clean) (struct config_field *field); /* if set, xen_pcibk_config_free_dyn_fields() tears the entry down */
+ union {
+ struct {
+ conf_dword_write write;
+ conf_dword_read read;
+ } dw; /* handlers used when size == 4 */
+ struct {
+ conf_word_write write;
+ conf_word_read read;
+ } w; /* handlers used when size == 2 */
+ struct {
+ conf_byte_write write;
+ conf_byte_read read;
+ } b; /* handlers used when size == 1 */
+ } u;
+ struct list_head list; /* NOTE(review): no user visible in conf_space.c - confirm */
+};
+
+struct config_field_entry { /* one config_field attached to one device */
+ struct list_head list; /* link in the device's config_fields list */
+ const struct config_field *field; /* descriptor this entry instantiates */
+ unsigned int base_offset; /* added to field->offset by OFFSET() */
+ void *data; /* result of field->init(), or NULL; passed to the field's handlers */
+};
+
+#define OFFSET(cfg_entry) ((cfg_entry)->base_offset+(cfg_entry)->field->offset) /* entry's base_offset plus its field's own offset */
+
+/* Add fields to a device - the xen_pcibk_config_add_fields*() helpers below
+ * expect a pointer to the first entry in an array (of which the ending is
+ * marked by size==0)
+ */
+int xen_pcibk_config_add_field_offset(struct pci_dev *dev,
+ const struct config_field *field,
+ unsigned int offset);
+
+/* Add a single field to a device with a base offset of 0. */
+static inline int xen_pcibk_config_add_field(struct pci_dev *dev,
+					   const struct config_field *field)
+{
+	const unsigned int no_base = 0;
+
+	return xen_pcibk_config_add_field_offset(dev, field, no_base);
+}
+
+/*
+ * Add every field of a size==0 terminated array to a device.
+ * Stops at, and returns, the first non-zero result; returns 0 when all
+ * fields were added (or the array is empty).
+ */
+static inline int xen_pcibk_config_add_fields(struct pci_dev *dev,
+					    const struct config_field *field)
+{
+	const struct config_field *cur;
+
+	for (cur = field; cur->size != 0; cur++) {
+		int err = xen_pcibk_config_add_field(dev, cur);
+
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
+/*
+ * Add every field of a size==0 terminated array to a device, each with the
+ * given base offset. Stops at, and returns, the first non-zero result;
+ * returns 0 when all fields were added (or the array is empty).
+ */
+static inline int xen_pcibk_config_add_fields_offset(struct pci_dev *dev,
+					const struct config_field *field,
+					unsigned int offset)
+{
+	const struct config_field *cur;
+
+	for (cur = field; cur->size != 0; cur++) {
+		int err = xen_pcibk_config_add_field_offset(dev, cur, offset);
+
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
+/* Read/Write the real configuration space */
+int xen_pcibk_read_config_byte(struct pci_dev *dev, int offset, u8 *value,
+ void *data);
+int xen_pcibk_read_config_word(struct pci_dev *dev, int offset, u16 *value,
+ void *data);
+int xen_pcibk_read_config_dword(struct pci_dev *dev, int offset, u32 *value,
+ void *data);
+int xen_pcibk_write_config_byte(struct pci_dev *dev, int offset, u8 value,
+ void *data);
+int xen_pcibk_write_config_word(struct pci_dev *dev, int offset, u16 value,
+ void *data);
+int xen_pcibk_write_config_dword(struct pci_dev *dev, int offset, u32 value,
+ void *data);
+
+int xen_pcibk_config_capability_init(void);
+
+int xen_pcibk_config_header_add_fields(struct pci_dev *dev);
+int xen_pcibk_config_capability_add_fields(struct pci_dev *dev);
+
+#endif /* __XEN_PCIBACK_CONF_SPACE_H__ */
diff --git a/drivers/xen/xen-pciback/conf_space_capability.c b/drivers/xen/xen-pciback/conf_space_capability.c
new file mode 100644
index 00000000000..7f83e9083e9
--- /dev/null
+++ b/drivers/xen/xen-pciback/conf_space_capab