Diffstat (limited to 'drivers/xen')
34 files changed, 1833 insertions, 1095 deletions
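The bulk of the churn in this series comes from splitting drivers/xen/events.c into an ABI-neutral core (events/events_base.c) plus two back-ends, events_2l.c and events_fifo.c, selected at boot through a table of function pointers; xen_init_IRQ() tries the FIFO back-end first and falls back to the 2-level one. Below is a minimal, stand-alone user-space sketch of that indirection. The names struct evtchn_ops, evtchn_ops_2l and xen_evtchn_2l_init come from the patch; the hook bodies, the channel count and main() are illustrative stubs, not the kernel implementation.

    #include <stdio.h>

    /*
     * Sketch only: the real struct (events_internal.h) carries more hooks
     * (setup, nr_channels, bind_to_cpu, clear_pending, set_pending,
     * is_pending, test_and_set_mask, mask, ...); three are enough to show
     * the dispatch.
     */
    struct evtchn_ops {
            unsigned (*max_channels)(void);
            void (*unmask)(unsigned port);
            void (*handle_events)(unsigned cpu);
    };

    static const struct evtchn_ops *evtchn_ops;

    /* Stand-ins for the 2-level back-end in events_2l.c. */
    static unsigned evtchn_2l_max_channels(void) { return 4096; /* e.g. 64-bit 2l limit */ }
    static void evtchn_2l_unmask(unsigned port) { printf("unmask port %u (2l)\n", port); }
    static void evtchn_2l_handle_events(unsigned cpu) { printf("scan 2l bitmap, cpu %u\n", cpu); }

    static const struct evtchn_ops evtchn_ops_2l = {
            .max_channels  = evtchn_2l_max_channels,
            .unmask        = evtchn_2l_unmask,
            .handle_events = evtchn_2l_handle_events,
    };

    static void xen_evtchn_2l_init(void) { evtchn_ops = &evtchn_ops_2l; }

    /* events_base.c then calls through the pointer, e.g.: */
    static void unmask_evtchn(unsigned port) { evtchn_ops->unmask(port); }

    int main(void)
    {
            xen_evtchn_2l_init();   /* fallback path when the FIFO ABI is unavailable */
            printf("max channels: %u\n", evtchn_ops->max_channels());
            unmask_evtchn(3);
            evtchn_ops->handle_events(0);
            return 0;
    }

The payoff of this indirection is that the upcall path, IRQ binding and resume code in events_base.c stay unchanged when the FIFO ABI is used; only the per-port bit manipulation differs per back-end.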
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index 23eae5cb69c..38fb36e1c59 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -3,7 +3,6 @@ menu "Xen driver support"  config XEN_BALLOON  	bool "Xen memory balloon driver" -	depends on !ARM  	default y  	help  	  The balloon driver allows the Xen domain to request more memory from @@ -140,7 +139,6 @@ config XEN_GRANT_DEV_ALLOC  config SWIOTLB_XEN  	def_bool y -	depends on PCI && X86  	select SWIOTLB  config XEN_TMEM @@ -223,7 +221,7 @@ config XEN_ACPI_PROCESSOR  	  To do that the driver parses the Power Management data and uploads  	  said information to the Xen hypervisor. Then the Xen hypervisor can -	  select the proper Cx and Pxx states. It also registers itslef as the +	  select the proper Cx and Pxx states. It also registers itself as the  	  SMM so that other drivers (such as ACPI cpufreq scaling driver) will  	  not load. diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index 14fe79d8634..45e00afa7f2 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -2,7 +2,8 @@ ifeq ($(filter y, $(CONFIG_ARM) $(CONFIG_ARM64)),)  obj-$(CONFIG_HOTPLUG_CPU)		+= cpu_hotplug.o  endif  obj-$(CONFIG_X86)			+= fallback.o -obj-y	+= grant-table.o features.o events.o balloon.o manage.o +obj-y	+= grant-table.o features.o balloon.o manage.o +obj-y	+= events/  obj-y	+= xenbus/  nostackp := $(call cc-option, -fno-stack-protector) @@ -15,7 +16,6 @@ xen-pad-$(CONFIG_X86) += xen-acpi-pad.o  dom0-$(CONFIG_X86) += pcpu.o  obj-$(CONFIG_XEN_DOM0)			+= $(dom0-y)  obj-$(CONFIG_BLOCK)			+= biomerge.o -obj-$(CONFIG_XEN_XENCOMM)		+= xencomm.o  obj-$(CONFIG_XEN_BALLOON)		+= xen-balloon.o  obj-$(CONFIG_XEN_SELFBALLOONING)	+= xen-selfballoon.o  obj-$(CONFIG_XEN_DEV_EVTCHN)		+= xen-evtchn.o diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index a50c6e3a7cc..5c660c77f03 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -157,13 +157,6 @@ static struct page *balloon_retrieve(bool prefer_highmem)  	return page;  } -static struct page *balloon_first_page(void) -{ -	if (list_empty(&ballooned_pages)) -		return NULL; -	return list_entry(ballooned_pages.next, struct page, lru); -} -  static struct page *balloon_next_page(struct page *page)  {  	struct list_head *next = page->lru.next; @@ -328,7 +321,7 @@ static enum bp_state increase_reservation(unsigned long nr_pages)  	if (nr_pages > ARRAY_SIZE(frame_list))  		nr_pages = ARRAY_SIZE(frame_list); -	page = balloon_first_page(); +	page = list_first_entry_or_null(&ballooned_pages, struct page, lru);  	for (i = 0; i < nr_pages; i++) {  		if (!page) {  			nr_pages = i; @@ -350,17 +343,19 @@ static enum bp_state increase_reservation(unsigned long nr_pages)  		pfn = page_to_pfn(page); -		set_phys_to_machine(pfn, frame_list[i]); -  #ifdef CONFIG_XEN_HAVE_PVMMU -		/* Link back into the page tables if not highmem. */ -		if (xen_pv_domain() && !PageHighMem(page)) { -			int ret; -			ret = HYPERVISOR_update_va_mapping( -				(unsigned long)__va(pfn << PAGE_SHIFT), -				mfn_pte(frame_list[i], PAGE_KERNEL), -				0); -			BUG_ON(ret); +		if (!xen_feature(XENFEAT_auto_translated_physmap)) { +			set_phys_to_machine(pfn, frame_list[i]); + +			/* Link back into the page tables if not highmem. 
*/ +			if (!PageHighMem(page)) { +				int ret; +				ret = HYPERVISOR_update_va_mapping( +						(unsigned long)__va(pfn << PAGE_SHIFT), +						mfn_pte(frame_list[i], PAGE_KERNEL), +						0); +				BUG_ON(ret); +			}  		}  #endif @@ -378,7 +373,6 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)  	enum bp_state state = BP_DONE;  	unsigned long  pfn, i;  	struct page   *page; -	struct page   *scratch_page;  	int ret;  	struct xen_memory_reservation reservation = {  		.address_bits = 0, @@ -398,8 +392,6 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)  	if (nr_pages > ARRAY_SIZE(frame_list))  		nr_pages = ARRAY_SIZE(frame_list); -	scratch_page = get_balloon_scratch_page(); -  	for (i = 0; i < nr_pages; i++) {  		page = alloc_page(gfp);  		if (page == NULL) { @@ -407,39 +399,52 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)  			state = BP_EAGAIN;  			break;  		} - -		pfn = page_to_pfn(page); -		frame_list[i] = pfn_to_mfn(pfn); -  		scrub_page(page); -#ifdef CONFIG_XEN_HAVE_PVMMU -		if (xen_pv_domain() && !PageHighMem(page)) { -			ret = HYPERVISOR_update_va_mapping( -				(unsigned long)__va(pfn << PAGE_SHIFT), -				pfn_pte(page_to_pfn(scratch_page), -					PAGE_KERNEL_RO), 0); -			BUG_ON(ret); -		} -#endif +		frame_list[i] = page_to_pfn(page);  	} -	/* Ensure that ballooned highmem pages don't have kmaps. */ +	/* +	 * Ensure that ballooned highmem pages don't have kmaps. +	 * +	 * Do this before changing the p2m as kmap_flush_unused() +	 * reads PTEs to obtain pages (and hence needs the original +	 * p2m entry). +	 */  	kmap_flush_unused(); -	flush_tlb_all(); -	/* No more mappings: invalidate P2M and add to balloon. */ +	/* Update direct mapping, invalidate P2M, and add to balloon. */  	for (i = 0; i < nr_pages; i++) { -		pfn = mfn_to_pfn(frame_list[i]); +		pfn = frame_list[i]; +		frame_list[i] = pfn_to_mfn(pfn); +		page = pfn_to_page(pfn); + +#ifdef CONFIG_XEN_HAVE_PVMMU +		/* +		 * Ballooned out frames are effectively replaced with +		 * a scratch frame.  Ensure direct mappings and the +		 * p2m are consistent. 
+		 */  		if (!xen_feature(XENFEAT_auto_translated_physmap)) { -			unsigned long p; -			p = page_to_pfn(scratch_page); -			__set_phys_to_machine(pfn, pfn_to_mfn(p)); +			if (!PageHighMem(page)) { +				struct page *scratch_page = get_balloon_scratch_page(); + +				ret = HYPERVISOR_update_va_mapping( +						(unsigned long)__va(pfn << PAGE_SHIFT), +						pfn_pte(page_to_pfn(scratch_page), +							PAGE_KERNEL_RO), 0); +				BUG_ON(ret); + +				put_balloon_scratch_page(); +			} +			__set_phys_to_machine(pfn, INVALID_P2M_ENTRY);  		} -		balloon_append(pfn_to_page(pfn)); +#endif + +		balloon_append(page);  	} -	put_balloon_scratch_page(); +	flush_tlb_all();  	set_xen_guest_handle(reservation.extent_start, frame_list);  	reservation.nr_extents   = nr_pages; @@ -597,19 +602,29 @@ static void __init balloon_add_region(unsigned long start_pfn,  	}  } -static int __cpuinit balloon_cpu_notify(struct notifier_block *self, +static int alloc_balloon_scratch_page(int cpu) +{ +	if (per_cpu(balloon_scratch_page, cpu) != NULL) +		return 0; + +	per_cpu(balloon_scratch_page, cpu) = alloc_page(GFP_KERNEL); +	if (per_cpu(balloon_scratch_page, cpu) == NULL) { +		pr_warn("Failed to allocate balloon_scratch_page for cpu %d\n", cpu); +		return -ENOMEM; +	} + +	return 0; +} + + +static int balloon_cpu_notify(struct notifier_block *self,  				    unsigned long action, void *hcpu)  {  	int cpu = (long)hcpu;  	switch (action) {  	case CPU_UP_PREPARE: -		if (per_cpu(balloon_scratch_page, cpu) != NULL) -			break; -		per_cpu(balloon_scratch_page, cpu) = alloc_page(GFP_KERNEL); -		if (per_cpu(balloon_scratch_page, cpu) == NULL) { -			pr_warn("Failed to allocate balloon_scratch_page for cpu %d\n", cpu); +		if (alloc_balloon_scratch_page(cpu))  			return NOTIFY_BAD; -		}  		break;  	default:  		break; @@ -617,7 +632,7 @@ static int __cpuinit balloon_cpu_notify(struct notifier_block *self,  	return NOTIFY_OK;  } -static struct notifier_block balloon_cpu_notifier __cpuinitdata = { +static struct notifier_block balloon_cpu_notifier = {  	.notifier_call	= balloon_cpu_notify,  }; @@ -628,21 +643,25 @@ static int __init balloon_init(void)  	if (!xen_domain())  		return -ENODEV; -	for_each_online_cpu(cpu) -	{ -		per_cpu(balloon_scratch_page, cpu) = alloc_page(GFP_KERNEL); -		if (per_cpu(balloon_scratch_page, cpu) == NULL) { -			pr_warn("Failed to allocate balloon_scratch_page for cpu %d\n", cpu); -			return -ENOMEM; +	if (!xen_feature(XENFEAT_auto_translated_physmap)) { +		register_cpu_notifier(&balloon_cpu_notifier); + +		get_online_cpus(); +		for_each_online_cpu(cpu) { +			if (alloc_balloon_scratch_page(cpu)) { +				put_online_cpus(); +				unregister_cpu_notifier(&balloon_cpu_notifier); +				return -ENOMEM; +			}  		} +		put_online_cpus();  	} -	register_cpu_notifier(&balloon_cpu_notifier);  	pr_info("Initialising balloon driver\n");  	balloon_stats.current_pages = xen_pv_domain()  		? 
min(xen_start_info->nr_pages - xen_released_pages, max_pfn) -		: max_pfn; +		: get_num_physpages();  	balloon_stats.target_pages  = balloon_stats.current_pages;  	balloon_stats.balloon_low   = 0;  	balloon_stats.balloon_high  = 0; diff --git a/drivers/xen/dbgp.c b/drivers/xen/dbgp.c index f3ccc80a455..8145a59fd9f 100644 --- a/drivers/xen/dbgp.c +++ b/drivers/xen/dbgp.c @@ -19,7 +19,7 @@ static int xen_dbgp_op(struct usb_hcd *hcd, int op)  	dbgp.op = op;  #ifdef CONFIG_PCI -	if (ctrlr->bus == &pci_bus_type) { +	if (dev_is_pci(ctrlr)) {  		const struct pci_dev *pdev = to_pci_dev(ctrlr);  		dbgp.u.pci.seg = pci_domain_nr(pdev->bus); diff --git a/drivers/xen/events/Makefile b/drivers/xen/events/Makefile new file mode 100644 index 00000000000..62be55cd981 --- /dev/null +++ b/drivers/xen/events/Makefile @@ -0,0 +1,5 @@ +obj-y += events.o + +events-y += events_base.o +events-y += events_2l.o +events-y += events_fifo.o diff --git a/drivers/xen/events/events_2l.c b/drivers/xen/events/events_2l.c new file mode 100644 index 00000000000..5db43fc100a --- /dev/null +++ b/drivers/xen/events/events_2l.c @@ -0,0 +1,365 @@ +/* + * Xen event channels (2-level ABI) + * + * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007 + */ + +#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt + +#include <linux/linkage.h> +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/module.h> + +#include <asm/sync_bitops.h> +#include <asm/xen/hypercall.h> +#include <asm/xen/hypervisor.h> + +#include <xen/xen.h> +#include <xen/xen-ops.h> +#include <xen/events.h> +#include <xen/interface/xen.h> +#include <xen/interface/event_channel.h> + +#include "events_internal.h" + +/* + * Note sizeof(xen_ulong_t) can be more than sizeof(unsigned long). Be + * careful to only use bitops which allow for this (e.g + * test_bit/find_first_bit and friends but not __ffs) and to pass + * BITS_PER_EVTCHN_WORD as the bitmask length. + */ +#define BITS_PER_EVTCHN_WORD (sizeof(xen_ulong_t)*8) +/* + * Make a bitmask (i.e. unsigned long *) of a xen_ulong_t + * array. Primarily to avoid long lines (hence the terse name). 
+ */ +#define BM(x) (unsigned long *)(x) +/* Find the first set bit in a evtchn mask */ +#define EVTCHN_FIRST_BIT(w) find_first_bit(BM(&(w)), BITS_PER_EVTCHN_WORD) + +static DEFINE_PER_CPU(xen_ulong_t [EVTCHN_2L_NR_CHANNELS/BITS_PER_EVTCHN_WORD], +		      cpu_evtchn_mask); + +static unsigned evtchn_2l_max_channels(void) +{ +	return EVTCHN_2L_NR_CHANNELS; +} + +static void evtchn_2l_bind_to_cpu(struct irq_info *info, unsigned cpu) +{ +	clear_bit(info->evtchn, BM(per_cpu(cpu_evtchn_mask, info->cpu))); +	set_bit(info->evtchn, BM(per_cpu(cpu_evtchn_mask, cpu))); +} + +static void evtchn_2l_clear_pending(unsigned port) +{ +	struct shared_info *s = HYPERVISOR_shared_info; +	sync_clear_bit(port, BM(&s->evtchn_pending[0])); +} + +static void evtchn_2l_set_pending(unsigned port) +{ +	struct shared_info *s = HYPERVISOR_shared_info; +	sync_set_bit(port, BM(&s->evtchn_pending[0])); +} + +static bool evtchn_2l_is_pending(unsigned port) +{ +	struct shared_info *s = HYPERVISOR_shared_info; +	return sync_test_bit(port, BM(&s->evtchn_pending[0])); +} + +static bool evtchn_2l_test_and_set_mask(unsigned port) +{ +	struct shared_info *s = HYPERVISOR_shared_info; +	return sync_test_and_set_bit(port, BM(&s->evtchn_mask[0])); +} + +static void evtchn_2l_mask(unsigned port) +{ +	struct shared_info *s = HYPERVISOR_shared_info; +	sync_set_bit(port, BM(&s->evtchn_mask[0])); +} + +static void evtchn_2l_unmask(unsigned port) +{ +	struct shared_info *s = HYPERVISOR_shared_info; +	unsigned int cpu = get_cpu(); +	int do_hypercall = 0, evtchn_pending = 0; + +	BUG_ON(!irqs_disabled()); + +	if (unlikely((cpu != cpu_from_evtchn(port)))) +		do_hypercall = 1; +	else { +		/* +		 * Need to clear the mask before checking pending to +		 * avoid a race with an event becoming pending. +		 * +		 * EVTCHNOP_unmask will only trigger an upcall if the +		 * mask bit was set, so if a hypercall is needed +		 * remask the event. +		 */ +		sync_clear_bit(port, BM(&s->evtchn_mask[0])); +		evtchn_pending = sync_test_bit(port, BM(&s->evtchn_pending[0])); + +		if (unlikely(evtchn_pending && xen_hvm_domain())) { +			sync_set_bit(port, BM(&s->evtchn_mask[0])); +			do_hypercall = 1; +		} +	} + +	/* Slow path (hypercall) if this is a non-local port or if this is +	 * an hvm domain and an event is pending (hvm domains don't have +	 * their own implementation of irq_enable). */ +	if (do_hypercall) { +		struct evtchn_unmask unmask = { .port = port }; +		(void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask); +	} else { +		struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu); + +		/* +		 * The following is basically the equivalent of +		 * 'hw_resend_irq'. Just like a real IO-APIC we 'lose +		 * the interrupt edge' if the channel is masked. +		 */ +		if (evtchn_pending && +		    !sync_test_and_set_bit(port / BITS_PER_EVTCHN_WORD, +					   BM(&vcpu_info->evtchn_pending_sel))) +			vcpu_info->evtchn_upcall_pending = 1; +	} + +	put_cpu(); +} + +static DEFINE_PER_CPU(unsigned int, current_word_idx); +static DEFINE_PER_CPU(unsigned int, current_bit_idx); + +/* + * Mask out the i least significant bits of w + */ +#define MASK_LSBS(w, i) (w & ((~((xen_ulong_t)0UL)) << i)) + +static inline xen_ulong_t active_evtchns(unsigned int cpu, +					 struct shared_info *sh, +					 unsigned int idx) +{ +	return sh->evtchn_pending[idx] & +		per_cpu(cpu_evtchn_mask, cpu)[idx] & +		~sh->evtchn_mask[idx]; +} + +/* + * Search the CPU's pending events bitmasks.  For each one found, map + * the event number to an irq, and feed it into do_IRQ() for handling. 
+ * + * Xen uses a two-level bitmap to speed searching.  The first level is + * a bitset of words which contain pending event bits.  The second + * level is a bitset of pending events themselves. + */ +static void evtchn_2l_handle_events(unsigned cpu) +{ +	int irq; +	xen_ulong_t pending_words; +	xen_ulong_t pending_bits; +	int start_word_idx, start_bit_idx; +	int word_idx, bit_idx; +	int i; +	struct shared_info *s = HYPERVISOR_shared_info; +	struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu); + +	/* Timer interrupt has highest priority. */ +	irq = irq_from_virq(cpu, VIRQ_TIMER); +	if (irq != -1) { +		unsigned int evtchn = evtchn_from_irq(irq); +		word_idx = evtchn / BITS_PER_LONG; +		bit_idx = evtchn % BITS_PER_LONG; +		if (active_evtchns(cpu, s, word_idx) & (1ULL << bit_idx)) +			generic_handle_irq(irq); +	} + +	/* +	 * Master flag must be cleared /before/ clearing +	 * selector flag. xchg_xen_ulong must contain an +	 * appropriate barrier. +	 */ +	pending_words = xchg_xen_ulong(&vcpu_info->evtchn_pending_sel, 0); + +	start_word_idx = __this_cpu_read(current_word_idx); +	start_bit_idx = __this_cpu_read(current_bit_idx); + +	word_idx = start_word_idx; + +	for (i = 0; pending_words != 0; i++) { +		xen_ulong_t words; + +		words = MASK_LSBS(pending_words, word_idx); + +		/* +		 * If we masked out all events, wrap to beginning. +		 */ +		if (words == 0) { +			word_idx = 0; +			bit_idx = 0; +			continue; +		} +		word_idx = EVTCHN_FIRST_BIT(words); + +		pending_bits = active_evtchns(cpu, s, word_idx); +		bit_idx = 0; /* usually scan entire word from start */ +		/* +		 * We scan the starting word in two parts. +		 * +		 * 1st time: start in the middle, scanning the +		 * upper bits. +		 * +		 * 2nd time: scan the whole word (not just the +		 * parts skipped in the first pass) -- if an +		 * event in the previously scanned bits is +		 * pending again it would just be scanned on +		 * the next loop anyway. +		 */ +		if (word_idx == start_word_idx) { +			if (i == 0) +				bit_idx = start_bit_idx; +		} + +		do { +			xen_ulong_t bits; +			int port; + +			bits = MASK_LSBS(pending_bits, bit_idx); + +			/* If we masked out all events, move on. */ +			if (bits == 0) +				break; + +			bit_idx = EVTCHN_FIRST_BIT(bits); + +			/* Process port. */ +			port = (word_idx * BITS_PER_EVTCHN_WORD) + bit_idx; +			irq = get_evtchn_to_irq(port); + +			if (irq != -1) +				generic_handle_irq(irq); + +			bit_idx = (bit_idx + 1) % BITS_PER_EVTCHN_WORD; + +			/* Next caller starts at last processed + 1 */ +			__this_cpu_write(current_word_idx, +					 bit_idx ? word_idx : +					 (word_idx+1) % BITS_PER_EVTCHN_WORD); +			__this_cpu_write(current_bit_idx, bit_idx); +		} while (bit_idx != 0); + +		/* Scan start_l1i twice; all others once. */ +		if ((word_idx != start_word_idx) || (i != 0)) +			pending_words &= ~(1UL << word_idx); + +		word_idx = (word_idx + 1) % BITS_PER_EVTCHN_WORD; +	} +} + +irqreturn_t xen_debug_interrupt(int irq, void *dev_id) +{ +	struct shared_info *sh = HYPERVISOR_shared_info; +	int cpu = smp_processor_id(); +	xen_ulong_t *cpu_evtchn = per_cpu(cpu_evtchn_mask, cpu); +	int i; +	unsigned long flags; +	static DEFINE_SPINLOCK(debug_lock); +	struct vcpu_info *v; + +	spin_lock_irqsave(&debug_lock, flags); + +	printk("\nvcpu %d\n  ", cpu); + +	for_each_online_cpu(i) { +		int pending; +		v = per_cpu(xen_vcpu, i); +		pending = (get_irq_regs() && i == cpu) +			? 
xen_irqs_disabled(get_irq_regs()) +			: v->evtchn_upcall_mask; +		printk("%d: masked=%d pending=%d event_sel %0*"PRI_xen_ulong"\n  ", i, +		       pending, v->evtchn_upcall_pending, +		       (int)(sizeof(v->evtchn_pending_sel)*2), +		       v->evtchn_pending_sel); +	} +	v = per_cpu(xen_vcpu, cpu); + +	printk("\npending:\n   "); +	for (i = ARRAY_SIZE(sh->evtchn_pending)-1; i >= 0; i--) +		printk("%0*"PRI_xen_ulong"%s", +		       (int)sizeof(sh->evtchn_pending[0])*2, +		       sh->evtchn_pending[i], +		       i % 8 == 0 ? "\n   " : " "); +	printk("\nglobal mask:\n   "); +	for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--) +		printk("%0*"PRI_xen_ulong"%s", +		       (int)(sizeof(sh->evtchn_mask[0])*2), +		       sh->evtchn_mask[i], +		       i % 8 == 0 ? "\n   " : " "); + +	printk("\nglobally unmasked:\n   "); +	for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--) +		printk("%0*"PRI_xen_ulong"%s", +		       (int)(sizeof(sh->evtchn_mask[0])*2), +		       sh->evtchn_pending[i] & ~sh->evtchn_mask[i], +		       i % 8 == 0 ? "\n   " : " "); + +	printk("\nlocal cpu%d mask:\n   ", cpu); +	for (i = (EVTCHN_2L_NR_CHANNELS/BITS_PER_EVTCHN_WORD)-1; i >= 0; i--) +		printk("%0*"PRI_xen_ulong"%s", (int)(sizeof(cpu_evtchn[0])*2), +		       cpu_evtchn[i], +		       i % 8 == 0 ? "\n   " : " "); + +	printk("\nlocally unmasked:\n   "); +	for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--) { +		xen_ulong_t pending = sh->evtchn_pending[i] +			& ~sh->evtchn_mask[i] +			& cpu_evtchn[i]; +		printk("%0*"PRI_xen_ulong"%s", +		       (int)(sizeof(sh->evtchn_mask[0])*2), +		       pending, i % 8 == 0 ? "\n   " : " "); +	} + +	printk("\npending list:\n"); +	for (i = 0; i < EVTCHN_2L_NR_CHANNELS; i++) { +		if (sync_test_bit(i, BM(sh->evtchn_pending))) { +			int word_idx = i / BITS_PER_EVTCHN_WORD; +			printk("  %d: event %d -> irq %d%s%s%s\n", +			       cpu_from_evtchn(i), i, +			       get_evtchn_to_irq(i), +			       sync_test_bit(word_idx, BM(&v->evtchn_pending_sel)) +			       ? "" : " l2-clear", +			       !sync_test_bit(i, BM(sh->evtchn_mask)) +			       ? "" : " globally-masked", +			       sync_test_bit(i, BM(cpu_evtchn)) +			       ? "" : " locally-masked"); +		} +	} + +	spin_unlock_irqrestore(&debug_lock, flags); + +	return IRQ_HANDLED; +} + +static const struct evtchn_ops evtchn_ops_2l = { +	.max_channels      = evtchn_2l_max_channels, +	.nr_channels       = evtchn_2l_max_channels, +	.bind_to_cpu       = evtchn_2l_bind_to_cpu, +	.clear_pending     = evtchn_2l_clear_pending, +	.set_pending       = evtchn_2l_set_pending, +	.is_pending        = evtchn_2l_is_pending, +	.test_and_set_mask = evtchn_2l_test_and_set_mask, +	.mask              = evtchn_2l_mask, +	.unmask            = evtchn_2l_unmask, +	.handle_events     = evtchn_2l_handle_events, +}; + +void __init xen_evtchn_2l_init(void) +{ +	pr_info("Using 2-level ABI\n"); +	evtchn_ops = &evtchn_ops_2l; +} diff --git a/drivers/xen/events.c b/drivers/xen/events/events_base.c index 4035e833ea2..c919d3d5c84 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events/events_base.c @@ -59,6 +59,10 @@  #include <xen/interface/vcpu.h>  #include <asm/hw_irq.h> +#include "events_internal.h" + +const struct evtchn_ops *evtchn_ops; +  /*   * This lock protects updates to the following mapping and reference-count   * arrays. The lock does not need to be acquired to read the mapping tables. @@ -73,71 +77,15 @@ static DEFINE_PER_CPU(int [NR_VIRQS], virq_to_irq) = {[0 ... 
NR_VIRQS-1] = -1};  /* IRQ <-> IPI mapping */  static DEFINE_PER_CPU(int [XEN_NR_IPIS], ipi_to_irq) = {[0 ... XEN_NR_IPIS-1] = -1}; -/* Interrupt types. */ -enum xen_irq_type { -	IRQT_UNBOUND = 0, -	IRQT_PIRQ, -	IRQT_VIRQ, -	IRQT_IPI, -	IRQT_EVTCHN -}; - -/* - * Packed IRQ information: - * type - enum xen_irq_type - * event channel - irq->event channel mapping - * cpu - cpu this event channel is bound to - * index - type-specific information: - *    PIRQ - physical IRQ, GSI, flags, and owner domain - *    VIRQ - virq number - *    IPI - IPI vector - *    EVTCHN - - */ -struct irq_info { -	struct list_head list; -	int refcnt; -	enum xen_irq_type type;	/* type */ -	unsigned irq; -	unsigned short evtchn;	/* event channel */ -	unsigned short cpu;	/* cpu bound */ - -	union { -		unsigned short virq; -		enum ipi_vector ipi; -		struct { -			unsigned short pirq; -			unsigned short gsi; -			unsigned char flags; -			uint16_t domid; -		} pirq; -	} u; -}; -#define PIRQ_NEEDS_EOI	(1 << 0) -#define PIRQ_SHAREABLE	(1 << 1) - -static int *evtchn_to_irq; +int **evtchn_to_irq;  #ifdef CONFIG_X86  static unsigned long *pirq_eoi_map;  #endif  static bool (*pirq_needs_eoi)(unsigned irq); -/* - * Note sizeof(xen_ulong_t) can be more than sizeof(unsigned long). Be - * careful to only use bitops which allow for this (e.g - * test_bit/find_first_bit and friends but not __ffs) and to pass - * BITS_PER_EVTCHN_WORD as the bitmask length. - */ -#define BITS_PER_EVTCHN_WORD (sizeof(xen_ulong_t)*8) -/* - * Make a bitmask (i.e. unsigned long *) of a xen_ulong_t - * array. Primarily to avoid long lines (hence the terse name). - */ -#define BM(x) (unsigned long *)(x) -/* Find the first set bit in a evtchn mask */ -#define EVTCHN_FIRST_BIT(w) find_first_bit(BM(&(w)), BITS_PER_EVTCHN_WORD) - -static DEFINE_PER_CPU(xen_ulong_t [NR_EVENT_CHANNELS/BITS_PER_EVTCHN_WORD], -		      cpu_evtchn_mask); +#define EVTCHN_ROW(e)  (e / (PAGE_SIZE/sizeof(**evtchn_to_irq))) +#define EVTCHN_COL(e)  (e % (PAGE_SIZE/sizeof(**evtchn_to_irq))) +#define EVTCHN_PER_ROW (PAGE_SIZE / sizeof(**evtchn_to_irq))  /* Xen will never allocate port zero for any purpose. 
*/  #define VALID_EVTCHN(chn)	((chn) != 0) @@ -148,19 +96,75 @@ static struct irq_chip xen_pirq_chip;  static void enable_dynirq(struct irq_data *data);  static void disable_dynirq(struct irq_data *data); +static void clear_evtchn_to_irq_row(unsigned row) +{ +	unsigned col; + +	for (col = 0; col < EVTCHN_PER_ROW; col++) +		evtchn_to_irq[row][col] = -1; +} + +static void clear_evtchn_to_irq_all(void) +{ +	unsigned row; + +	for (row = 0; row < EVTCHN_ROW(xen_evtchn_max_channels()); row++) { +		if (evtchn_to_irq[row] == NULL) +			continue; +		clear_evtchn_to_irq_row(row); +	} +} + +static int set_evtchn_to_irq(unsigned evtchn, unsigned irq) +{ +	unsigned row; +	unsigned col; + +	if (evtchn >= xen_evtchn_max_channels()) +		return -EINVAL; + +	row = EVTCHN_ROW(evtchn); +	col = EVTCHN_COL(evtchn); + +	if (evtchn_to_irq[row] == NULL) { +		/* Unallocated irq entries return -1 anyway */ +		if (irq == -1) +			return 0; + +		evtchn_to_irq[row] = (int *)get_zeroed_page(GFP_KERNEL); +		if (evtchn_to_irq[row] == NULL) +			return -ENOMEM; + +		clear_evtchn_to_irq_row(row); +	} + +	evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)] = irq; +	return 0; +} + +int get_evtchn_to_irq(unsigned evtchn) +{ +	if (evtchn >= xen_evtchn_max_channels()) +		return -1; +	if (evtchn_to_irq[EVTCHN_ROW(evtchn)] == NULL) +		return -1; +	return evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)]; +} +  /* Get info for IRQ */ -static struct irq_info *info_for_irq(unsigned irq) +struct irq_info *info_for_irq(unsigned irq)  {  	return irq_get_handler_data(irq);  }  /* Constructors for packed IRQ information. */ -static void xen_irq_info_common_init(struct irq_info *info, +static int xen_irq_info_common_setup(struct irq_info *info,  				     unsigned irq,  				     enum xen_irq_type type, -				     unsigned short evtchn, +				     unsigned evtchn,  				     unsigned short cpu)  { +	int ret;  	BUG_ON(info->type != IRQT_UNBOUND && info->type != type); @@ -169,68 +173,78 @@ static void xen_irq_info_common_init(struct irq_info *info,  	info->evtchn = evtchn;  	info->cpu = cpu; -	evtchn_to_irq[evtchn] = irq; +	ret = set_evtchn_to_irq(evtchn, irq); +	if (ret < 0) +		return ret;  	irq_clear_status_flags(irq, IRQ_NOREQUEST|IRQ_NOAUTOEN); + +	return xen_evtchn_port_setup(info);  } -static void xen_irq_info_evtchn_init(unsigned irq, -				     unsigned short evtchn) +static int xen_irq_info_evtchn_setup(unsigned irq, +				     unsigned evtchn)  {  	struct irq_info *info = info_for_irq(irq); -	xen_irq_info_common_init(info, irq, IRQT_EVTCHN, evtchn, 0); +	return xen_irq_info_common_setup(info, irq, IRQT_EVTCHN, evtchn, 0);  } -static void xen_irq_info_ipi_init(unsigned cpu, +static int xen_irq_info_ipi_setup(unsigned cpu,  				  unsigned irq, -				  unsigned short evtchn, +				  unsigned evtchn,  				  enum ipi_vector ipi)  {  	struct irq_info *info = info_for_irq(irq); -	xen_irq_info_common_init(info, irq, IRQT_IPI, evtchn, 0); -  	info->u.ipi = ipi;  	per_cpu(ipi_to_irq, cpu)[ipi] = irq; + +	return xen_irq_info_common_setup(info, irq, IRQT_IPI, evtchn, 0);  } -static void xen_irq_info_virq_init(unsigned cpu, +static int xen_irq_info_virq_setup(unsigned cpu,  				   unsigned irq, -				   unsigned short evtchn, -				   unsigned short virq) +				   unsigned evtchn, +				   unsigned virq)  {  	struct irq_info *info = info_for_irq(irq); -	xen_irq_info_common_init(info, irq, IRQT_VIRQ, evtchn, 0); -  	info->u.virq = virq;  	per_cpu(virq_to_irq, cpu)[virq] = irq; + +	return xen_irq_info_common_setup(info, irq, IRQT_VIRQ, evtchn, 0);  } 
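The flat evtchn_to_irq[NR_EVENT_CHANNELS] array is replaced above by a two-level table so the much larger FIFO port space does not have to be allocated up front: EVTCHN_ROW()/EVTCHN_COL() split a port into a page index and an offset, and a row page is allocated (get_zeroed_page() and then filled with -1) only when a port in that row is first bound. The following compressed user-space sketch mirrors that lookup/update logic; malloc stands in for get_zeroed_page(), the static pointer array stands in for the kcalloc()'d one in xen_init_IRQ(), and the channel limit is illustrative.

    #include <stdio.h>
    #include <stdlib.h>

    #define PAGE_SIZE       4096UL
    #define EVTCHN_PER_ROW  (PAGE_SIZE / sizeof(int))          /* 1024 irq slots per row page */
    #define EVTCHN_ROW(e)   ((e) / EVTCHN_PER_ROW)
    #define EVTCHN_COL(e)   ((e) % EVTCHN_PER_ROW)
    #define MAX_CHANNELS    (1U << 17)                         /* illustrative FIFO-sized limit */

    static int *evtchn_to_irq[MAX_CHANNELS / EVTCHN_PER_ROW];  /* kcalloc()'d in the kernel */

    static int set_evtchn_to_irq(unsigned evtchn, int irq)
    {
            unsigned row = EVTCHN_ROW(evtchn), col = EVTCHN_COL(evtchn), i;

            if (evtchn >= MAX_CHANNELS)
                    return -1;                                 /* -EINVAL in the patch */

            if (!evtchn_to_irq[row]) {
                    if (irq == -1)                             /* unallocated rows read as -1 anyway */
                            return 0;
                    evtchn_to_irq[row] = malloc(PAGE_SIZE);    /* get_zeroed_page() in the kernel */
                    if (!evtchn_to_irq[row])
                            return -1;                         /* -ENOMEM */
                    for (i = 0; i < EVTCHN_PER_ROW; i++)
                            evtchn_to_irq[row][i] = -1;        /* clear_evtchn_to_irq_row() */
            }
            evtchn_to_irq[row][col] = irq;
            return 0;
    }

    static int get_evtchn_to_irq(unsigned evtchn)
    {
            if (evtchn >= MAX_CHANNELS || !evtchn_to_irq[EVTCHN_ROW(evtchn)])
                    return -1;
            return evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)];
    }

    int main(void)
    {
            printf("%d\n", get_evtchn_to_irq(5000));   /* -1: row never allocated */
            set_evtchn_to_irq(5000, 42);               /* lazily allocates row 5000/1024 = 4 */
            printf("%d\n", get_evtchn_to_irq(5000));   /* 42 */
            return 0;
    }

The cost is one extra pointer dereference per lookup; the gain is that a guest which only binds a handful of low-numbered ports commits only the rows it actually touches instead of a table sized for the full 2^17-port FIFO space.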
-static void xen_irq_info_pirq_init(unsigned irq, -				   unsigned short evtchn, -				   unsigned short pirq, -				   unsigned short gsi, +static int xen_irq_info_pirq_setup(unsigned irq, +				   unsigned evtchn, +				   unsigned pirq, +				   unsigned gsi,  				   uint16_t domid,  				   unsigned char flags)  {  	struct irq_info *info = info_for_irq(irq); -	xen_irq_info_common_init(info, irq, IRQT_PIRQ, evtchn, 0); -  	info->u.pirq.pirq = pirq;  	info->u.pirq.gsi = gsi;  	info->u.pirq.domid = domid;  	info->u.pirq.flags = flags; + +	return xen_irq_info_common_setup(info, irq, IRQT_PIRQ, evtchn, 0); +} + +static void xen_irq_info_cleanup(struct irq_info *info) +{ +	set_evtchn_to_irq(info->evtchn, -1); +	info->evtchn = 0;  }  /*   * Accessors for packed IRQ information.   */ -static unsigned int evtchn_from_irq(unsigned irq) +unsigned int evtchn_from_irq(unsigned irq)  {  	if (unlikely(WARN(irq < 0 || irq >= nr_irqs, "Invalid irq %d!\n", irq)))  		return 0; @@ -240,10 +254,15 @@ static unsigned int evtchn_from_irq(unsigned irq)  unsigned irq_from_evtchn(unsigned int evtchn)  { -	return evtchn_to_irq[evtchn]; +	return get_evtchn_to_irq(evtchn);  }  EXPORT_SYMBOL_GPL(irq_from_evtchn); +int irq_from_virq(unsigned int cpu, unsigned int virq) +{ +	return per_cpu(virq_to_irq, cpu)[virq]; +} +  static enum ipi_vector ipi_from_irq(unsigned irq)  {  	struct irq_info *info = info_for_irq(irq); @@ -279,14 +298,14 @@ static enum xen_irq_type type_from_irq(unsigned irq)  	return info_for_irq(irq)->type;  } -static unsigned cpu_from_irq(unsigned irq) +unsigned cpu_from_irq(unsigned irq)  {  	return info_for_irq(irq)->cpu;  } -static unsigned int cpu_from_evtchn(unsigned int evtchn) +unsigned int cpu_from_evtchn(unsigned int evtchn)  { -	int irq = evtchn_to_irq[evtchn]; +	int irq = get_evtchn_to_irq(evtchn);  	unsigned ret = 0;  	if (irq != -1) @@ -310,67 +329,28 @@ static bool pirq_needs_eoi_flag(unsigned irq)  	return info->u.pirq.flags & PIRQ_NEEDS_EOI;  } -static inline xen_ulong_t active_evtchns(unsigned int cpu, -					 struct shared_info *sh, -					 unsigned int idx) -{ -	return sh->evtchn_pending[idx] & -		per_cpu(cpu_evtchn_mask, cpu)[idx] & -		~sh->evtchn_mask[idx]; -} -  static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu)  { -	int irq = evtchn_to_irq[chn]; +	int irq = get_evtchn_to_irq(chn); +	struct irq_info *info = info_for_irq(irq);  	BUG_ON(irq == -1);  #ifdef CONFIG_SMP -	cpumask_copy(irq_to_desc(irq)->irq_data.affinity, cpumask_of(cpu)); -#endif - -	clear_bit(chn, BM(per_cpu(cpu_evtchn_mask, cpu_from_irq(irq)))); -	set_bit(chn, BM(per_cpu(cpu_evtchn_mask, cpu))); - -	info_for_irq(irq)->cpu = cpu; -} - -static void init_evtchn_cpu_bindings(void) -{ -	int i; -#ifdef CONFIG_SMP -	struct irq_info *info; - -	/* By default all event channels notify CPU#0. */ -	list_for_each_entry(info, &xen_irq_list_head, list) { -		struct irq_desc *desc = irq_to_desc(info->irq); -		cpumask_copy(desc->irq_data.affinity, cpumask_of(0)); -	} +	cpumask_copy(irq_get_irq_data(irq)->affinity, cpumask_of(cpu));  #endif +	xen_evtchn_port_bind_to_cpu(info, cpu); -	for_each_possible_cpu(i) -		memset(per_cpu(cpu_evtchn_mask, i), -		       (i == 0) ? 
~0 : 0, NR_EVENT_CHANNELS/8); -} - -static inline void clear_evtchn(int port) -{ -	struct shared_info *s = HYPERVISOR_shared_info; -	sync_clear_bit(port, BM(&s->evtchn_pending[0])); +	info->cpu = cpu;  } -static inline void set_evtchn(int port) +static void xen_evtchn_mask_all(void)  { -	struct shared_info *s = HYPERVISOR_shared_info; -	sync_set_bit(port, BM(&s->evtchn_pending[0])); -} +	unsigned int evtchn; -static inline int test_evtchn(int port) -{ -	struct shared_info *s = HYPERVISOR_shared_info; -	return sync_test_bit(port, BM(&s->evtchn_pending[0])); +	for (evtchn = 0; evtchn < xen_evtchn_nr_channels(); evtchn++) +		mask_evtchn(evtchn);  } -  /**   * notify_remote_via_irq - send event to remote end of event channel via irq   * @irq: irq of event channel to send event to @@ -388,71 +368,12 @@ void notify_remote_via_irq(int irq)  }  EXPORT_SYMBOL_GPL(notify_remote_via_irq); -static void mask_evtchn(int port) -{ -	struct shared_info *s = HYPERVISOR_shared_info; -	sync_set_bit(port, BM(&s->evtchn_mask[0])); -} - -static void unmask_evtchn(int port) -{ -	struct shared_info *s = HYPERVISOR_shared_info; -	unsigned int cpu = get_cpu(); -	int do_hypercall = 0, evtchn_pending = 0; - -	BUG_ON(!irqs_disabled()); - -	if (unlikely((cpu != cpu_from_evtchn(port)))) -		do_hypercall = 1; -	else { -		/* -		 * Need to clear the mask before checking pending to -		 * avoid a race with an event becoming pending. -		 * -		 * EVTCHNOP_unmask will only trigger an upcall if the -		 * mask bit was set, so if a hypercall is needed -		 * remask the event. -		 */ -		sync_clear_bit(port, BM(&s->evtchn_mask[0])); -		evtchn_pending = sync_test_bit(port, BM(&s->evtchn_pending[0])); - -		if (unlikely(evtchn_pending && xen_hvm_domain())) { -			sync_set_bit(port, BM(&s->evtchn_mask[0])); -			do_hypercall = 1; -		} -	} - -	/* Slow path (hypercall) if this is a non-local port or if this is -	 * an hvm domain and an event is pending (hvm domains don't have -	 * their own implementation of irq_enable). */ -	if (do_hypercall) { -		struct evtchn_unmask unmask = { .port = port }; -		(void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask); -	} else { -		struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu); - -		/* -		 * The following is basically the equivalent of -		 * 'hw_resend_irq'. Just like a real IO-APIC we 'lose -		 * the interrupt edge' if the channel is masked. -		 */ -		if (evtchn_pending && -		    !sync_test_and_set_bit(port / BITS_PER_EVTCHN_WORD, -					   BM(&vcpu_info->evtchn_pending_sel))) -			vcpu_info->evtchn_upcall_pending = 1; -	} - -	put_cpu(); -} -  static void xen_irq_init(unsigned irq)  {  	struct irq_info *info;  #ifdef CONFIG_SMP -	struct irq_desc *desc = irq_to_desc(irq); -  	/* By default all event channels notify CPU#0. */ -	cpumask_copy(desc->irq_data.affinity, cpumask_of(0)); +	cpumask_copy(irq_get_irq_data(irq)->affinity, cpumask_of(0));  #endif  	info = kzalloc(sizeof(*info), GFP_KERNEL); @@ -467,29 +388,22 @@ static void xen_irq_init(unsigned irq)  	list_add_tail(&info->list, &xen_irq_list_head);  } -static int __must_check xen_allocate_irq_dynamic(void) +static int __must_check xen_allocate_irqs_dynamic(int nvec)  { -	int first = 0; -	int irq; +	int i, irq = irq_alloc_descs(-1, 0, nvec, -1); -#ifdef CONFIG_X86_IO_APIC -	/* -	 * For an HVM guest or domain 0 which see "real" (emulated or -	 * actual respectively) GSIs we allocate dynamic IRQs -	 * e.g. those corresponding to event channels or MSIs -	 * etc. from the range above those "real" GSIs to avoid -	 * collisions. 
-	 */ -	if (xen_initial_domain() || xen_hvm_domain()) -		first = get_nr_irqs_gsi(); -#endif +	if (irq >= 0) { +		for (i = 0; i < nvec; i++) +			xen_irq_init(irq + i); +	} -	irq = irq_alloc_desc_from(first, -1); +	return irq; +} -	if (irq >= 0) -		xen_irq_init(irq); +static inline int __must_check xen_allocate_irq_dynamic(void) +{ -	return irq; +	return xen_allocate_irqs_dynamic(1);  }  static int __must_check xen_allocate_irq_gsi(unsigned gsi) @@ -538,6 +452,15 @@ static void xen_free_irq(unsigned irq)  	irq_free_desc(irq);  } +static void xen_evtchn_close(unsigned int port) +{ +	struct evtchn_close close; + +	close.port = port; +	if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0) +		BUG(); +} +  static void pirq_query_unmask(int irq)  {  	struct physdev_irq_status_query irq_status; @@ -554,13 +477,6 @@ static void pirq_query_unmask(int irq)  		info->u.pirq.flags |= PIRQ_NEEDS_EOI;  } -static bool probing_irq(int irq) -{ -	struct irq_desc *desc = irq_to_desc(irq); - -	return desc && desc->action == NULL; -} -  static void eoi_pirq(struct irq_data *data)  {  	int evtchn = evtchn_from_irq(data->irq); @@ -602,15 +518,20 @@ static unsigned int __startup_pirq(unsigned int irq)  					BIND_PIRQ__WILL_SHARE : 0;  	rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &bind_pirq);  	if (rc != 0) { -		if (!probing_irq(irq)) -			pr_info("Failed to obtain physical IRQ %d\n", irq); +		pr_warn("Failed to obtain physical IRQ %d\n", irq);  		return 0;  	}  	evtchn = bind_pirq.port;  	pirq_query_unmask(irq); -	evtchn_to_irq[evtchn] = irq; +	rc = set_evtchn_to_irq(evtchn, irq); +	if (rc != 0) { +		pr_err("irq%d: Failed to set port to irq mapping (%d)\n", +		       irq, rc); +		xen_evtchn_close(evtchn); +		return 0; +	}  	bind_evtchn_to_cpu(evtchn, 0);  	info->evtchn = evtchn; @@ -628,10 +549,9 @@ static unsigned int startup_pirq(struct irq_data *data)  static void shutdown_pirq(struct irq_data *data)  { -	struct evtchn_close close;  	unsigned int irq = data->irq;  	struct irq_info *info = info_for_irq(irq); -	int evtchn = evtchn_from_irq(irq); +	unsigned evtchn = evtchn_from_irq(irq);  	BUG_ON(info->type != IRQT_PIRQ); @@ -639,14 +559,8 @@ static void shutdown_pirq(struct irq_data *data)  		return;  	mask_evtchn(evtchn); - -	close.port = evtchn; -	if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0) -		BUG(); - -	bind_evtchn_to_cpu(evtchn, 0); -	evtchn_to_irq[evtchn] = -1; -	info->evtchn = 0; +	xen_evtchn_close(evtchn); +	xen_irq_info_cleanup(info);  }  static void enable_pirq(struct irq_data *data) @@ -675,6 +589,41 @@ int xen_irq_from_gsi(unsigned gsi)  }  EXPORT_SYMBOL_GPL(xen_irq_from_gsi); +static void __unbind_from_irq(unsigned int irq) +{ +	int evtchn = evtchn_from_irq(irq); +	struct irq_info *info = irq_get_handler_data(irq); + +	if (info->refcnt > 0) { +		info->refcnt--; +		if (info->refcnt != 0) +			return; +	} + +	if (VALID_EVTCHN(evtchn)) { +		unsigned int cpu = cpu_from_irq(irq); + +		xen_evtchn_close(evtchn); + +		switch (type_from_irq(irq)) { +		case IRQT_VIRQ: +			per_cpu(virq_to_irq, cpu)[virq_from_irq(irq)] = -1; +			break; +		case IRQT_IPI: +			per_cpu(ipi_to_irq, cpu)[ipi_from_irq(irq)] = -1; +			break; +		default: +			break; +		} + +		xen_irq_info_cleanup(info); +	} + +	BUG_ON(info_for_irq(irq)->type == IRQT_UNBOUND); + +	xen_free_irq(irq); +} +  /*   * Do not make any assumptions regarding the relationship between the   * IRQ number returned here and the Xen pirq argument. 
@@ -690,6 +639,7 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi,  {  	int irq = -1;  	struct physdev_irq irq_op; +	int ret;  	mutex_lock(&irq_mapping_update_lock); @@ -717,8 +667,13 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi,  		goto out;  	} -	xen_irq_info_pirq_init(irq, 0, pirq, gsi, DOMID_SELF, +	ret = xen_irq_info_pirq_setup(irq, 0, pirq, gsi, DOMID_SELF,  			       shareable ? PIRQ_SHAREABLE : 0); +	if (ret < 0) { +		__unbind_from_irq(irq); +		irq = ret; +		goto out; +	}  	pirq_query_unmask(irq);  	/* We try to use the handler with the appropriate semantic for the @@ -765,20 +720,25 @@ int xen_allocate_pirq_msi(struct pci_dev *dev, struct msi_desc *msidesc)  }  int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc, -			     int pirq, const char *name, domid_t domid) +			     int pirq, int nvec, const char *name, domid_t domid)  { -	int irq, ret; +	int i, irq, ret;  	mutex_lock(&irq_mapping_update_lock); -	irq = xen_allocate_irq_dynamic(); +	irq = xen_allocate_irqs_dynamic(nvec);  	if (irq < 0)  		goto out; -	irq_set_chip_and_handler_name(irq, &xen_pirq_chip, handle_edge_irq, -			name); +	for (i = 0; i < nvec; i++) { +		irq_set_chip_and_handler_name(irq + i, &xen_pirq_chip, handle_edge_irq, name); + +		ret = xen_irq_info_pirq_setup(irq + i, 0, pirq + i, 0, domid, +					      i == 0 ? 0 : PIRQ_MSI_GROUP); +		if (ret < 0) +			goto error_irq; +	} -	xen_irq_info_pirq_init(irq, 0, pirq, 0, domid, 0);  	ret = irq_set_msi_desc(irq, msidesc);  	if (ret < 0)  		goto error_irq; @@ -786,26 +746,27 @@ out:  	mutex_unlock(&irq_mapping_update_lock);  	return irq;  error_irq: +	for (; i >= 0; i--) +		__unbind_from_irq(irq + i);  	mutex_unlock(&irq_mapping_update_lock); -	xen_free_irq(irq);  	return ret;  }  #endif  int xen_destroy_irq(int irq)  { -	struct irq_desc *desc;  	struct physdev_unmap_pirq unmap_irq;  	struct irq_info *info = info_for_irq(irq);  	int rc = -ENOENT;  	mutex_lock(&irq_mapping_update_lock); -	desc = irq_to_desc(irq); -	if (!desc) -		goto out; - -	if (xen_initial_domain()) { +	/* +	 * If trying to remove a vector in a MSI group different +	 * than the first one skip the PIRQ unmap unless this vector +	 * is the first one in the group. +	 */ +	if (xen_initial_domain() && !(info->u.pirq.flags & PIRQ_MSI_GROUP)) {  		unmap_irq.pirq = info->u.pirq.pirq;  		unmap_irq.domid = info->u.pirq.domid;  		rc = HYPERVISOR_physdev_op(PHYSDEVOP_unmap_pirq, &unmap_irq); @@ -857,13 +818,18 @@ int xen_pirq_from_irq(unsigned irq)  	return pirq_from_irq(irq);  }  EXPORT_SYMBOL_GPL(xen_pirq_from_irq); +  int bind_evtchn_to_irq(unsigned int evtchn)  {  	int irq; +	int ret; + +	if (evtchn >= xen_evtchn_max_channels()) +		return -ENOMEM;  	mutex_lock(&irq_mapping_update_lock); -	irq = evtchn_to_irq[evtchn]; +	irq = get_evtchn_to_irq(evtchn);  	if (irq == -1) {  		irq = xen_allocate_irq_dynamic(); @@ -873,7 +839,14 @@ int bind_evtchn_to_irq(unsigned int evtchn)  		irq_set_chip_and_handler_name(irq, &xen_dynamic_chip,  					      handle_edge_irq, "event"); -		xen_irq_info_evtchn_init(irq, evtchn); +		ret = xen_irq_info_evtchn_setup(irq, evtchn); +		if (ret < 0) { +			__unbind_from_irq(irq); +			irq = ret; +			goto out; +		} +		/* New interdomain events are bound to VCPU 0. 
*/ +		bind_evtchn_to_cpu(evtchn, 0);  	} else {  		struct irq_info *info = info_for_irq(irq);  		WARN_ON(info == NULL || info->type != IRQT_EVTCHN); @@ -890,6 +863,7 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)  {  	struct evtchn_bind_ipi bind_ipi;  	int evtchn, irq; +	int ret;  	mutex_lock(&irq_mapping_update_lock); @@ -909,8 +883,12 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)  			BUG();  		evtchn = bind_ipi.port; -		xen_irq_info_ipi_init(cpu, irq, evtchn, ipi); - +		ret = xen_irq_info_ipi_setup(cpu, irq, evtchn, ipi); +		if (ret < 0) { +			__unbind_from_irq(irq); +			irq = ret; +			goto out; +		}  		bind_evtchn_to_cpu(evtchn, cpu);  	} else {  		struct irq_info *info = info_for_irq(irq); @@ -943,7 +921,7 @@ static int find_virq(unsigned int virq, unsigned int cpu)  	int port, rc = -ENOENT;  	memset(&status, 0, sizeof(status)); -	for (port = 0; port <= NR_EVENT_CHANNELS; port++) { +	for (port = 0; port < xen_evtchn_max_channels(); port++) {  		status.dom = DOMID_SELF;  		status.port = port;  		rc = HYPERVISOR_event_channel_op(EVTCHNOP_status, &status); @@ -959,6 +937,19 @@ static int find_virq(unsigned int virq, unsigned int cpu)  	return rc;  } +/** + * xen_evtchn_nr_channels - number of usable event channel ports + * + * This may be less than the maximum supported by the current + * hypervisor ABI. Use xen_evtchn_max_channels() for the maximum + * supported. + */ +unsigned xen_evtchn_nr_channels(void) +{ +        return evtchn_ops->nr_channels(); +} +EXPORT_SYMBOL_GPL(xen_evtchn_nr_channels); +  int bind_virq_to_irq(unsigned int virq, unsigned int cpu)  {  	struct evtchn_bind_virq bind_virq; @@ -989,7 +980,12 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu)  			evtchn = ret;  		} -		xen_irq_info_virq_init(cpu, irq, evtchn, virq); +		ret = xen_irq_info_virq_setup(cpu, irq, evtchn, virq); +		if (ret < 0) { +			__unbind_from_irq(irq); +			irq = ret; +			goto out; +		}  		bind_evtchn_to_cpu(evtchn, cpu);  	} else { @@ -1005,50 +1001,8 @@ out:  static void unbind_from_irq(unsigned int irq)  { -	struct evtchn_close close; -	int evtchn = evtchn_from_irq(irq); -	struct irq_info *info = irq_get_handler_data(irq); - -	if (WARN_ON(!info)) -		return; -  	mutex_lock(&irq_mapping_update_lock); - -	if (info->refcnt > 0) { -		info->refcnt--; -		if (info->refcnt != 0) -			goto done; -	} - -	if (VALID_EVTCHN(evtchn)) { -		close.port = evtchn; -		if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0) -			BUG(); - -		switch (type_from_irq(irq)) { -		case IRQT_VIRQ: -			per_cpu(virq_to_irq, cpu_from_evtchn(evtchn)) -				[virq_from_irq(irq)] = -1; -			break; -		case IRQT_IPI: -			per_cpu(ipi_to_irq, cpu_from_evtchn(evtchn)) -				[ipi_from_irq(irq)] = -1; -			break; -		default: -			break; -		} - -		/* Closed ports are implicitly re-bound to VCPU0. */ -		bind_evtchn_to_cpu(evtchn, 0); - -		evtchn_to_irq[evtchn] = -1; -	} - -	BUG_ON(info_for_irq(irq)->type == IRQT_UNBOUND); - -	xen_free_irq(irq); - - done: +	__unbind_from_irq(irq);  	mutex_unlock(&irq_mapping_update_lock);  } @@ -1148,9 +1102,26 @@ void unbind_from_irqhandler(unsigned int irq, void *dev_id)  }  EXPORT_SYMBOL_GPL(unbind_from_irqhandler); +/** + * xen_set_irq_priority() - set an event channel priority. + * @irq:irq bound to an event channel. + * @priority: priority between XEN_IRQ_PRIORITY_MAX and XEN_IRQ_PRIORITY_MIN. 
+ */ +int xen_set_irq_priority(unsigned irq, unsigned priority) +{ +	struct evtchn_set_priority set_priority; + +	set_priority.port = evtchn_from_irq(irq); +	set_priority.priority = priority; + +	return HYPERVISOR_event_channel_op(EVTCHNOP_set_priority, +					   &set_priority); +} +EXPORT_SYMBOL_GPL(xen_set_irq_priority); +  int evtchn_make_refcounted(unsigned int evtchn)  { -	int irq = evtchn_to_irq[evtchn]; +	int irq = get_evtchn_to_irq(evtchn);  	struct irq_info *info;  	if (irq == -1) @@ -1175,12 +1146,12 @@ int evtchn_get(unsigned int evtchn)  	struct irq_info *info;  	int err = -ENOENT; -	if (evtchn >= NR_EVENT_CHANNELS) +	if (evtchn >= xen_evtchn_max_channels())  		return -EINVAL;  	mutex_lock(&irq_mapping_update_lock); -	irq = evtchn_to_irq[evtchn]; +	irq = get_evtchn_to_irq(evtchn);  	if (irq == -1)  		goto done; @@ -1204,7 +1175,7 @@ EXPORT_SYMBOL_GPL(evtchn_get);  void evtchn_put(unsigned int evtchn)  { -	int irq = evtchn_to_irq[evtchn]; +	int irq = get_evtchn_to_irq(evtchn);  	if (WARN_ON(irq == -1))  		return;  	unbind_from_irq(irq); @@ -1228,222 +1199,21 @@ void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)  	notify_remote_via_irq(irq);  } -irqreturn_t xen_debug_interrupt(int irq, void *dev_id) -{ -	struct shared_info *sh = HYPERVISOR_shared_info; -	int cpu = smp_processor_id(); -	xen_ulong_t *cpu_evtchn = per_cpu(cpu_evtchn_mask, cpu); -	int i; -	unsigned long flags; -	static DEFINE_SPINLOCK(debug_lock); -	struct vcpu_info *v; - -	spin_lock_irqsave(&debug_lock, flags); - -	printk("\nvcpu %d\n  ", cpu); - -	for_each_online_cpu(i) { -		int pending; -		v = per_cpu(xen_vcpu, i); -		pending = (get_irq_regs() && i == cpu) -			? xen_irqs_disabled(get_irq_regs()) -			: v->evtchn_upcall_mask; -		printk("%d: masked=%d pending=%d event_sel %0*"PRI_xen_ulong"\n  ", i, -		       pending, v->evtchn_upcall_pending, -		       (int)(sizeof(v->evtchn_pending_sel)*2), -		       v->evtchn_pending_sel); -	} -	v = per_cpu(xen_vcpu, cpu); - -	printk("\npending:\n   "); -	for (i = ARRAY_SIZE(sh->evtchn_pending)-1; i >= 0; i--) -		printk("%0*"PRI_xen_ulong"%s", -		       (int)sizeof(sh->evtchn_pending[0])*2, -		       sh->evtchn_pending[i], -		       i % 8 == 0 ? "\n   " : " "); -	printk("\nglobal mask:\n   "); -	for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--) -		printk("%0*"PRI_xen_ulong"%s", -		       (int)(sizeof(sh->evtchn_mask[0])*2), -		       sh->evtchn_mask[i], -		       i % 8 == 0 ? "\n   " : " "); - -	printk("\nglobally unmasked:\n   "); -	for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--) -		printk("%0*"PRI_xen_ulong"%s", -		       (int)(sizeof(sh->evtchn_mask[0])*2), -		       sh->evtchn_pending[i] & ~sh->evtchn_mask[i], -		       i % 8 == 0 ? "\n   " : " "); - -	printk("\nlocal cpu%d mask:\n   ", cpu); -	for (i = (NR_EVENT_CHANNELS/BITS_PER_EVTCHN_WORD)-1; i >= 0; i--) -		printk("%0*"PRI_xen_ulong"%s", (int)(sizeof(cpu_evtchn[0])*2), -		       cpu_evtchn[i], -		       i % 8 == 0 ? "\n   " : " "); - -	printk("\nlocally unmasked:\n   "); -	for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--) { -		xen_ulong_t pending = sh->evtchn_pending[i] -			& ~sh->evtchn_mask[i] -			& cpu_evtchn[i]; -		printk("%0*"PRI_xen_ulong"%s", -		       (int)(sizeof(sh->evtchn_mask[0])*2), -		       pending, i % 8 == 0 ? 
"\n   " : " "); -	} - -	printk("\npending list:\n"); -	for (i = 0; i < NR_EVENT_CHANNELS; i++) { -		if (sync_test_bit(i, BM(sh->evtchn_pending))) { -			int word_idx = i / BITS_PER_EVTCHN_WORD; -			printk("  %d: event %d -> irq %d%s%s%s\n", -			       cpu_from_evtchn(i), i, -			       evtchn_to_irq[i], -			       sync_test_bit(word_idx, BM(&v->evtchn_pending_sel)) -					     ? "" : " l2-clear", -			       !sync_test_bit(i, BM(sh->evtchn_mask)) -					     ? "" : " globally-masked", -			       sync_test_bit(i, BM(cpu_evtchn)) -					     ? "" : " locally-masked"); -		} -	} - -	spin_unlock_irqrestore(&debug_lock, flags); - -	return IRQ_HANDLED; -} -  static DEFINE_PER_CPU(unsigned, xed_nesting_count); -static DEFINE_PER_CPU(unsigned int, current_word_idx); -static DEFINE_PER_CPU(unsigned int, current_bit_idx); -/* - * Mask out the i least significant bits of w - */ -#define MASK_LSBS(w, i) (w & ((~((xen_ulong_t)0UL)) << i)) - -/* - * Search the CPUs pending events bitmasks.  For each one found, map - * the event number to an irq, and feed it into do_IRQ() for - * handling. - * - * Xen uses a two-level bitmap to speed searching.  The first level is - * a bitset of words which contain pending event bits.  The second - * level is a bitset of pending events themselves. - */  static void __xen_evtchn_do_upcall(void)  { -	int start_word_idx, start_bit_idx; -	int word_idx, bit_idx; -	int i, irq; -	int cpu = get_cpu(); -	struct shared_info *s = HYPERVISOR_shared_info;  	struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu); +	int cpu = get_cpu();  	unsigned count;  	do { -		xen_ulong_t pending_words; -		xen_ulong_t pending_bits; -		struct irq_desc *desc; -  		vcpu_info->evtchn_upcall_pending = 0;  		if (__this_cpu_inc_return(xed_nesting_count) - 1)  			goto out; -		/* -		 * Master flag must be cleared /before/ clearing -		 * selector flag. xchg_xen_ulong must contain an -		 * appropriate barrier. -		 */ -		if ((irq = per_cpu(virq_to_irq, cpu)[VIRQ_TIMER]) != -1) { -			int evtchn = evtchn_from_irq(irq); -			word_idx = evtchn / BITS_PER_LONG; -			pending_bits = evtchn % BITS_PER_LONG; -			if (active_evtchns(cpu, s, word_idx) & (1ULL << pending_bits)) { -				desc = irq_to_desc(irq); -				if (desc) -					generic_handle_irq_desc(irq, desc); -			} -		} - -		pending_words = xchg_xen_ulong(&vcpu_info->evtchn_pending_sel, 0); - -		start_word_idx = __this_cpu_read(current_word_idx); -		start_bit_idx = __this_cpu_read(current_bit_idx); - -		word_idx = start_word_idx; - -		for (i = 0; pending_words != 0; i++) { -			xen_ulong_t words; - -			words = MASK_LSBS(pending_words, word_idx); - -			/* -			 * If we masked out all events, wrap to beginning. -			 */ -			if (words == 0) { -				word_idx = 0; -				bit_idx = 0; -				continue; -			} -			word_idx = EVTCHN_FIRST_BIT(words); - -			pending_bits = active_evtchns(cpu, s, word_idx); -			bit_idx = 0; /* usually scan entire word from start */ -			/* -			 * We scan the starting word in two parts. -			 * -			 * 1st time: start in the middle, scanning the -			 * upper bits. -			 * -			 * 2nd time: scan the whole word (not just the -			 * parts skipped in the first pass) -- if an -			 * event in the previously scanned bits is -			 * pending again it would just be scanned on -			 * the next loop anyway. -			 */ -			if (word_idx == start_word_idx) { -				if (i == 0) -					bit_idx = start_bit_idx; -			} - -			do { -				xen_ulong_t bits; -				int port; - -				bits = MASK_LSBS(pending_bits, bit_idx); - -				/* If we masked out all events, move on. 
*/ -				if (bits == 0) -					break; - -				bit_idx = EVTCHN_FIRST_BIT(bits); - -				/* Process port. */ -				port = (word_idx * BITS_PER_EVTCHN_WORD) + bit_idx; -				irq = evtchn_to_irq[port]; - -				if (irq != -1) { -					desc = irq_to_desc(irq); -					if (desc) -						generic_handle_irq_desc(irq, desc); -				} - -				bit_idx = (bit_idx + 1) % BITS_PER_EVTCHN_WORD; - -				/* Next caller starts at last processed + 1 */ -				__this_cpu_write(current_word_idx, -						 bit_idx ? word_idx : -						 (word_idx+1) % BITS_PER_EVTCHN_WORD); -				__this_cpu_write(current_bit_idx, bit_idx); -			} while (bit_idx != 0); - -			/* Scan start_l1i twice; all others once. */ -			if ((word_idx != start_word_idx) || (i != 0)) -				pending_words &= ~(1UL << word_idx); - -			word_idx = (word_idx + 1) % BITS_PER_EVTCHN_WORD; -		} +		xen_evtchn_handle_events(cpu);  		BUG_ON(!irqs_disabled()); @@ -1463,6 +1233,7 @@ void xen_evtchn_do_upcall(struct pt_regs *regs)  	irq_enter();  #ifdef CONFIG_X86  	exit_idle(); +	inc_irq_stat(irq_hv_callback_count);  #endif  	__xen_evtchn_do_upcall(); @@ -1492,12 +1263,12 @@ void rebind_evtchn_irq(int evtchn, int irq)  	mutex_lock(&irq_mapping_update_lock);  	/* After resume the irq<->evtchn mappings are all cleared out */ -	BUG_ON(evtchn_to_irq[evtchn] != -1); +	BUG_ON(get_evtchn_to_irq(evtchn) != -1);  	/* Expect irq to have been bound before,  	   so there should be a proper type */  	BUG_ON(info->type == IRQT_UNBOUND); -	xen_irq_info_evtchn_init(irq, evtchn); +	(void)xen_irq_info_evtchn_setup(irq, evtchn);  	mutex_unlock(&irq_mapping_update_lock); @@ -1511,7 +1282,6 @@ void rebind_evtchn_irq(int evtchn, int irq)  /* Rebind an evtchn so that it gets delivered to a specific cpu */  static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu)  { -	struct shared_info *s = HYPERVISOR_shared_info;  	struct evtchn_bind_vcpu bind_vcpu;  	int evtchn = evtchn_from_irq(irq);  	int masked; @@ -1534,7 +1304,7 @@ static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu)  	 * Mask the event while changing the VCPU binding to prevent  	 * it being delivered on an unexpected VCPU.  	 
*/ -	masked = sync_test_and_set_bit(evtchn, BM(s->evtchn_mask)); +	masked = test_and_set_mask(evtchn);  	/*  	 * If this fails, it usually just indicates that we're dealing with a @@ -1553,27 +1323,11 @@ static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu)  static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest,  			    bool force)  { -	unsigned tcpu = cpumask_first(dest); +	unsigned tcpu = cpumask_first_and(dest, cpu_online_mask);  	return rebind_irq_to_cpu(data->irq, tcpu);  } -int resend_irq_on_evtchn(unsigned int irq) -{ -	int masked, evtchn = evtchn_from_irq(irq); -	struct shared_info *s = HYPERVISOR_shared_info; - -	if (!VALID_EVTCHN(evtchn)) -		return 1; - -	masked = sync_test_and_set_bit(evtchn, BM(s->evtchn_mask)); -	sync_set_bit(evtchn, BM(s->evtchn_pending)); -	if (!masked) -		unmask_evtchn(evtchn); - -	return 1; -} -  static void enable_dynirq(struct irq_data *data)  {  	int evtchn = evtchn_from_irq(data->irq); @@ -1608,21 +1362,18 @@ static void mask_ack_dynirq(struct irq_data *data)  static int retrigger_dynirq(struct irq_data *data)  { -	int evtchn = evtchn_from_irq(data->irq); -	struct shared_info *sh = HYPERVISOR_shared_info; -	int ret = 0; +	unsigned int evtchn = evtchn_from_irq(data->irq); +	int masked; -	if (VALID_EVTCHN(evtchn)) { -		int masked; +	if (!VALID_EVTCHN(evtchn)) +		return 0; -		masked = sync_test_and_set_bit(evtchn, BM(sh->evtchn_mask)); -		sync_set_bit(evtchn, BM(sh->evtchn_pending)); -		if (!masked) -			unmask_evtchn(evtchn); -		ret = 1; -	} +	masked = test_and_set_mask(evtchn); +	set_evtchn(evtchn); +	if (!masked) +		unmask_evtchn(evtchn); -	return ret; +	return 1;  }  static void restore_pirqs(void) @@ -1683,7 +1434,7 @@ static void restore_cpu_virqs(unsigned int cpu)  		evtchn = bind_virq.port;  		/* Record the new mapping. */ -		xen_irq_info_virq_init(cpu, irq, evtchn, virq); +		(void)xen_irq_info_virq_setup(cpu, irq, evtchn, virq);  		bind_evtchn_to_cpu(evtchn, cpu);  	}  } @@ -1707,7 +1458,7 @@ static void restore_cpu_ipis(unsigned int cpu)  		evtchn = bind_ipi.port;  		/* Record the new mapping. */ -		xen_irq_info_ipi_init(cpu, irq, evtchn, ipi); +		(void)xen_irq_info_ipi_setup(cpu, irq, evtchn, ipi);  		bind_evtchn_to_cpu(evtchn, cpu);  	}  } @@ -1784,21 +1535,18 @@ EXPORT_SYMBOL_GPL(xen_test_irq_shared);  void xen_irq_resume(void)  { -	unsigned int cpu, evtchn; +	unsigned int cpu;  	struct irq_info *info; -	init_evtchn_cpu_bindings(); -  	/* New event-channel space is not 'live' yet. */ -	for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++) -		mask_evtchn(evtchn); +	xen_evtchn_mask_all(); +	xen_evtchn_resume();  	/* No IRQ <-> event-channel mappings. */  	list_for_each_entry(info, &xen_irq_list_head, list)  		info->evtchn = 0; /* zap event-channel binding */ -	for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++) -		evtchn_to_irq[evtchn] = -1; +	clear_evtchn_to_irq_all();  	for_each_possible_cpu(cpu) {  		restore_cpu_virqs(cpu); @@ -1889,27 +1637,40 @@ void xen_callback_vector(void)  void xen_callback_vector(void) {}  #endif +#undef MODULE_PARAM_PREFIX +#define MODULE_PARAM_PREFIX "xen." 
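retrigger_dynirq() and rebind_irq_to_cpu() above now go through the ABI-neutral helpers (test_and_set_mask(), set_evtchn(), unmask_evtchn()) instead of poking the shared_info bitmaps directly, but the resend pattern is unchanged: mask the port while remembering whether it was already masked, mark it pending, and only unmask if this caller did the masking, so a port that was deliberately masked elsewhere stays masked with the event held pending. A stand-alone sketch of that sequence, with the pending/mask state reduced to plain booleans instead of sync_* bitops and hypercalls:

    #include <stdbool.h>
    #include <stdio.h>

    /* Per-port state; bits in shared_info or FIFO event words in the kernel. */
    static bool pending[64], masked[64];

    static bool test_and_set_mask(unsigned port)
    {
            bool was = masked[port];
            masked[port] = true;
            return was;
    }

    static void set_evtchn(unsigned port) { pending[port] = true; }

    static void unmask_evtchn(unsigned port)
    {
            masked[port] = false;
            if (pending[port])
                    printf("port %u: upcall delivered\n", port);
    }

    /* Same shape as retrigger_dynirq() in the patch. */
    static int retrigger(unsigned port)
    {
            bool was_masked = test_and_set_mask(port); /* freeze the port while we poke it */

            set_evtchn(port);                          /* mark it pending */
            if (!was_masked)                           /* only unmask if we were the masker */
                    unmask_evtchn(port);
            return 1;
    }

    int main(void)
    {
            retrigger(7);        /* port was unmasked: event fires immediately */
            masked[9] = true;    /* e.g. the irq is currently disabled */
            retrigger(9);        /* stays pending until its owner unmasks it */
            return 0;
    }

The same mask-then-touch discipline appears in rebind_irq_to_cpu(), where the port is masked across the EVTCHNOP_bind_vcpu call so the event cannot be delivered on an unexpected VCPU mid-move.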
+ +static bool fifo_events = true; +module_param(fifo_events, bool, 0); +  void __init xen_init_IRQ(void)  { -	int i; +	int ret = -EINVAL; -	evtchn_to_irq = kcalloc(NR_EVENT_CHANNELS, sizeof(*evtchn_to_irq), -				    GFP_KERNEL); -	BUG_ON(!evtchn_to_irq); -	for (i = 0; i < NR_EVENT_CHANNELS; i++) -		evtchn_to_irq[i] = -1; +	if (fifo_events) +		ret = xen_evtchn_fifo_init(); +	if (ret < 0) +		xen_evtchn_2l_init(); -	init_evtchn_cpu_bindings(); +	evtchn_to_irq = kcalloc(EVTCHN_ROW(xen_evtchn_max_channels()), +				sizeof(*evtchn_to_irq), GFP_KERNEL); +	BUG_ON(!evtchn_to_irq);  	/* No event channels are 'live' right now. */ -	for (i = 0; i < NR_EVENT_CHANNELS; i++) -		mask_evtchn(i); +	xen_evtchn_mask_all();  	pirq_needs_eoi = pirq_needs_eoi_flag;  #ifdef CONFIG_X86 -	if (xen_hvm_domain()) { +	if (xen_pv_domain()) { +		irq_ctx_init(smp_processor_id()); +		if (xen_initial_domain()) +			pci_xen_initial_domain(); +	} +	if (xen_feature(XENFEAT_hvm_callback_vector))  		xen_callback_vector(); + +	if (xen_hvm_domain()) {  		native_init_IRQ();  		/* pci_xen_hvm_init must be called after native_init_IRQ so that  		 * __acpi_register_gsi can point at the right function */ @@ -1918,13 +1679,10 @@ void __init xen_init_IRQ(void)  		int rc;  		struct physdev_pirq_eoi_gmfn eoi_gmfn; -		irq_ctx_init(smp_processor_id()); -		if (xen_initial_domain()) -			pci_xen_initial_domain(); -  		pirq_eoi_map = (void *)__get_free_page(GFP_KERNEL|__GFP_ZERO);  		eoi_gmfn.gmfn = virt_to_mfn(pirq_eoi_map);  		rc = HYPERVISOR_physdev_op(PHYSDEVOP_pirq_eoi_gmfn_v2, &eoi_gmfn); +		/* TODO: No PVH support for PIRQ EOI */  		if (rc != 0) {  			free_page((unsigned long) pirq_eoi_map);  			pirq_eoi_map = NULL; diff --git a/drivers/xen/events/events_fifo.c b/drivers/xen/events/events_fifo.c new file mode 100644 index 00000000000..84b4bfb8434 --- /dev/null +++ b/drivers/xen/events/events_fifo.c @@ -0,0 +1,443 @@ +/* + * Xen event channels (FIFO-based ABI) + * + * Copyright (C) 2013 Citrix Systems R&D ltd. + * + * This source code is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * Or, when distributed separately from the Linux kernel or + * incorporated into other software packages, subject to the following + * license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt + +#include <linux/linkage.h> +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/module.h> +#include <linux/smp.h> +#include <linux/percpu.h> +#include <linux/cpu.h> + +#include <asm/sync_bitops.h> +#include <asm/xen/hypercall.h> +#include <asm/xen/hypervisor.h> +#include <asm/xen/page.h> + +#include <xen/xen.h> +#include <xen/xen-ops.h> +#include <xen/events.h> +#include <xen/interface/xen.h> +#include <xen/interface/event_channel.h> + +#include "events_internal.h" + +#define EVENT_WORDS_PER_PAGE (PAGE_SIZE / sizeof(event_word_t)) +#define MAX_EVENT_ARRAY_PAGES (EVTCHN_FIFO_NR_CHANNELS / EVENT_WORDS_PER_PAGE) + +struct evtchn_fifo_queue { +	uint32_t head[EVTCHN_FIFO_MAX_QUEUES]; +}; + +static DEFINE_PER_CPU(struct evtchn_fifo_control_block *, cpu_control_block); +static DEFINE_PER_CPU(struct evtchn_fifo_queue, cpu_queue); +static event_word_t *event_array[MAX_EVENT_ARRAY_PAGES] __read_mostly; +static unsigned event_array_pages __read_mostly; + +/* + * sync_set_bit() and friends must be unsigned long aligned on non-x86 + * platforms. + */ +#if !defined(CONFIG_X86) && BITS_PER_LONG > 32 + +#define BM(w) (unsigned long *)((unsigned long)w & ~0x7UL) +#define EVTCHN_FIFO_BIT(b, w) \ +    (((unsigned long)w & 0x4UL) ? (EVTCHN_FIFO_ ##b + 32) : EVTCHN_FIFO_ ##b) + +#else + +#define BM(w) ((unsigned long *)(w)) +#define EVTCHN_FIFO_BIT(b, w) EVTCHN_FIFO_ ##b + +#endif + +static inline event_word_t *event_word_from_port(unsigned port) +{ +	unsigned i = port / EVENT_WORDS_PER_PAGE; + +	return event_array[i] + port % EVENT_WORDS_PER_PAGE; +} + +static unsigned evtchn_fifo_max_channels(void) +{ +	return EVTCHN_FIFO_NR_CHANNELS; +} + +static unsigned evtchn_fifo_nr_channels(void) +{ +	return event_array_pages * EVENT_WORDS_PER_PAGE; +} + +static void free_unused_array_pages(void) +{ +	unsigned i; + +	for (i = event_array_pages; i < MAX_EVENT_ARRAY_PAGES; i++) { +		if (!event_array[i]) +			break; +		free_page((unsigned long)event_array[i]); +		event_array[i] = NULL; +	} +} + +static void init_array_page(event_word_t *array_page) +{ +	unsigned i; + +	for (i = 0; i < EVENT_WORDS_PER_PAGE; i++) +		array_page[i] = 1 << EVTCHN_FIFO_MASKED; +} + +static int evtchn_fifo_setup(struct irq_info *info) +{ +	unsigned port = info->evtchn; +	unsigned new_array_pages; +	int ret; + +	new_array_pages = port / EVENT_WORDS_PER_PAGE + 1; + +	if (new_array_pages > MAX_EVENT_ARRAY_PAGES) +		return -EINVAL; + +	while (event_array_pages < new_array_pages) { +		void *array_page; +		struct evtchn_expand_array expand_array; + +		/* Might already have a page if we've resumed. */ +		array_page = event_array[event_array_pages]; +		if (!array_page) { +			array_page = (void *)__get_free_page(GFP_KERNEL); +			if (array_page == NULL) { +				ret = -ENOMEM; +				goto error; +			} +			event_array[event_array_pages] = array_page; +		} + +		/* Mask all events in this page before adding it. 
*/ +		init_array_page(array_page); + +		expand_array.array_gfn = virt_to_mfn(array_page); + +		ret = HYPERVISOR_event_channel_op(EVTCHNOP_expand_array, &expand_array); +		if (ret < 0) +			goto error; + +		event_array_pages++; +	} +	return 0; + +  error: +	if (event_array_pages == 0) +		panic("xen: unable to expand event array with initial page (%d)\n", ret); +	else +		pr_err("unable to expand event array (%d)\n", ret); +	free_unused_array_pages(); +	return ret; +} + +static void evtchn_fifo_bind_to_cpu(struct irq_info *info, unsigned cpu) +{ +	/* no-op */ +} + +static void evtchn_fifo_clear_pending(unsigned port) +{ +	event_word_t *word = event_word_from_port(port); +	sync_clear_bit(EVTCHN_FIFO_BIT(PENDING, word), BM(word)); +} + +static void evtchn_fifo_set_pending(unsigned port) +{ +	event_word_t *word = event_word_from_port(port); +	sync_set_bit(EVTCHN_FIFO_BIT(PENDING, word), BM(word)); +} + +static bool evtchn_fifo_is_pending(unsigned port) +{ +	event_word_t *word = event_word_from_port(port); +	return sync_test_bit(EVTCHN_FIFO_BIT(PENDING, word), BM(word)); +} + +static bool evtchn_fifo_test_and_set_mask(unsigned port) +{ +	event_word_t *word = event_word_from_port(port); +	return sync_test_and_set_bit(EVTCHN_FIFO_BIT(MASKED, word), BM(word)); +} + +static void evtchn_fifo_mask(unsigned port) +{ +	event_word_t *word = event_word_from_port(port); +	sync_set_bit(EVTCHN_FIFO_BIT(MASKED, word), BM(word)); +} + +static bool evtchn_fifo_is_masked(unsigned port) +{ +	event_word_t *word = event_word_from_port(port); +	return sync_test_bit(EVTCHN_FIFO_BIT(MASKED, word), BM(word)); +} +/* + * Clear MASKED, spinning if BUSY is set. + */ +static void clear_masked(volatile event_word_t *word) +{ +	event_word_t new, old, w; + +	w = *word; + +	do { +		old = w & ~(1 << EVTCHN_FIFO_BUSY); +		new = old & ~(1 << EVTCHN_FIFO_MASKED); +		w = sync_cmpxchg(word, old, new); +	} while (w != old); +} + +static void evtchn_fifo_unmask(unsigned port) +{ +	event_word_t *word = event_word_from_port(port); + +	BUG_ON(!irqs_disabled()); + +	clear_masked(word); +	if (evtchn_fifo_is_pending(port)) { +		struct evtchn_unmask unmask = { .port = port }; +		(void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask); +	} +} + +static uint32_t clear_linked(volatile event_word_t *word) +{ +	event_word_t new, old, w; + +	w = *word; + +	do { +		old = w; +		new = (w & ~((1 << EVTCHN_FIFO_LINKED) +			     | EVTCHN_FIFO_LINK_MASK)); +	} while ((w = sync_cmpxchg(word, old, new)) != old); + +	return w & EVTCHN_FIFO_LINK_MASK; +} + +static void handle_irq_for_port(unsigned port) +{ +	int irq; + +	irq = get_evtchn_to_irq(port); +	if (irq != -1) +		generic_handle_irq(irq); +} + +static void consume_one_event(unsigned cpu, +			      struct evtchn_fifo_control_block *control_block, +			      unsigned priority, unsigned long *ready) +{ +	struct evtchn_fifo_queue *q = &per_cpu(cpu_queue, cpu); +	uint32_t head; +	unsigned port; +	event_word_t *word; + +	head = q->head[priority]; + +	/* +	 * Reached the tail last time?  Read the new HEAD from the +	 * control block. +	 */ +	if (head == 0) { +		rmb(); /* Ensure word is up-to-date before reading head. */ +		head = control_block->head[priority]; +	} + +	port = head; +	word = event_word_from_port(port); +	head = clear_linked(word); + +	/* +	 * If the link is non-zero, there are more events in the +	 * queue, otherwise the queue is empty. +	 * +	 * If the queue is empty, clear this priority from our local +	 * copy of the ready word. 
+	 */ +	if (head == 0) +		clear_bit(priority, ready); + +	if (evtchn_fifo_is_pending(port) && !evtchn_fifo_is_masked(port)) +		handle_irq_for_port(port); + +	q->head[priority] = head; +} + +static void evtchn_fifo_handle_events(unsigned cpu) +{ +	struct evtchn_fifo_control_block *control_block; +	unsigned long ready; +	unsigned q; + +	control_block = per_cpu(cpu_control_block, cpu); + +	ready = xchg(&control_block->ready, 0); + +	while (ready) { +		q = find_first_bit(BM(&ready), EVTCHN_FIFO_MAX_QUEUES); +		consume_one_event(cpu, control_block, q, &ready); +		ready |= xchg(&control_block->ready, 0); +	} +} + +static void evtchn_fifo_resume(void) +{ +	unsigned cpu; + +	for_each_possible_cpu(cpu) { +		void *control_block = per_cpu(cpu_control_block, cpu); +		struct evtchn_init_control init_control; +		int ret; + +		if (!control_block) +			continue; + +		/* +		 * If this CPU is offline, take the opportunity to +		 * free the control block while it is not being +		 * used. +		 */ +		if (!cpu_online(cpu)) { +			free_page((unsigned long)control_block); +			per_cpu(cpu_control_block, cpu) = NULL; +			continue; +		} + +		init_control.control_gfn = virt_to_mfn(control_block); +		init_control.offset = 0; +		init_control.vcpu = cpu; + +		ret = HYPERVISOR_event_channel_op(EVTCHNOP_init_control, +						  &init_control); +		if (ret < 0) +			BUG(); +	} + +	/* +	 * The event array starts out as empty again and is extended +	 * as normal when events are bound.  The existing pages will +	 * be reused. +	 */ +	event_array_pages = 0; +} + +static const struct evtchn_ops evtchn_ops_fifo = { +	.max_channels      = evtchn_fifo_max_channels, +	.nr_channels       = evtchn_fifo_nr_channels, +	.setup             = evtchn_fifo_setup, +	.bind_to_cpu       = evtchn_fifo_bind_to_cpu, +	.clear_pending     = evtchn_fifo_clear_pending, +	.set_pending       = evtchn_fifo_set_pending, +	.is_pending        = evtchn_fifo_is_pending, +	.test_and_set_mask = evtchn_fifo_test_and_set_mask, +	.mask              = evtchn_fifo_mask, +	.unmask            = evtchn_fifo_unmask, +	.handle_events     = evtchn_fifo_handle_events, +	.resume            = evtchn_fifo_resume, +}; + +static int evtchn_fifo_init_control_block(unsigned cpu) +{ +	struct page *control_block = NULL; +	struct evtchn_init_control init_control; +	int ret = -ENOMEM; + +	control_block = alloc_page(GFP_KERNEL|__GFP_ZERO); +	if (control_block == NULL) +		goto error; + +	init_control.control_gfn = virt_to_mfn(page_address(control_block)); +	init_control.offset      = 0; +	init_control.vcpu        = cpu; + +	ret = HYPERVISOR_event_channel_op(EVTCHNOP_init_control, &init_control); +	if (ret < 0) +		goto error; + +	per_cpu(cpu_control_block, cpu) = page_address(control_block); + +	return 0; + +  error: +	__free_page(control_block); +	return ret; +} + +static int evtchn_fifo_cpu_notification(struct notifier_block *self, +						  unsigned long action, +						  void *hcpu) +{ +	int cpu = (long)hcpu; +	int ret = 0; + +	switch (action) { +	case CPU_UP_PREPARE: +		if (!per_cpu(cpu_control_block, cpu)) +			ret = evtchn_fifo_init_control_block(cpu); +		break; +	default: +		break; +	} +	return ret < 0 ? 
NOTIFY_BAD : NOTIFY_OK; +} + +static struct notifier_block evtchn_fifo_cpu_notifier = { +	.notifier_call	= evtchn_fifo_cpu_notification, +}; + +int __init xen_evtchn_fifo_init(void) +{ +	int cpu = get_cpu(); +	int ret; + +	ret = evtchn_fifo_init_control_block(cpu); +	if (ret < 0) +		goto out; + +	pr_info("Using FIFO-based ABI\n"); + +	evtchn_ops = &evtchn_ops_fifo; + +	register_cpu_notifier(&evtchn_fifo_cpu_notifier); +out: +	put_cpu(); +	return ret; +} diff --git a/drivers/xen/events/events_internal.h b/drivers/xen/events/events_internal.h new file mode 100644 index 00000000000..50c2050a1e3 --- /dev/null +++ b/drivers/xen/events/events_internal.h @@ -0,0 +1,151 @@ +/* + * Xen Event Channels (internal header) + * + * Copyright (C) 2013 Citrix Systems R&D Ltd. + * + * This source code is licensed under the GNU General Public License, + * Version 2 or later.  See the file COPYING for more details. + */ +#ifndef __EVENTS_INTERNAL_H__ +#define __EVENTS_INTERNAL_H__ + +/* Interrupt types. */ +enum xen_irq_type { +	IRQT_UNBOUND = 0, +	IRQT_PIRQ, +	IRQT_VIRQ, +	IRQT_IPI, +	IRQT_EVTCHN +}; + +/* + * Packed IRQ information: + * type - enum xen_irq_type + * event channel - irq->event channel mapping + * cpu - cpu this event channel is bound to + * index - type-specific information: + *    PIRQ - vector, with MSB being "needs EOI", or physical IRQ of the HVM + *           guest, or GSI (real passthrough IRQ) of the device. + *    VIRQ - virq number + *    IPI - IPI vector + *    EVTCHN - + */ +struct irq_info { +	struct list_head list; +	int refcnt; +	enum xen_irq_type type;	/* type */ +	unsigned irq; +	unsigned int evtchn;	/* event channel */ +	unsigned short cpu;	/* cpu bound */ + +	union { +		unsigned short virq; +		enum ipi_vector ipi; +		struct { +			unsigned short pirq; +			unsigned short gsi; +			unsigned char vector; +			unsigned char flags; +			uint16_t domid; +		} pirq; +	} u; +}; + +#define PIRQ_NEEDS_EOI	(1 << 0) +#define PIRQ_SHAREABLE	(1 << 1) +#define PIRQ_MSI_GROUP	(1 << 2) + +struct evtchn_ops { +	unsigned (*max_channels)(void); +	unsigned (*nr_channels)(void); + +	int (*setup)(struct irq_info *info); +	void (*bind_to_cpu)(struct irq_info *info, unsigned cpu); + +	void (*clear_pending)(unsigned port); +	void (*set_pending)(unsigned port); +	bool (*is_pending)(unsigned port); +	bool (*test_and_set_mask)(unsigned port); +	void (*mask)(unsigned port); +	void (*unmask)(unsigned port); + +	void (*handle_events)(unsigned cpu); +	void (*resume)(void); +}; + +extern const struct evtchn_ops *evtchn_ops; + +extern int **evtchn_to_irq; +int get_evtchn_to_irq(unsigned int evtchn); + +struct irq_info *info_for_irq(unsigned irq); +unsigned cpu_from_irq(unsigned irq); +unsigned cpu_from_evtchn(unsigned int evtchn); + +static inline unsigned xen_evtchn_max_channels(void) +{ +	return evtchn_ops->max_channels(); +} + +/* + * Do any ABI specific setup for a bound event channel before it can + * be unmasked and used.
+ */ +static inline int xen_evtchn_port_setup(struct irq_info *info) +{ +	if (evtchn_ops->setup) +		return evtchn_ops->setup(info); +	return 0; +} + +static inline void xen_evtchn_port_bind_to_cpu(struct irq_info *info, +					       unsigned cpu) +{ +	evtchn_ops->bind_to_cpu(info, cpu); +} + +static inline void clear_evtchn(unsigned port) +{ +	evtchn_ops->clear_pending(port); +} + +static inline void set_evtchn(unsigned port) +{ +	evtchn_ops->set_pending(port); +} + +static inline bool test_evtchn(unsigned port) +{ +	return evtchn_ops->is_pending(port); +} + +static inline bool test_and_set_mask(unsigned port) +{ +	return evtchn_ops->test_and_set_mask(port); +} + +static inline void mask_evtchn(unsigned port) +{ +	return evtchn_ops->mask(port); +} + +static inline void unmask_evtchn(unsigned port) +{ +	return evtchn_ops->unmask(port); +} + +static inline void xen_evtchn_handle_events(unsigned cpu) +{ +	return evtchn_ops->handle_events(cpu); +} + +static inline void xen_evtchn_resume(void) +{ +	if (evtchn_ops->resume) +		evtchn_ops->resume(); +} + +void xen_evtchn_2l_init(void); +int xen_evtchn_fifo_init(void); + +#endif /* #ifndef __EVENTS_INTERNAL_H__ */ diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c index 8b3a69a06c3..00f40f051d9 100644 --- a/drivers/xen/evtchn.c +++ b/drivers/xen/evtchn.c @@ -305,7 +305,7 @@ static int evtchn_bind_to_user(struct per_user_data *u, int port)  	if (rc < 0)  		goto err; -	rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, IRQF_DISABLED, +	rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, 0,  				       u->name, evtchn);  	if (rc < 0)  		goto err; @@ -417,7 +417,7 @@ static long evtchn_ioctl(struct file *file,  			break;  		rc = -EINVAL; -		if (unbind.port >= NR_EVENT_CHANNELS) +		if (unbind.port >= xen_evtchn_nr_channels())  			break;  		rc = -ENOTCONN; diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index e41c79c986e..073b4a19a8b 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -846,7 +846,7 @@ static int __init gntdev_init(void)  	if (!xen_domain())  		return -ENODEV; -	use_ptemod = xen_pv_domain(); +	use_ptemod = !xen_feature(XENFEAT_auto_translated_physmap);  	err = misc_register(&gntdev_miscdev);  	if (err != 0) { diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index c4d2298893b..eeba7544f0c 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -49,6 +49,7 @@  #include <xen/grant_table.h>  #include <xen/interface/memory.h>  #include <xen/hvc-console.h> +#include <xen/swiotlb-xen.h>  #include <asm/xen/hypercall.h>  #include <asm/xen/interface.h> @@ -61,12 +62,10 @@  static grant_ref_t **gnttab_list;  static unsigned int nr_grant_frames; -static unsigned int boot_max_nr_grant_frames;  static int gnttab_free_count;  static grant_ref_t gnttab_free_head;  static DEFINE_SPINLOCK(gnttab_list_lock); -unsigned long xen_hvm_resume_frames; -EXPORT_SYMBOL_GPL(xen_hvm_resume_frames); +struct grant_frames xen_auto_xlat_grant_frames;  static union {  	struct grant_entry_v1 *v1; @@ -826,6 +825,11 @@ static unsigned int __max_nr_grant_frames(void)  unsigned int gnttab_max_grant_frames(void)  {  	unsigned int xen_max = __max_nr_grant_frames(); +	static unsigned int boot_max_nr_grant_frames; + +	/* First time, initialize it properly. 
*/ +	if (!boot_max_nr_grant_frames) +		boot_max_nr_grant_frames = __max_nr_grant_frames();  	if (xen_max > boot_max_nr_grant_frames)  		return boot_max_nr_grant_frames; @@ -833,6 +837,51 @@ unsigned int gnttab_max_grant_frames(void)  }  EXPORT_SYMBOL_GPL(gnttab_max_grant_frames); +int gnttab_setup_auto_xlat_frames(phys_addr_t addr) +{ +	xen_pfn_t *pfn; +	unsigned int max_nr_gframes = __max_nr_grant_frames(); +	unsigned int i; +	void *vaddr; + +	if (xen_auto_xlat_grant_frames.count) +		return -EINVAL; + +	vaddr = xen_remap(addr, PAGE_SIZE * max_nr_gframes); +	if (vaddr == NULL) { +		pr_warn("Failed to ioremap gnttab share frames (addr=%pa)!\n", +			&addr); +		return -ENOMEM; +	} +	pfn = kcalloc(max_nr_gframes, sizeof(pfn[0]), GFP_KERNEL); +	if (!pfn) { +		xen_unmap(vaddr); +		return -ENOMEM; +	} +	for (i = 0; i < max_nr_gframes; i++) +		pfn[i] = PFN_DOWN(addr) + i; + +	xen_auto_xlat_grant_frames.vaddr = vaddr; +	xen_auto_xlat_grant_frames.pfn = pfn; +	xen_auto_xlat_grant_frames.count = max_nr_gframes; + +	return 0; +} +EXPORT_SYMBOL_GPL(gnttab_setup_auto_xlat_frames); + +void gnttab_free_auto_xlat_frames(void) +{ +	if (!xen_auto_xlat_grant_frames.count) +		return; +	kfree(xen_auto_xlat_grant_frames.pfn); +	xen_unmap(xen_auto_xlat_grant_frames.vaddr); + +	xen_auto_xlat_grant_frames.pfn = NULL; +	xen_auto_xlat_grant_frames.count = 0; +	xen_auto_xlat_grant_frames.vaddr = NULL; +} +EXPORT_SYMBOL_GPL(gnttab_free_auto_xlat_frames); +  /* Handling of paged out grant targets (GNTST_eagain) */  #define MAX_DELAY 256  static inline void @@ -884,9 +933,6 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,  		    struct page **pages, unsigned int count)  {  	int i, ret; -	bool lazy = false; -	pte_t *pte; -	unsigned long mfn;  	ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map_ops, count);  	if (ret) @@ -898,36 +944,7 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,  			gnttab_retry_eagain_gop(GNTTABOP_map_grant_ref, map_ops + i,  						&map_ops[i].status, __func__); -	if (xen_feature(XENFEAT_auto_translated_physmap)) -		return ret; - -	if (!in_interrupt() && paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) { -		arch_enter_lazy_mmu_mode(); -		lazy = true; -	} - -	for (i = 0; i < count; i++) { -		/* Do not add to override if the map failed. */ -		if (map_ops[i].status) -			continue; - -		if (map_ops[i].flags & GNTMAP_contains_pte) { -			pte = (pte_t *) (mfn_to_virt(PFN_DOWN(map_ops[i].host_addr)) + -				(map_ops[i].host_addr & ~PAGE_MASK)); -			mfn = pte_mfn(*pte); -		} else { -			mfn = PFN_DOWN(map_ops[i].dev_bus_addr); -		} -		ret = m2p_add_override(mfn, pages[i], kmap_ops ? -				       &kmap_ops[i] : NULL); -		if (ret) -			return ret; -	} - -	if (lazy) -		arch_leave_lazy_mmu_mode(); - -	return ret; +	return set_foreign_p2m_mapping(map_ops, kmap_ops, pages, count);  }  EXPORT_SYMBOL_GPL(gnttab_map_refs); @@ -935,32 +952,13 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,  		      struct gnttab_map_grant_ref *kmap_ops,  		      struct page **pages, unsigned int count)  { -	int i, ret; -	bool lazy = false; +	int ret;  	ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap_ops, count);  	if (ret)  		return ret; -	if (xen_feature(XENFEAT_auto_translated_physmap)) -		return ret; - -	if (!in_interrupt() && paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) { -		arch_enter_lazy_mmu_mode(); -		lazy = true; -	} - -	for (i = 0; i < count; i++) { -		ret = m2p_remove_override(pages[i], kmap_ops ? 
-				       &kmap_ops[i] : NULL); -		if (ret) -			return ret; -	} - -	if (lazy) -		arch_leave_lazy_mmu_mode(); - -	return ret; +	return clear_foreign_p2m_mapping(unmap_ops, kmap_ops, pages, count);  }  EXPORT_SYMBOL_GPL(gnttab_unmap_refs); @@ -1043,10 +1041,11 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx)  	unsigned int nr_gframes = end_idx + 1;  	int rc; -	if (xen_hvm_domain()) { +	if (xen_feature(XENFEAT_auto_translated_physmap)) {  		struct xen_add_to_physmap xatp;  		unsigned int i = end_idx;  		rc = 0; +		BUG_ON(xen_auto_xlat_grant_frames.count < nr_gframes);  		/*  		 * Loop backwards, so that the first hypercall has the largest  		 * index, ensuring that the table will grow only once. @@ -1055,7 +1054,7 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx)  			xatp.domid = DOMID_SELF;  			xatp.idx = i;  			xatp.space = XENMAPSPACE_grant_table; -			xatp.gpfn = (xen_hvm_resume_frames >> PAGE_SHIFT) + i; +			xatp.gpfn = xen_auto_xlat_grant_frames.pfn[i];  			rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp);  			if (rc != 0) {  				pr_warn("grant table add_to_physmap failed, err=%d\n", @@ -1118,10 +1117,8 @@ static void gnttab_request_version(void)  	int rc;  	struct gnttab_set_version gsv; -	if (xen_hvm_domain()) -		gsv.version = 1; -	else -		gsv.version = 2; +	gsv.version = 1; +  	rc = HYPERVISOR_grant_table_op(GNTTABOP_set_version, &gsv, 1);  	if (rc == 0 && gsv.version == 2) {  		grant_table_version = 2; @@ -1152,21 +1149,15 @@ static int gnttab_setup(void)  	if (max_nr_gframes < nr_grant_frames)  		return -ENOSYS; -	if (xen_pv_domain()) -		return gnttab_map(0, nr_grant_frames - 1); - -	if (gnttab_shared.addr == NULL) { -		gnttab_shared.addr = xen_remap(xen_hvm_resume_frames, -						PAGE_SIZE * max_nr_gframes); +	if (xen_feature(XENFEAT_auto_translated_physmap) && gnttab_shared.addr == NULL) { +		gnttab_shared.addr = xen_auto_xlat_grant_frames.vaddr;  		if (gnttab_shared.addr == NULL) { -			pr_warn("Failed to ioremap gnttab share frames!\n"); +			pr_warn("gnttab share frames (addr=0x%08lx) is not mapped!\n", +				(unsigned long)xen_auto_xlat_grant_frames.vaddr);  			return -ENOMEM;  		}  	} - -	gnttab_map(0, nr_grant_frames - 1); - -	return 0; +	return gnttab_map(0, nr_grant_frames - 1);  }  int gnttab_resume(void) @@ -1177,7 +1168,8 @@ int gnttab_resume(void)  int gnttab_suspend(void)  { -	gnttab_interface->unmap_frames(); +	if (!xen_feature(XENFEAT_auto_translated_physmap)) +		gnttab_interface->unmap_frames();  	return 0;  } @@ -1203,19 +1195,20 @@ static int gnttab_expand(unsigned int req_entries)  int gnttab_init(void)  {  	int i; +	unsigned long max_nr_grant_frames;  	unsigned int max_nr_glist_frames, nr_glist_frames;  	unsigned int nr_init_grefs;  	int ret;  	gnttab_request_version(); +	max_nr_grant_frames = gnttab_max_grant_frames();  	nr_grant_frames = 1; -	boot_max_nr_grant_frames = __max_nr_grant_frames();  	/* Determine the maximum number of frames required for the  	 * grant reference free list on the current hypervisor.  	 
*/  	BUG_ON(grefs_per_grant_frame == 0); -	max_nr_glist_frames = (boot_max_nr_grant_frames * +	max_nr_glist_frames = (max_nr_grant_frames *  			       grefs_per_grant_frame / RPP);  	gnttab_list = kmalloc(max_nr_glist_frames * sizeof(grant_ref_t *), @@ -1232,6 +1225,11 @@ int gnttab_init(void)  		}  	} +	ret = arch_gnttab_init(max_nr_grant_frames, +			       nr_status_frames(max_nr_grant_frames)); +	if (ret < 0) +		goto ini_nomem; +  	if (gnttab_setup() < 0) {  		ret = -ENODEV;  		goto ini_nomem; @@ -1268,5 +1266,6 @@ static int __gnttab_init(void)  	return gnttab_init();  } - -core_initcall(__gnttab_init); +/* Starts after core_initcall so that xen_pvh_gnttab_setup can be called + * beforehand to initialize xen_auto_xlat_grant_frames. */ +core_initcall_sync(__gnttab_init); diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index 624e8dc2453..5f1e1f3cd18 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c @@ -41,32 +41,23 @@ static enum shutdown_state shutting_down = SHUTDOWN_INVALID;  struct suspend_info {  	int cancelled; -	unsigned long arg; /* extra hypercall argument */ -	void (*pre)(void); -	void (*post)(int cancelled);  }; -#ifdef CONFIG_HIBERNATE_CALLBACKS -static void xen_hvm_post_suspend(int cancelled) -{ -	xen_arch_hvm_post_suspend(cancelled); -	gnttab_resume(); -} +static RAW_NOTIFIER_HEAD(xen_resume_notifier); -static void xen_pre_suspend(void) +void xen_resume_notifier_register(struct notifier_block *nb)  { -	xen_mm_pin_all(); -	gnttab_suspend(); -	xen_arch_pre_suspend(); +	raw_notifier_chain_register(&xen_resume_notifier, nb);  } +EXPORT_SYMBOL_GPL(xen_resume_notifier_register); -static void xen_post_suspend(int cancelled) +void xen_resume_notifier_unregister(struct notifier_block *nb)  { -	xen_arch_post_suspend(cancelled); -	gnttab_resume(); -	xen_mm_unpin_all(); +	raw_notifier_chain_unregister(&xen_resume_notifier, nb);  } +EXPORT_SYMBOL_GPL(xen_resume_notifier_unregister); +#ifdef CONFIG_HIBERNATE_CALLBACKS  static int xen_suspend(void *data)  {  	struct suspend_info *si = data; @@ -80,22 +71,23 @@ static int xen_suspend(void *data)  		return err;  	} -	if (si->pre) -		si->pre(); +	gnttab_suspend(); +	xen_arch_pre_suspend();  	/*  	 * This hypercall returns 1 if suspend was cancelled  	 * or the domain was merely checkpointed, and 0 if it  	 * is resuming in a new domain.  	 */ -	si->cancelled = HYPERVISOR_suspend(si->arg); +	si->cancelled = HYPERVISOR_suspend(xen_pv_domain() +                                           ? virt_to_mfn(xen_start_info) +                                           : 0); -	if (si->post) -		si->post(si->cancelled); +	xen_arch_post_suspend(si->cancelled); +	gnttab_resume();  	if (!si->cancelled) {  		xen_irq_resume(); -		xen_console_resume();  		xen_timer_resume();  	} @@ -140,18 +132,14 @@ static void do_suspend(void)  	si.cancelled = 1; -	if (xen_hvm_domain()) { -		si.arg = 0UL; -		si.pre = NULL; -		si.post = &xen_hvm_post_suspend; -	} else { -		si.arg = virt_to_mfn(xen_start_info); -		si.pre = &xen_pre_suspend; -		si.post = &xen_post_suspend; -	} -  	err = stop_machine(xen_suspend, &si, cpumask_of(0)); +	/* Resume console as early as possible. */ +	if (!si.cancelled) +		xen_console_resume(); + +	raw_notifier_call_chain(&xen_resume_notifier, 0, NULL); +  	dpm_resume_start(si.cancelled ? 
PMSG_THAW : PMSG_RESTORE);  	if (err) { @@ -182,10 +170,32 @@ struct shutdown_handler {  	void (*cb)(void);  }; +static int poweroff_nb(struct notifier_block *cb, unsigned long code, void *unused) +{ +	switch (code) { +	case SYS_DOWN: +	case SYS_HALT: +	case SYS_POWER_OFF: +		shutting_down = SHUTDOWN_POWEROFF; +	default: +		break; +	} +	return NOTIFY_DONE; +}  static void do_poweroff(void)  { -	shutting_down = SHUTDOWN_POWEROFF; -	orderly_poweroff(false); +	switch (system_state) { +	case SYSTEM_BOOTING: +		orderly_poweroff(true); +		break; +	case SYSTEM_RUNNING: +		orderly_poweroff(false); +		break; +	default: +		/* Don't do it when we are halting/rebooting. */ +		pr_info("Ignoring Xen toolstack shutdown.\n"); +		break; +	}  }  static void do_reboot(void) @@ -291,6 +301,10 @@ static struct xenbus_watch shutdown_watch = {  	.callback = shutdown_handler  }; +static struct notifier_block xen_reboot_nb = { +	.notifier_call = poweroff_nb, +}; +  static int setup_shutdown_watcher(void)  {  	int err; @@ -301,6 +315,7 @@ static int setup_shutdown_watcher(void)  		return err;  	} +  #ifdef CONFIG_MAGIC_SYSRQ  	err = register_xenbus_watch(&sysrq_watch);  	if (err) { @@ -329,6 +344,7 @@ int xen_setup_shutdown_event(void)  	if (!xen_domain())  		return -ENODEV;  	register_xenstore_notifier(&xenstore_notifier); +	register_reboot_notifier(&xen_reboot_nb);  	return 0;  } diff --git a/drivers/xen/pci.c b/drivers/xen/pci.c index 18fff88254e..dd9c249ea31 100644 --- a/drivers/xen/pci.c +++ b/drivers/xen/pci.c @@ -26,6 +26,9 @@  #include <asm/xen/hypervisor.h>  #include <asm/xen/hypercall.h>  #include "../pci/pci.h" +#ifdef CONFIG_PCI_MMCONFIG +#include <asm/pci_x86.h> +#endif  static bool __read_mostly pci_seg_supported = true; @@ -58,12 +61,12 @@ static int xen_add_device(struct device *dev)  			add.flags = XEN_PCI_DEV_EXTFN;  #ifdef CONFIG_ACPI -		handle = DEVICE_ACPI_HANDLE(&pci_dev->dev); +		handle = ACPI_HANDLE(&pci_dev->dev);  		if (!handle && pci_dev->bus->bridge) -			handle = DEVICE_ACPI_HANDLE(pci_dev->bus->bridge); +			handle = ACPI_HANDLE(pci_dev->bus->bridge);  #ifdef CONFIG_PCI_IOV  		if (!handle && pci_dev->is_virtfn) -			handle = DEVICE_ACPI_HANDLE(physfn->bus->bridge); +			handle = ACPI_HANDLE(physfn->bus->bridge);  #endif  		if (handle) {  			acpi_status status; @@ -192,3 +195,49 @@ static int __init register_xen_pci_notifier(void)  }  arch_initcall(register_xen_pci_notifier); + +#ifdef CONFIG_PCI_MMCONFIG +static int __init xen_mcfg_late(void) +{ +	struct pci_mmcfg_region *cfg; +	int rc; + +	if (!xen_initial_domain()) +		return 0; + +	if ((pci_probe & PCI_PROBE_MMCONF) == 0) +		return 0; + +	if (list_empty(&pci_mmcfg_list)) +		return 0; + +	/* Check whether they are in the right area. */ +	list_for_each_entry(cfg, &pci_mmcfg_list, list) { +		struct physdev_pci_mmcfg_reserved r; + +		r.address = cfg->address; +		r.segment = cfg->segment; +		r.start_bus = cfg->start_bus; +		r.end_bus = cfg->end_bus; +		r.flags = XEN_PCI_MMCFG_RESERVED; + +		rc = HYPERVISOR_physdev_op(PHYSDEVOP_pci_mmcfg_reserved, &r); +		switch (rc) { +		case 0: +		case -ENOSYS: +			continue; + +		default: +			pr_warn("Failed to report MMCONFIG reservation" +				" state for %s to hypervisor" +				" (%d)\n", +				cfg->name, rc); +		} +	} +	return 0; +} +/* + * Needs to be done after acpi_init which are subsys_initcall. 
+ */ +subsys_initcall_sync(xen_mcfg_late); +#endif diff --git a/drivers/xen/pcpu.c b/drivers/xen/pcpu.c index 79e1dff7ed4..0aac403d53f 100644 --- a/drivers/xen/pcpu.c +++ b/drivers/xen/pcpu.c @@ -40,6 +40,7 @@  #include <linux/capability.h>  #include <xen/xen.h> +#include <xen/acpi.h>  #include <xen/xenbus.h>  #include <xen/events.h>  #include <xen/interface/platform.h> diff --git a/drivers/xen/platform-pci.c b/drivers/xen/platform-pci.c index 99db9e1eb8b..3454973dc3b 100644 --- a/drivers/xen/platform-pci.c +++ b/drivers/xen/platform-pci.c @@ -45,7 +45,7 @@ static unsigned long platform_mmio_alloc;  static unsigned long platform_mmiolen;  static uint64_t callback_via; -unsigned long alloc_xen_mmio(unsigned long len) +static unsigned long alloc_xen_mmio(unsigned long len)  {  	unsigned long addr; @@ -84,7 +84,7 @@ static irqreturn_t do_hvm_evtchn_intr(int irq, void *dev_id)  static int xen_allocate_irq(struct pci_dev *pdev)  {  	return request_irq(pdev->irq, do_hvm_evtchn_intr, -			IRQF_DISABLED | IRQF_NOBALANCING | IRQF_TRIGGER_RISING, +			IRQF_NOBALANCING | IRQF_TRIGGER_RISING,  			"xen-platform-pci", pdev);  } @@ -108,6 +108,7 @@ static int platform_pci_init(struct pci_dev *pdev,  	long ioaddr;  	long mmio_addr, mmio_len;  	unsigned int max_nr_gframes; +	unsigned long grant_frames;  	if (!xen_domain())  		return -ENODEV; @@ -154,13 +155,17 @@ static int platform_pci_init(struct pci_dev *pdev,  	}  	max_nr_gframes = gnttab_max_grant_frames(); -	xen_hvm_resume_frames = alloc_xen_mmio(PAGE_SIZE * max_nr_gframes); -	ret = gnttab_init(); +	grant_frames = alloc_xen_mmio(PAGE_SIZE * max_nr_gframes); +	ret = gnttab_setup_auto_xlat_frames(grant_frames);  	if (ret)  		goto out; +	ret = gnttab_init(); +	if (ret) +		goto grant_out;  	xenbus_probe(NULL);  	return 0; - +grant_out: +	gnttab_free_auto_xlat_frames();  out:  	pci_release_region(pdev, 0);  mem_out: diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index 8e74590fa1b..569a13b9e85 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -533,12 +533,17 @@ static void privcmd_close(struct vm_area_struct *vma)  {  	struct page **pages = vma->vm_private_data;  	int numpgs = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; +	int rc;  	if (!xen_feature(XENFEAT_auto_translated_physmap) || !numpgs || !pages)  		return; -	xen_unmap_domain_mfn_range(vma, numpgs, pages); -	free_xenballooned_pages(numpgs, pages); +	rc = xen_unmap_domain_mfn_range(vma, numpgs, pages); +	if (rc == 0) +		free_xenballooned_pages(numpgs, pages); +	else +		pr_crit("unable to unmap MFN range: leaking %d pages. rc=%d\n", +			numpgs, rc);  	kfree(pages);  } diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 1b2277c311d..ebd8f218a78 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -42,12 +42,31 @@  #include <xen/page.h>  #include <xen/xen-ops.h>  #include <xen/hvc-console.h> + +#include <asm/dma-mapping.h> +#include <asm/xen/page-coherent.h> + +#include <trace/events/swiotlb.h>  /*   * Used to do a quick range check in swiotlb_tbl_unmap_single and   * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this   * API.   */ +#ifndef CONFIG_X86 +static unsigned long dma_alloc_coherent_mask(struct device *dev, +					    gfp_t gfp) +{ +	unsigned long dma_mask = 0; + +	dma_mask = dev->coherent_dma_mask; +	if (!dma_mask) +		dma_mask = (gfp & GFP_DMA) ? 
DMA_BIT_MASK(24) : DMA_BIT_MASK(32); + +	return dma_mask; +} +#endif +  static char *xen_io_tlb_start, *xen_io_tlb_end;  static unsigned long xen_io_tlb_nslabs;  /* @@ -56,17 +75,35 @@ static unsigned long xen_io_tlb_nslabs;  static u64 start_dma_addr; -static dma_addr_t xen_phys_to_bus(phys_addr_t paddr) +/* + * Both of these functions should avoid PFN_PHYS because phys_addr_t + * can be 32bit when dma_addr_t is 64bit leading to a loss in + * information if the shift is done before casting to 64bit. + */ +static inline dma_addr_t xen_phys_to_bus(phys_addr_t paddr)  { -	return phys_to_machine(XPADDR(paddr)).maddr; +	unsigned long mfn = pfn_to_mfn(PFN_DOWN(paddr)); +	dma_addr_t dma = (dma_addr_t)mfn << PAGE_SHIFT; + +	dma |= paddr & ~PAGE_MASK; + +	return dma;  } -static phys_addr_t xen_bus_to_phys(dma_addr_t baddr) +static inline phys_addr_t xen_bus_to_phys(dma_addr_t baddr)  { -	return machine_to_phys(XMADDR(baddr)).paddr; +	unsigned long pfn = mfn_to_pfn(PFN_DOWN(baddr)); +	dma_addr_t dma = (dma_addr_t)pfn << PAGE_SHIFT; +	phys_addr_t paddr = dma; + +	BUG_ON(paddr != dma); /* truncation has occurred, should never happen */ + +	paddr |= baddr & ~PAGE_MASK; + +	return paddr;  } -static dma_addr_t xen_virt_to_bus(void *address) +static inline dma_addr_t xen_virt_to_bus(void *address)  {  	return xen_phys_to_bus(virt_to_phys(address));  } @@ -89,7 +126,7 @@ static int check_pages_physically_contiguous(unsigned long pfn,  	return 1;  } -static int range_straddles_page_boundary(phys_addr_t p, size_t size) +static inline int range_straddles_page_boundary(phys_addr_t p, size_t size)  {  	unsigned long pfn = PFN_DOWN(p);  	unsigned int offset = p & ~PAGE_MASK; @@ -126,6 +163,8 @@ xen_swiotlb_fixup(void *buf, size_t size, unsigned long nslabs)  {  	int i, rc;  	int dma_bits; +	dma_addr_t dma_handle; +	phys_addr_t p = virt_to_phys(buf);  	dma_bits = get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT) + PAGE_SHIFT; @@ -135,9 +174,9 @@ xen_swiotlb_fixup(void *buf, size_t size, unsigned long nslabs)  		do {  			rc = xen_create_contiguous_region( -				(unsigned long)buf + (i << IO_TLB_SHIFT), +				p + (i << IO_TLB_SHIFT),  				get_order(slabs << IO_TLB_SHIFT), -				dma_bits); +				dma_bits, &dma_handle);  		} while (rc && dma_bits++ < max_dma_bits);  		if (rc)  			return rc; @@ -263,7 +302,6 @@ xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size,  	void *ret;  	int order = get_order(size);  	u64 dma_mask = DMA_BIT_MASK(32); -	unsigned long vstart;  	phys_addr_t phys;  	dma_addr_t dev_addr; @@ -278,8 +316,12 @@ xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size,  	if (dma_alloc_from_coherent(hwdev, size, dma_handle, &ret))  		return ret; -	vstart = __get_free_pages(flags, order); -	ret = (void *)vstart; +	/* On ARM this function returns an ioremap'ped virtual address for +	 * which virt_to_phys doesn't return the corresponding physical +	 * address. In fact on ARM virt_to_phys only works for kernel direct +	 * mapped RAM memory. Also see comment below. +	 */ +	ret = xen_alloc_coherent_pages(hwdev, size, dma_handle, flags, attrs);  	if (!ret)  		return ret; @@ -287,18 +329,21 @@ xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size,  	if (hwdev && hwdev->coherent_dma_mask)  		dma_mask = dma_alloc_coherent_mask(hwdev, flags); -	phys = virt_to_phys(ret); +	/* At this point dma_handle is the physical address, next we are +	 * going to set it to the machine address. +	 * Do not use virt_to_phys(ret) because on ARM it doesn't correspond +	 * to *dma_handle. 
*/ +	phys = *dma_handle;  	dev_addr = xen_phys_to_bus(phys);  	if (((dev_addr + size - 1 <= dma_mask)) &&  	    !range_straddles_page_boundary(phys, size))  		*dma_handle = dev_addr;  	else { -		if (xen_create_contiguous_region(vstart, order, -						 fls64(dma_mask)) != 0) { -			free_pages(vstart, order); +		if (xen_create_contiguous_region(phys, order, +						 fls64(dma_mask), dma_handle) != 0) { +			xen_free_coherent_pages(hwdev, size, ret, (dma_addr_t)phys, attrs);  			return NULL;  		} -		*dma_handle = virt_to_machine(ret).maddr;  	}  	memset(ret, 0, size);  	return ret; @@ -319,13 +364,15 @@ xen_swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,  	if (hwdev && hwdev->coherent_dma_mask)  		dma_mask = hwdev->coherent_dma_mask; -	phys = virt_to_phys(vaddr); +	/* do not use virt_to_phys because on ARM it doesn't return you the +	 * physical address */ +	phys = xen_bus_to_phys(dev_addr);  	if (((dev_addr + size - 1 > dma_mask)) ||  	    range_straddles_page_boundary(phys, size)) -		xen_destroy_contiguous_region((unsigned long)vaddr, order); +		xen_destroy_contiguous_region(phys, order); -	free_pages((unsigned long)vaddr, order); +	xen_free_coherent_pages(hwdev, size, vaddr, (dma_addr_t)phys, attrs);  }  EXPORT_SYMBOL_GPL(xen_swiotlb_free_coherent); @@ -352,16 +399,25 @@ dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,  	 * buffering it.  	 */  	if (dma_capable(dev, dev_addr, size) && -	    !range_straddles_page_boundary(phys, size) && !swiotlb_force) +	    !range_straddles_page_boundary(phys, size) && !swiotlb_force) { +		/* we are not interested in the dma_addr returned by +		 * xen_dma_map_page, only in the potential cache flushes executed +		 * by the function. */ +		xen_dma_map_page(dev, page, offset, size, dir, attrs);  		return dev_addr; +	}  	/*  	 * Oh well, have to allocate and map a bounce buffer.  	 */ +	trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force); +  	map = swiotlb_tbl_map_single(dev, start_dma_addr, phys, size, dir);  	if (map == SWIOTLB_MAP_ERROR)  		return DMA_ERROR_CODE; +	xen_dma_map_page(dev, pfn_to_page(map >> PAGE_SHIFT), +					map & ~PAGE_MASK, size, dir, attrs);  	dev_addr = xen_phys_to_bus(map);  	/* @@ -384,12 +440,15 @@ EXPORT_SYMBOL_GPL(xen_swiotlb_map_page);   * whatever the device wrote there.   */  static void xen_unmap_single(struct device *hwdev, dma_addr_t dev_addr, -			     size_t size, enum dma_data_direction dir) +			     size_t size, enum dma_data_direction dir, +				 struct dma_attrs *attrs)  {  	phys_addr_t paddr = xen_bus_to_phys(dev_addr);  	BUG_ON(dir == DMA_NONE); +	xen_dma_unmap_page(hwdev, paddr, size, dir, attrs); +  	/* NOTE: We use dev_addr here, not paddr! */  	if (is_xen_swiotlb_buffer(dev_addr)) {  		swiotlb_tbl_unmap_single(hwdev, paddr, size, dir); @@ -412,7 +471,7 @@ void xen_swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,  			    size_t size, enum dma_data_direction dir,  			    struct dma_attrs *attrs)  { -	xen_unmap_single(hwdev, dev_addr, size, dir); +	xen_unmap_single(hwdev, dev_addr, size, dir, attrs);  }  EXPORT_SYMBOL_GPL(xen_swiotlb_unmap_page); @@ -435,11 +494,15 @@ xen_swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,  	BUG_ON(dir == DMA_NONE); +	if (target == SYNC_FOR_CPU) +		xen_dma_sync_single_for_cpu(hwdev, paddr, size, dir); +  	/* NOTE: We use dev_addr here, not paddr! 
*/ -	if (is_xen_swiotlb_buffer(dev_addr)) { +	if (is_xen_swiotlb_buffer(dev_addr))  		swiotlb_tbl_sync_single(hwdev, paddr, size, dir, target); -		return; -	} + +	if (target == SYNC_FOR_DEVICE) +		xen_dma_sync_single_for_cpu(hwdev, paddr, size, dir);  	if (dir != DMA_FROM_DEVICE)  		return; @@ -502,16 +565,31 @@ xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl,  								 sg->length,  								 dir);  			if (map == SWIOTLB_MAP_ERROR) { +				dev_warn(hwdev, "swiotlb buffer is full\n");  				/* Don't panic here, we expect map_sg users  				   to do proper error handling. */  				xen_swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir,  							   attrs);  				sg_dma_len(sgl) = 0; -				return DMA_ERROR_CODE; +				return 0;  			} +			xen_dma_map_page(hwdev, pfn_to_page(map >> PAGE_SHIFT), +						map & ~PAGE_MASK, +						sg->length, +						dir, +						attrs);  			sg->dma_address = xen_phys_to_bus(map); -		} else +		} else { +			/* we are not interested in the dma_addr returned by +			 * xen_dma_map_page, only in the potential cache flushes executed +			 * by the function. */ +			xen_dma_map_page(hwdev, pfn_to_page(paddr >> PAGE_SHIFT), +						paddr & ~PAGE_MASK, +						sg->length, +						dir, +						attrs);  			sg->dma_address = dev_addr; +		}  		sg_dma_len(sg) = sg->length;  	}  	return nelems; @@ -533,7 +611,7 @@ xen_swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,  	BUG_ON(dir == DMA_NONE);  	for_each_sg(sgl, sg, nelems, i) -		xen_unmap_single(hwdev, sg->dma_address, sg_dma_len(sg), dir); +		xen_unmap_single(hwdev, sg->dma_address, sg_dma_len(sg), dir, attrs);  }  EXPORT_SYMBOL_GPL(xen_swiotlb_unmap_sg_attrs); @@ -593,3 +671,15 @@ xen_swiotlb_dma_supported(struct device *hwdev, u64 mask)  	return xen_virt_to_bus(xen_io_tlb_end - 1) <= mask;  }  EXPORT_SYMBOL_GPL(xen_swiotlb_dma_supported); + +int +xen_swiotlb_set_dma_mask(struct device *dev, u64 dma_mask) +{ +	if (!dev->dma_mask || !xen_swiotlb_dma_supported(dev, dma_mask)) +		return -EIO; + +	*dev->dma_mask = dma_mask; + +	return 0; +} +EXPORT_SYMBOL_GPL(xen_swiotlb_set_dma_mask); diff --git a/drivers/xen/xen-acpi-cpuhotplug.c b/drivers/xen/xen-acpi-cpuhotplug.c index 8dae6c13063..3e62ee4b3b6 100644 --- a/drivers/xen/xen-acpi-cpuhotplug.c +++ b/drivers/xen/xen-acpi-cpuhotplug.c @@ -24,10 +24,7 @@  #include <linux/cpu.h>  #include <linux/acpi.h>  #include <linux/uaccess.h> -#include <acpi/acpi_bus.h> -#include <acpi/acpi_drivers.h>  #include <acpi/processor.h> -  #include <xen/acpi.h>  #include <xen/interface/platform.h>  #include <asm/xen/hypercall.h> @@ -269,7 +266,8 @@ static void acpi_processor_hotplug_notify(acpi_handle handle,  		if (!is_processor_present(handle))  			break; -		if (!acpi_bus_get_device(handle, &device)) +		acpi_bus_get_device(handle, &device); +		if (acpi_device_enumerated(device))  			break;  		result = acpi_bus_scan(handle); @@ -277,8 +275,9 @@ static void acpi_processor_hotplug_notify(acpi_handle handle,  			pr_err(PREFIX "Unable to add the device\n");  			break;  		} -		result = acpi_bus_get_device(handle, &device); -		if (result) { +		device = NULL; +		acpi_bus_get_device(handle, &device); +		if (!acpi_device_enumerated(device)) {  			pr_err(PREFIX "Missing device object\n");  			break;  		} @@ -314,7 +313,7 @@ static void acpi_processor_hotplug_notify(acpi_handle handle,  		goto out;  	} -	(void) acpi_evaluate_hotplug_ost(handle, event, ost_code, NULL); +	(void) acpi_evaluate_ost(handle, event, ost_code, NULL);  out:  	acpi_scan_lock_release(); diff --git 
a/drivers/xen/xen-acpi-memhotplug.c b/drivers/xen/xen-acpi-memhotplug.c index 9083f1e474f..34e40b733f9 100644 --- a/drivers/xen/xen-acpi-memhotplug.c +++ b/drivers/xen/xen-acpi-memhotplug.c @@ -22,7 +22,6 @@  #include <linux/init.h>  #include <linux/types.h>  #include <linux/acpi.h> -#include <acpi/acpi_drivers.h>  #include <xen/acpi.h>  #include <xen/interface/platform.h>  #include <asm/xen/hypercall.h> @@ -169,7 +168,7 @@ static int acpi_memory_get_device(acpi_handle handle,  	acpi_scan_lock_acquire();  	acpi_bus_get_device(handle, &device); -	if (device) +	if (acpi_device_enumerated(device))  		goto end;  	/* @@ -182,8 +181,9 @@  		result = -EINVAL;  		goto out;  	} -	result = acpi_bus_get_device(handle, &device); -	if (result) { +	device = NULL; +	acpi_bus_get_device(handle, &device); +	if (!acpi_device_enumerated(device)) {  		pr_warn(PREFIX "Missing device object\n");  		result = -EINVAL;  		goto out; @@ -285,7 +285,7 @@ static void acpi_memory_device_notify(acpi_handle handle, u32 event, void *data)  		return;  	} -	(void) acpi_evaluate_hotplug_ost(handle, event, ost_code, NULL); +	(void) acpi_evaluate_ost(handle, event, ost_code, NULL);  	return;  } diff --git a/drivers/xen/xen-acpi-pad.c b/drivers/xen/xen-acpi-pad.c index 59708fdd068..f83b754505f 100644 --- a/drivers/xen/xen-acpi-pad.c +++ b/drivers/xen/xen-acpi-pad.c @@ -18,11 +18,10 @@  #include <linux/kernel.h>  #include <linux/types.h> -#include <acpi/acpi_bus.h> -#include <acpi/acpi_drivers.h> -#include <asm/xen/hypercall.h> +#include <linux/acpi.h>  #include <xen/interface/version.h>  #include <xen/xen-ops.h> +#include <asm/xen/hypercall.h>  #define ACPI_PROCESSOR_AGGREGATOR_CLASS	"acpi_pad"  #define ACPI_PROCESSOR_AGGREGATOR_DEVICE_NAME "Processor Aggregator" @@ -78,27 +77,14 @@ static int acpi_pad_pur(acpi_handle handle)  	return num;  } -/* Notify firmware how many CPUs are idle */ -static void acpi_pad_ost(acpi_handle handle, int stat, -	uint32_t idle_nums) -{ -	union acpi_object params[3] = { -		{.type = ACPI_TYPE_INTEGER,}, -		{.type = ACPI_TYPE_INTEGER,}, -		{.type = ACPI_TYPE_BUFFER,}, -	}; -	struct acpi_object_list arg_list = {3, params}; - -	params[0].integer.value = ACPI_PROCESSOR_AGGREGATOR_NOTIFY; -	params[1].integer.value =  stat; -	params[2].buffer.length = 4; -	params[2].buffer.pointer = (void *)&idle_nums; -	acpi_evaluate_object(handle, "_OST", &arg_list, NULL); -} -  static void acpi_pad_handle_notify(acpi_handle handle)  {  	int idle_nums; +	struct acpi_buffer param = { +		.length = 4, +		.pointer = (void *)&idle_nums, +	}; +  	mutex_lock(&xen_cpu_lock);  	idle_nums = acpi_pad_pur(handle); @@ -110,7 +96,8 @@ static void acpi_pad_handle_notify(acpi_handle handle)  	idle_nums = xen_acpi_pad_idle_cpus(idle_nums)  		    ?: xen_acpi_pad_idle_cpus_num();  	if (idle_nums >= 0) -		acpi_pad_ost(handle, 0, idle_nums); +		acpi_evaluate_ost(handle, ACPI_PROCESSOR_AGGREGATOR_NOTIFY, +				  0, &param);  	mutex_unlock(&xen_cpu_lock);  } diff --git a/drivers/xen/xen-acpi-processor.c b/drivers/xen/xen-acpi-processor.c index 13bc6c31c06..59fc190f1e9 100644 --- a/drivers/xen/xen-acpi-processor.c +++ b/drivers/xen/xen-acpi-processor.c @@ -27,12 +27,10 @@  #include <linux/init.h>  #include <linux/module.h>  #include <linux/types.h> -#include <linux/syscore_ops.h> -#include <acpi/acpi_bus.h> -#include <acpi/acpi_drivers.h> +#include <linux/acpi.h>  #include <acpi/processor.h> -  #include <xen/xen.h> +#include <xen/xen-ops.h>  #include <xen/interface/platform.h>  #include 
<asm/xen/hypercall.h> @@ -129,7 +127,7 @@ static int push_cxx_to_hypervisor(struct acpi_processor *_pr)  			pr_debug("     C%d: %s %d uS\n",  				 cx->type, cx->desc, (u32)cx->latency);  		} -	} else if (ret != -EINVAL) +	} else if ((ret != -EINVAL) && (ret != -ENOSYS))  		/* EINVAL means the ACPI ID is incorrect - meaning the ACPI  		 * table is referencing a non-existing CPU - which can happen  		 * with broken ACPI tables. */ @@ -261,7 +259,7 @@ static int push_pxx_to_hypervisor(struct acpi_processor *_pr)  			(u32) perf->states[i].power,  			(u32) perf->states[i].transition_latency);  		} -	} else if (ret != -EINVAL) +	} else if ((ret != -EINVAL) && (ret != -ENOSYS))  		/* EINVAL means the ACPI ID is incorrect - meaning the ACPI  		 * table is referencing a non-existing CPU - which can happen  		 * with broken ACPI tables. */ @@ -497,14 +495,15 @@ static int xen_upload_processor_pm_data(void)  	return rc;  } -static void xen_acpi_processor_resume(void) +static int xen_acpi_processor_resume(struct notifier_block *nb, +				     unsigned long action, void *data)  {  	bitmap_zero(acpi_ids_done, nr_acpi_bits); -	xen_upload_processor_pm_data(); +	return xen_upload_processor_pm_data();  } -static struct syscore_ops xap_syscore_ops = { -	.resume	= xen_acpi_processor_resume, +struct notifier_block xen_acpi_processor_resume_nb = { +	.notifier_call = xen_acpi_processor_resume,  };  static int __init xen_acpi_processor_init(void) @@ -557,7 +556,7 @@ static int __init xen_acpi_processor_init(void)  	if (rc)  		goto err_unregister; -	register_syscore_ops(&xap_syscore_ops); +	xen_resume_notifier_register(&xen_acpi_processor_resume_nb);  	return 0;  err_unregister: @@ -576,7 +575,7 @@ static void __exit xen_acpi_processor_exit(void)  {  	int i; -	unregister_syscore_ops(&xap_syscore_ops); +	xen_resume_notifier_unregister(&xen_acpi_processor_resume_nb);  	kfree(acpi_ids_done);  	kfree(acpi_id_present);  	kfree(acpi_id_cst_present); diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c index 62fcd485f0a..d57a173685f 100644 --- a/drivers/xen/xen-pciback/pci_stub.c +++ b/drivers/xen/xen-pciback/pci_stub.c @@ -242,6 +242,15 @@ struct pci_dev *pcistub_get_pci_dev(struct xen_pcibk_device *pdev,  	return found_dev;  } +/* + * Called when: + *  - XenBus state has been reconfigure (pci unplug). See xen_pcibk_remove_device + *  - XenBus state has been disconnected (guest shutdown). See xen_pcibk_xenbus_remove + *  - 'echo BDF > unbind' on pciback module with no guest attached. See pcistub_remove + *  - 'echo BDF > unbind' with a guest still using it. See pcistub_remove + * + *  As such we have to be careful. + */  void pcistub_put_pci_dev(struct pci_dev *dev)  {  	struct pcistub_device *psdev, *found_psdev = NULL; @@ -272,16 +281,16 @@ void pcistub_put_pci_dev(struct pci_dev *dev)  	 * and want to inhibit the user from fiddling with 'reset'  	 */  	pci_reset_function(dev); -	pci_restore_state(psdev->dev); +	pci_restore_state(dev);  	/* This disables the device. */ -	xen_pcibk_reset_device(found_psdev->dev); +	xen_pcibk_reset_device(dev);  	/* And cleanup up our emulated fields. 
*/ -	xen_pcibk_config_free_dyn_fields(found_psdev->dev); -	xen_pcibk_config_reset_dev(found_psdev->dev); +	xen_pcibk_config_reset_dev(dev); +	xen_pcibk_config_free_dyn_fields(dev); -	xen_unregister_device_domain_owner(found_psdev->dev); +	xen_unregister_device_domain_owner(dev);  	spin_lock_irqsave(&found_psdev->lock, flags);  	found_psdev->pdev = NULL; @@ -493,6 +502,8 @@ static int pcistub_seize(struct pci_dev *dev)  	return err;  } +/* Called when 'bind'. This means we must _NOT_ call pci_reset_function or + * other functions that take the sysfs lock. */  static int pcistub_probe(struct pci_dev *dev, const struct pci_device_id *id)  {  	int err = 0; @@ -520,6 +531,8 @@ out:  	return err;  } +/* Called when 'unbind'. This means we must _NOT_ call pci_reset_function or + * other functions that take the sysfs lock. */  static void pcistub_remove(struct pci_dev *dev)  {  	struct pcistub_device *psdev, *found_psdev = NULL; @@ -551,6 +564,8 @@ static void pcistub_remove(struct pci_dev *dev)  			pr_warn("****** shutdown driver domain before binding device\n");  			pr_warn("****** to other drivers or domains\n"); +			/* N.B. This ends up calling pcistub_put_pci_dev which ends up +			 * doing the FLR. */  			xen_pcibk_release_pci_dev(found_psdev->pdev,  						found_psdev->dev);  		} diff --git a/drivers/xen/xen-pciback/pciback_ops.c b/drivers/xen/xen-pciback/pciback_ops.c index 64eb0cd8b8a..c4a0666de6f 100644 --- a/drivers/xen/xen-pciback/pciback_ops.c +++ b/drivers/xen/xen-pciback/pciback_ops.c @@ -213,12 +213,11 @@ int xen_pcibk_enable_msix(struct xen_pcibk_device *pdev,  		entries[i].vector = op->msix_entries[i].vector;  	} -	result = pci_enable_msix(dev, entries, op->value); - +	result = pci_enable_msix_exact(dev, entries, op->value);  	if (result == 0) {  		for (i = 0; i < op->value; i++) {  			op->msix_entries[i].entry = entries[i].entry; -			if (entries[i].vector) +			if (entries[i].vector) {  				op->msix_entries[i].vector =  					xen_pirq_from_irq(entries[i].vector);  				if (unlikely(verbose_request)) @@ -226,6 +225,7 @@ int xen_pcibk_enable_msix(struct xen_pcibk_device *pdev,  						"MSI-X[%d]: %d\n",  						pci_name(dev), i,  						op->msix_entries[i].vector); +			}  		}  	} else  		pr_warn_ratelimited("%s: error enabling MSI-X for guest %u: err %d!\n", @@ -348,9 +348,9 @@ void xen_pcibk_do_op(struct work_struct *data)  	notify_remote_via_irq(pdev->evtchn_irq);  	/* Mark that we're done. */ -	smp_mb__before_clear_bit(); /* /after/ clearing PCIF_active */ +	smp_mb__before_atomic(); /* /after/ clearing PCIF_active */  	clear_bit(_PDEVF_op_active, &pdev->flags); -	smp_mb__after_clear_bit(); /* /before/ final check for work */ +	smp_mb__after_atomic(); /* /before/ final check for work */  	/* Check to see if the driver domain tried to start another request in  	 * between clearing _XEN_PCIF_active and clearing _PDEVF_op_active. diff --git a/drivers/xen/xen-pciback/vpci.c b/drivers/xen/xen-pciback/vpci.c index 3165ce361b0..51afff96c51 100644 --- a/drivers/xen/xen-pciback/vpci.c +++ b/drivers/xen/xen-pciback/vpci.c @@ -137,6 +137,8 @@ unlock:  	/* Publish this device. 
 */  	if (!err)  		err = publish_cb(pdev, 0, 0, PCI_DEVFN(slot, func), devid); +	else +		kfree(dev_entry);  out:  	return err; diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c index a9ed867afab..4a7e6e0a5f4 100644 --- a/drivers/xen/xen-pciback/xenbus.c +++ b/drivers/xen/xen-pciback/xenbus.c @@ -93,6 +93,8 @@ static void free_pdev(struct xen_pcibk_device *pdev)  	xen_pcibk_disconnect(pdev); +	/* N.B. This calls pcistub_put_pci_dev which does the FLR on all +	 * of the PCIe devices. */  	xen_pcibk_release_devices(pdev);  	dev_set_drvdata(&pdev->xdev->dev, NULL); @@ -286,6 +288,8 @@ static int xen_pcibk_remove_device(struct xen_pcibk_device *pdev,  	dev_dbg(&dev->dev, "unregistering for %d\n", pdev->xdev->otherend_id);  	xen_unregister_device_domain_owner(dev); +	/* N.B. This ends up calling pcistub_put_pci_dev which ends up +	 * doing the FLR. */  	xen_pcibk_release_pci_dev(pdev, dev);  out: diff --git a/drivers/xen/xen-selfballoon.c b/drivers/xen/xen-selfballoon.c index 21e18c18c7a..3b2bffde534 100644 --- a/drivers/xen/xen-selfballoon.c +++ b/drivers/xen/xen-selfballoon.c @@ -170,11 +170,13 @@ static void frontswap_selfshrink(void)  		tgt_frontswap_pages = cur_frontswap_pages -  			(cur_frontswap_pages / frontswap_hysteresis);  	frontswap_shrink(tgt_frontswap_pages); +	frontswap_inertia_counter = frontswap_inertia;  }  #endif /* CONFIG_FRONTSWAP */  #define MB2PAGES(mb)	((mb) << (20 - PAGE_SHIFT)) +#define PAGES2MB(pages) ((pages) >> (20 - PAGE_SHIFT))  /*   * Use current balloon size, the goal (vm_committed_as), and hysteresis @@ -525,6 +527,7 @@ EXPORT_SYMBOL(register_xen_selfballooning);  int xen_selfballoon_init(bool use_selfballooning, bool use_frontswap_selfshrink)  {  	bool enable = false; +	unsigned long reserve_pages;  	if (!xen_domain())  		return -ENODEV; @@ -549,6 +552,26 @@ int xen_selfballoon_init(bool use_selfballooning, bool use_frontswap_selfshrink)  	if (!enable)  		return -ENODEV; +	/* +	 * Give selfballoon_reserved_mb a default value (10% of total ram pages) +	 * to make selfballoon not so aggressive. +	 * +	 * There are mainly two reasons: +	 * 1) The original goal_page didn't consider some pages used by kernel +	 *    space, like slab pages and memory used by device drivers. +	 * +	 * 2) The balloon driver may not give back memory to the guest OS fast +	 *    enough when the workload suddenly acquires a lot of physical memory. +	 * +	 * In both cases, the guest OS will suffer from memory pressure and +	 * the OOM killer may be triggered. +	 * By reserving an extra 10% of total ram pages, we can keep the system +	 * much more reliable and responsive in some cases. +	 */ +	if (!selfballoon_reserved_mb) { +		reserve_pages = totalram_pages / 10; +		selfballoon_reserved_mb = PAGES2MB(reserve_pages); +	}  	schedule_delayed_work(&selfballoon_worker, selfballoon_interval * HZ);  	return 0; diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c index ec097d6f964..439c9dca9ee 100644 --- a/drivers/xen/xenbus/xenbus_client.c +++ b/drivers/xen/xenbus/xenbus_client.c @@ -45,6 +45,7 @@  #include <xen/grant_table.h>  #include <xen/xenbus.h>  #include <xen/xen.h> +#include <xen/features.h>  #include "xenbus_probe.h" @@ -400,33 +401,6 @@ EXPORT_SYMBOL_GPL(xenbus_alloc_evtchn);  /** - * Bind to an existing interdomain event channel in another domain. Returns 0 - * on success and stores the local port in *port. On error, returns -errno, - * switches the device to XenbusStateClosing, and saves the error in XenStore.
- */ -int xenbus_bind_evtchn(struct xenbus_device *dev, int remote_port, int *port) -{ -	struct evtchn_bind_interdomain bind_interdomain; -	int err; - -	bind_interdomain.remote_dom = dev->otherend_id; -	bind_interdomain.remote_port = remote_port; - -	err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, -					  &bind_interdomain); -	if (err) -		xenbus_dev_fatal(dev, err, -				 "binding to event channel %d from domain %d", -				 remote_port, dev->otherend_id); -	else -		*port = bind_interdomain.local_port; - -	return err; -} -EXPORT_SYMBOL_GPL(xenbus_bind_evtchn); - - -/**   * Free an existing event channel. Returns 0 on success or -errno on error.   */  int xenbus_free_evtchn(struct xenbus_device *dev, int port) @@ -743,7 +717,7 @@ static const struct xenbus_ring_ops ring_ops_hvm = {  void __init xenbus_ring_ops_init(void)  { -	if (xen_pv_domain()) +	if (!xen_feature(XENFEAT_auto_translated_physmap))  		ring_ops = &ring_ops_pv;  	else  		ring_ops = &ring_ops_hvm; diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 38e92b770e9..3c0a74b3e9b 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -384,12 +384,14 @@ static ssize_t nodename_show(struct device *dev,  {  	return sprintf(buf, "%s\n", to_xenbus_device(dev)->nodename);  } +static DEVICE_ATTR_RO(nodename);  static ssize_t devtype_show(struct device *dev,  			    struct device_attribute *attr, char *buf)  {  	return sprintf(buf, "%s\n", to_xenbus_device(dev)->devicetype);  } +static DEVICE_ATTR_RO(devtype);  static ssize_t modalias_show(struct device *dev,  			     struct device_attribute *attr, char *buf) @@ -397,14 +399,24 @@ static ssize_t modalias_show(struct device *dev,  	return sprintf(buf, "%s:%s\n", dev->bus->name,  		       to_xenbus_device(dev)->devicetype);  } +static DEVICE_ATTR_RO(modalias); -struct device_attribute xenbus_dev_attrs[] = { -	__ATTR_RO(nodename), -	__ATTR_RO(devtype), -	__ATTR_RO(modalias), -	__ATTR_NULL +static struct attribute *xenbus_dev_attrs[] = { +	&dev_attr_nodename.attr, +	&dev_attr_devtype.attr, +	&dev_attr_modalias.attr, +	NULL,  }; -EXPORT_SYMBOL_GPL(xenbus_dev_attrs); + +static const struct attribute_group xenbus_dev_group = { +	.attrs = xenbus_dev_attrs, +}; + +const struct attribute_group *xenbus_dev_groups[] = { +	&xenbus_dev_group, +	NULL, +}; +EXPORT_SYMBOL_GPL(xenbus_dev_groups);  int xenbus_probe_node(struct xen_bus_type *bus,  		      const char *type, diff --git a/drivers/xen/xenbus/xenbus_probe.h b/drivers/xen/xenbus/xenbus_probe.h index 146f857a36f..1085ec294a1 100644 --- a/drivers/xen/xenbus/xenbus_probe.h +++ b/drivers/xen/xenbus/xenbus_probe.h @@ -54,7 +54,7 @@ enum xenstore_init {  	XS_LOCAL,  }; -extern struct device_attribute xenbus_dev_attrs[]; +extern const struct attribute_group *xenbus_dev_groups[];  extern int xenbus_match(struct device *_dev, struct device_driver *_drv);  extern int xenbus_dev_probe(struct device *_dev); diff --git a/drivers/xen/xenbus/xenbus_probe_backend.c b/drivers/xen/xenbus/xenbus_probe_backend.c index 998bbbab816..5125dce11a6 100644 --- a/drivers/xen/xenbus/xenbus_probe_backend.c +++ b/drivers/xen/xenbus/xenbus_probe_backend.c @@ -200,7 +200,7 @@ static struct xen_bus_type xenbus_backend = {  		.probe		= xenbus_dev_probe,  		.remove		= xenbus_dev_remove,  		.shutdown	= xenbus_dev_shutdown, -		.dev_attrs	= xenbus_dev_attrs, +		.dev_groups	= xenbus_dev_groups,  	},  }; diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c index 
34b20bfa4e8..cb385c10d2b 100644 --- a/drivers/xen/xenbus/xenbus_probe_frontend.c +++ b/drivers/xen/xenbus/xenbus_probe_frontend.c @@ -154,7 +154,7 @@ static struct xen_bus_type xenbus_frontend = {  		.probe		= xenbus_frontend_dev_probe,  		.remove		= xenbus_dev_remove,  		.shutdown	= xenbus_dev_shutdown, -		.dev_attrs	= xenbus_dev_attrs, +		.dev_groups	= xenbus_dev_groups,  		.pm		= &xenbus_pm_ops,  	}, @@ -496,7 +496,7 @@ subsys_initcall(xenbus_probe_frontend_init);  #ifndef MODULE  static int __init boot_wait_for_devices(void)  { -	if (xen_hvm_domain() && !xen_platform_pci_unplug) +	if (!xen_has_pv_devices())  		return -ENODEV;  	ready_to_wait_for_devices = 1; diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c index b6d5fff43d1..ba804f3d827 100644 --- a/drivers/xen/xenbus/xenbus_xs.c +++ b/drivers/xen/xenbus/xenbus_xs.c @@ -50,6 +50,7 @@  #include <xen/xenbus.h>  #include <xen/xen.h>  #include "xenbus_comms.h" +#include "xenbus_probe.h"  struct xs_stored_msg {  	struct list_head list; @@ -139,6 +140,29 @@ static int get_error(const char *errorstring)  	return xsd_errors[i].errnum;  } +static bool xenbus_ok(void) +{ +	switch (xen_store_domain_type) { +	case XS_LOCAL: +		switch (system_state) { +		case SYSTEM_POWER_OFF: +		case SYSTEM_RESTART: +		case SYSTEM_HALT: +			return false; +		default: +			break; +		} +		return true; +	case XS_PV: +	case XS_HVM: +		/* FIXME: Could check that the remote domain is alive, +		 * but it is normally initial domain. */ +		return true; +	default: +		break; +	} +	return false; +}  static void *read_reply(enum xsd_sockmsg_type *type, unsigned int *len)  {  	struct xs_stored_msg *msg; @@ -148,9 +172,20 @@ static void *read_reply(enum xsd_sockmsg_type *type, unsigned int *len)  	while (list_empty(&xs_state.reply_list)) {  		spin_unlock(&xs_state.reply_lock); -		/* XXX FIXME: Avoid synchronous wait for response here. */ -		wait_event(xs_state.reply_waitq, -			   !list_empty(&xs_state.reply_list)); +		if (xenbus_ok()) +			/* XXX FIXME: Avoid synchronous wait for response here. */ +			wait_event_timeout(xs_state.reply_waitq, +					   !list_empty(&xs_state.reply_list), +					   msecs_to_jiffies(500)); +		else { +			/* +			 * If we are in the process of being shut-down there is +			 * no point of trying to contact XenBus - it is either +			 * killed (xenstored application) or the other domain +			 * has been killed or is unreachable. +			 */ +			return ERR_PTR(-EIO); +		}  		spin_lock(&xs_state.reply_lock);  	} @@ -215,6 +250,9 @@ void *xenbus_dev_request_and_reply(struct xsd_sockmsg *msg)  	mutex_unlock(&xs_state.request_mutex); +	if (IS_ERR(ret)) +		return ret; +  	if ((msg->type == XS_TRANSACTION_END) ||  	    ((req_msg.type == XS_TRANSACTION_START) &&  	     (msg->type == XS_ERROR))) diff --git a/drivers/xen/xencomm.c b/drivers/xen/xencomm.c deleted file mode 100644 index 4793fc59454..00000000000 --- a/drivers/xen/xencomm.c +++ /dev/null @@ -1,219 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA - * - * Copyright (C) IBM Corp. 2006 - * - * Authors: Hollis Blanchard <hollisb@us.ibm.com> - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include <linux/mm.h> -#include <linux/slab.h> -#include <asm/page.h> -#include <xen/xencomm.h> -#include <xen/interface/xen.h> -#include <asm/xen/xencomm.h>	/* for xencomm_is_phys_contiguous() */ - -static int xencomm_init(struct xencomm_desc *desc, -			void *buffer, unsigned long bytes) -{ -	unsigned long recorded = 0; -	int i = 0; - -	while ((recorded < bytes) && (i < desc->nr_addrs)) { -		unsigned long vaddr = (unsigned long)buffer + recorded; -		unsigned long paddr; -		int offset; -		int chunksz; - -		offset = vaddr % PAGE_SIZE; /* handle partial pages */ -		chunksz = min(PAGE_SIZE - offset, bytes - recorded); - -		paddr = xencomm_vtop(vaddr); -		if (paddr == ~0UL) { -			printk(KERN_DEBUG "%s: couldn't translate vaddr %lx\n", -			       __func__, vaddr); -			return -EINVAL; -		} - -		desc->address[i++] = paddr; -		recorded += chunksz; -	} - -	if (recorded < bytes) { -		printk(KERN_DEBUG -		       "%s: could only translate %ld of %ld bytes\n", -		       __func__, recorded, bytes); -		return -ENOSPC; -	} - -	/* mark remaining addresses invalid (just for safety) */ -	while (i < desc->nr_addrs) -		desc->address[i++] = XENCOMM_INVALID; - -	desc->magic = XENCOMM_MAGIC; - -	return 0; -} - -static struct xencomm_desc *xencomm_alloc(gfp_t gfp_mask, -					  void *buffer, unsigned long bytes) -{ -	struct xencomm_desc *desc; -	unsigned long buffer_ulong = (unsigned long)buffer; -	unsigned long start = buffer_ulong & PAGE_MASK; -	unsigned long end = (buffer_ulong + bytes) | ~PAGE_MASK; -	unsigned long nr_addrs = (end - start + 1) >> PAGE_SHIFT; -	unsigned long size = sizeof(*desc) + -		sizeof(desc->address[0]) * nr_addrs; - -	/* -	 * slab allocator returns at least sizeof(void*) aligned pointer. -	 * When sizeof(*desc) > sizeof(void*), struct xencomm_desc might -	 * cross page boundary. -	 */ -	if (sizeof(*desc) > sizeof(void *)) { -		unsigned long order = get_order(size); -		desc = (struct xencomm_desc *)__get_free_pages(gfp_mask, -							       order); -		if (desc == NULL) -			return NULL; - -		desc->nr_addrs = -			((PAGE_SIZE << order) - sizeof(struct xencomm_desc)) / -			sizeof(*desc->address); -	} else { -		desc = kmalloc(size, gfp_mask); -		if (desc == NULL) -			return NULL; - -		desc->nr_addrs = nr_addrs; -	} -	return desc; -} - -void xencomm_free(struct xencomm_handle *desc) -{ -	if (desc && !((ulong)desc & XENCOMM_INLINE_FLAG)) { -		struct xencomm_desc *desc__ = (struct xencomm_desc *)desc; -		if (sizeof(*desc__) > sizeof(void *)) { -			unsigned long size = sizeof(*desc__) + -				sizeof(desc__->address[0]) * desc__->nr_addrs; -			unsigned long order = get_order(size); -			free_pages((unsigned long)__va(desc), order); -		} else -			kfree(__va(desc)); -	} -} - -static int xencomm_create(void *buffer, unsigned long bytes, -			  struct xencomm_desc **ret, gfp_t gfp_mask) -{ -	struct xencomm_desc *desc; -	int rc; - -	pr_debug("%s: %p[%ld]\n", __func__, buffer, bytes); - -	if (bytes == 0) { -		/* don't create a descriptor; Xen recognizes NULL. 
*/ -		BUG_ON(buffer != NULL); -		*ret = NULL; -		return 0; -	} - -	BUG_ON(buffer == NULL); /* 'bytes' is non-zero */ - -	desc = xencomm_alloc(gfp_mask, buffer, bytes); -	if (!desc) { -		printk(KERN_DEBUG "%s failure\n", "xencomm_alloc"); -		return -ENOMEM; -	} - -	rc = xencomm_init(desc, buffer, bytes); -	if (rc) { -		printk(KERN_DEBUG "%s failure: %d\n", "xencomm_init", rc); -		xencomm_free((struct xencomm_handle *)__pa(desc)); -		return rc; -	} - -	*ret = desc; -	return 0; -} - -static struct xencomm_handle *xencomm_create_inline(void *ptr) -{ -	unsigned long paddr; - -	BUG_ON(!xencomm_is_phys_contiguous((unsigned long)ptr)); - -	paddr = (unsigned long)xencomm_pa(ptr); -	BUG_ON(paddr & XENCOMM_INLINE_FLAG); -	return (struct xencomm_handle *)(paddr | XENCOMM_INLINE_FLAG); -} - -/* "mini" routine, for stack-based communications: */ -static int xencomm_create_mini(void *buffer, -	unsigned long bytes, struct xencomm_mini *xc_desc, -	struct xencomm_desc **ret) -{ -	int rc = 0; -	struct xencomm_desc *desc; -	BUG_ON(((unsigned long)xc_desc) % sizeof(*xc_desc) != 0); - -	desc = (void *)xc_desc; - -	desc->nr_addrs = XENCOMM_MINI_ADDRS; - -	rc = xencomm_init(desc, buffer, bytes); -	if (!rc) -		*ret = desc; - -	return rc; -} - -struct xencomm_handle *xencomm_map(void *ptr, unsigned long bytes) -{ -	int rc; -	struct xencomm_desc *desc; - -	if (xencomm_is_phys_contiguous((unsigned long)ptr)) -		return xencomm_create_inline(ptr); - -	rc = xencomm_create(ptr, bytes, &desc, GFP_KERNEL); - -	if (rc || desc == NULL) -		return NULL; - -	return xencomm_pa(desc); -} - -struct xencomm_handle *__xencomm_map_no_alloc(void *ptr, unsigned long bytes, -			struct xencomm_mini *xc_desc) -{ -	int rc; -	struct xencomm_desc *desc = NULL; - -	if (xencomm_is_phys_contiguous((unsigned long)ptr)) -		return xencomm_create_inline(ptr); - -	rc = xencomm_create_mini(ptr, bytes, xc_desc, -				&desc); - -	if (rc) -		return NULL; - -	return xencomm_pa(desc); -}  | 
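A note on the sysfs conversion in xenbus_probe.c above: the legacy per-device dev_attrs array is replaced by the dev_groups mechanism. DEVICE_ATTR_RO() generates a dev_attr_<name> descriptor from a <name>_show() callback, the descriptors are collected into a NULL-terminated attribute array wrapped in an attribute_group, and the NULL-terminated list of groups is hung off the bus type. Below is a minimal sketch of that pattern for a hypothetical "example" bus; the bus name and the show callback body are illustrative and not part of this patch.

#include <linux/device.h>
#include <linux/sysfs.h>

static ssize_t nodename_show(struct device *dev,
			     struct device_attribute *attr, char *buf)
{
	/* Illustrative body only; xenbus prints the device's xenstore node here. */
	return sprintf(buf, "%s\n", dev_name(dev));
}
static DEVICE_ATTR_RO(nodename);	/* defines dev_attr_nodename */

static struct attribute *example_dev_attrs[] = {
	&dev_attr_nodename.attr,
	NULL,
};

static const struct attribute_group example_dev_group = {
	.attrs = example_dev_attrs,
};

static const struct attribute_group *example_dev_groups[] = {
	&example_dev_group,
	NULL,
};

static struct bus_type example_bus_type = {
	.name		= "example",
	.dev_groups	= example_dev_groups,	/* was .dev_attrs before the conversion */
};

The ATTRIBUTE_GROUPS() helper in <linux/sysfs.h> can generate the group and group-list boilerplate, but it produces static symbols; the patch open-codes and exports xenbus_dev_groups, presumably because the same group list is shared by the frontend and backend bus types.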

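The xenbus_xs.c hunk above keeps xenstore requests from hanging forever once the store can no longer answer: xenbus_ok() inspects xen_store_domain_type and, for a local xenstored, system_state, while read_reply() now sleeps in 500 ms slices so that condition is re-evaluated between waits and -EIO is returned instead of blocking across shutdown. Below is a self-contained sketch of that bounded-wait pattern; the demo_* names are hypothetical stand-ins for xs_state and xenbus_ok(), not kernel symbols.

#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/wait.h>
#include <linux/jiffies.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/err.h>

struct demo_state {
	struct list_head reply_list;	/* assumed initialised elsewhere */
	spinlock_t reply_lock;
	wait_queue_head_t reply_waitq;
};

/* False once the local store can no longer answer (cf. xenbus_ok()). */
static bool demo_store_ok(void)
{
	switch (system_state) {
	case SYSTEM_POWER_OFF:
	case SYSTEM_RESTART:
	case SYSTEM_HALT:
		return false;
	default:
		return true;
	}
}

static struct list_head *demo_read_reply(struct demo_state *st)
{
	struct list_head *msg;

	spin_lock(&st->reply_lock);
	while (list_empty(&st->reply_list)) {
		spin_unlock(&st->reply_lock);

		if (!demo_store_ok())
			return ERR_PTR(-EIO);	/* give up rather than hang */

		/* Sleep at most 500 ms, then recheck whether the store is alive. */
		wait_event_timeout(st->reply_waitq,
				   !list_empty(&st->reply_list),
				   msecs_to_jiffies(500));

		spin_lock(&st->reply_lock);
	}

	msg = st->reply_list.next;
	list_del(msg);
	spin_unlock(&st->reply_lock);

	return msg;
}

Callers then need an IS_ERR() check on the result, which is why xenbus_dev_request_and_reply() above gains the IS_ERR(ret) early return after dropping the request mutex.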