aboutsummaryrefslogtreecommitdiff
path: root/drivers/xen
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/xen')
-rw-r--r--drivers/xen/Kconfig51
-rw-r--r--drivers/xen/Makefile10
-rw-r--r--drivers/xen/acpi.c41
-rw-r--r--drivers/xen/balloon.c196
-rw-r--r--drivers/xen/cpu_hotplug.c6
-rw-r--r--drivers/xen/dbgp.c2
-rw-r--r--drivers/xen/events/Makefile5
-rw-r--r--drivers/xen/events/events_2l.c365
-rw-r--r--drivers/xen/events/events_base.c (renamed from drivers/xen/events.c)903
-rw-r--r--drivers/xen/events/events_fifo.c443
-rw-r--r--drivers/xen/events/events_internal.h151
-rw-r--r--drivers/xen/evtchn.c216
-rw-r--r--drivers/xen/fallback.c3
-rw-r--r--drivers/xen/gntalloc.c6
-rw-r--r--drivers/xen/gntdev.c21
-rw-r--r--drivers/xen/grant-table.c185
-rw-r--r--drivers/xen/manage.c110
-rw-r--r--drivers/xen/mcelog.c36
-rw-r--r--drivers/xen/pci.c55
-rw-r--r--drivers/xen/pcpu.c51
-rw-r--r--drivers/xen/platform-pci.c15
-rw-r--r--drivers/xen/privcmd.c98
-rw-r--r--drivers/xen/swiotlb-xen.c168
-rw-r--r--drivers/xen/tmem.c108
-rw-r--r--drivers/xen/xen-acpi-cpuhotplug.c462
-rw-r--r--drivers/xen/xen-acpi-memhotplug.c485
-rw-r--r--drivers/xen/xen-acpi-pad.c36
-rw-r--r--drivers/xen/xen-acpi-processor.c125
-rw-r--r--drivers/xen/xen-balloon.c6
-rw-r--r--drivers/xen/xen-pciback/conf_space_header.c16
-rw-r--r--drivers/xen/xen-pciback/pci_stub.c119
-rw-r--r--drivers/xen/xen-pciback/pciback_ops.c32
-rw-r--r--drivers/xen/xen-pciback/vpci.c12
-rw-r--r--drivers/xen/xen-pciback/xenbus.c12
-rw-r--r--drivers/xen/xen-selfballoon.c142
-rw-r--r--drivers/xen/xen-stub.c100
-rw-r--r--drivers/xen/xenbus/xenbus_client.c36
-rw-r--r--drivers/xen/xenbus/xenbus_comms.c13
-rw-r--r--drivers/xen/xenbus/xenbus_comms.h1
-rw-r--r--drivers/xen/xenbus/xenbus_dev_backend.c25
-rw-r--r--drivers/xen/xenbus/xenbus_dev_frontend.c4
-rw-r--r--drivers/xen/xenbus/xenbus_probe.c85
-rw-r--r--drivers/xen/xenbus/xenbus_probe.h9
-rw-r--r--drivers/xen/xenbus/xenbus_probe_backend.c10
-rw-r--r--drivers/xen/xenbus/xenbus_probe_frontend.c91
-rw-r--r--drivers/xen/xenbus/xenbus_xs.c66
-rw-r--r--drivers/xen/xencomm.c217
-rw-r--r--drivers/xen/xenfs/super.c71
48 files changed, 3794 insertions, 1626 deletions
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index cabfa97f467..38fb36e1c59 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -3,7 +3,6 @@ menu "Xen driver support"
config XEN_BALLOON
bool "Xen memory balloon driver"
- depends on !ARM
default y
help
The balloon driver allows the Xen domain to request more memory from
@@ -19,11 +18,10 @@ config XEN_SELFBALLOONING
by the current usage of anonymous memory ("committed AS") and
controlled by various sysfs-settable parameters. Configuring
FRONTSWAP is highly recommended; if it is not configured, self-
- ballooning is disabled by default but can be enabled with the
- 'selfballooning' kernel boot parameter. If FRONTSWAP is configured,
+ ballooning is disabled by default. If FRONTSWAP is configured,
frontswap-selfshrinking is enabled by default but can be disabled
- with the 'noselfshrink' kernel boot parameter; and self-ballooning
- is enabled by default but can be disabled with the 'noselfballooning'
+ with the 'tmem.selfshrink=0' kernel boot parameter; and self-ballooning
+ is enabled by default but can be disabled with the 'tmem.selfballooning=0'
kernel boot parameter. Note that systems without a sufficiently
large swap device should not enable self-ballooning.
@@ -141,13 +139,12 @@ config XEN_GRANT_DEV_ALLOC
config SWIOTLB_XEN
def_bool y
- depends on PCI
select SWIOTLB
config XEN_TMEM
- bool
- depends on !ARM
- default y if (CLEANCACHE || FRONTSWAP)
+ tristate
+ depends on !ARM && !ARM64
+ default m if (CLEANCACHE || FRONTSWAP)
help
Shim to interface in-kernel Transcendent Memory hooks
(e.g. cleancache and frontswap) to Xen tmem hypercalls.
@@ -180,6 +177,40 @@ config XEN_PRIVCMD
depends on XEN
default m
+config XEN_STUB
+ bool "Xen stub drivers"
+ depends on XEN && X86_64 && BROKEN
+ default n
+ help
+ Allow kernel to install stub drivers, to reserve space for Xen drivers,
+ i.e. memory hotplug and cpu hotplug, and to block native drivers loaded,
+ so that real Xen drivers can be modular.
+
+ To enable Xen features like cpu and memory hotplug, select Y here.
+
+config XEN_ACPI_HOTPLUG_MEMORY
+ tristate "Xen ACPI memory hotplug"
+ depends on XEN_DOM0 && XEN_STUB && ACPI
+ default n
+ help
+ This is Xen ACPI memory hotplug.
+
+ Currently Xen only support ACPI memory hot-add. If you want
+ to hot-add memory at runtime (the hot-added memory cannot be
+ removed until machine stop), select Y/M here, otherwise select N.
+
+config XEN_ACPI_HOTPLUG_CPU
+ tristate "Xen ACPI cpu hotplug"
+ depends on XEN_DOM0 && XEN_STUB && ACPI
+ select ACPI_CONTAINER
+ default n
+ help
+ Xen ACPI cpu enumerating and hotplugging
+
+ For hotplugging, currently Xen only support ACPI cpu hotadd.
+ If you want to hotadd cpu at runtime (the hotadded cpu cannot
+ be removed until machine stop), select Y/M here.
+
config XEN_ACPI_PROCESSOR
tristate "Xen ACPI processor"
depends on XEN && X86 && ACPI_PROCESSOR && CPU_FREQ
@@ -190,7 +221,7 @@ config XEN_ACPI_PROCESSOR
To do that the driver parses the Power Management data and uploads
said information to the Xen hypervisor. Then the Xen hypervisor can
- select the proper Cx and Pxx states. It also registers itslef as the
+ select the proper Cx and Pxx states. It also registers itself as the
SMM so that other drivers (such as ACPI cpufreq scaling driver) will
not load.
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index fb213cf81a7..45e00afa7f2 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -1,9 +1,9 @@
-ifneq ($(CONFIG_ARM),y)
-obj-y += manage.o
+ifeq ($(filter y, $(CONFIG_ARM) $(CONFIG_ARM64)),)
obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
endif
obj-$(CONFIG_X86) += fallback.o
-obj-y += grant-table.o features.o events.o balloon.o
+obj-y += grant-table.o features.o balloon.o manage.o
+obj-y += events/
obj-y += xenbus/
nostackp := $(call cc-option, -fno-stack-protector)
@@ -16,7 +16,6 @@ xen-pad-$(CONFIG_X86) += xen-acpi-pad.o
dom0-$(CONFIG_X86) += pcpu.o
obj-$(CONFIG_XEN_DOM0) += $(dom0-y)
obj-$(CONFIG_BLOCK) += biomerge.o
-obj-$(CONFIG_XEN_XENCOMM) += xencomm.o
obj-$(CONFIG_XEN_BALLOON) += xen-balloon.o
obj-$(CONFIG_XEN_SELFBALLOONING) += xen-selfballoon.o
obj-$(CONFIG_XEN_DEV_EVTCHN) += xen-evtchn.o
@@ -30,6 +29,9 @@ obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o
obj-$(CONFIG_XEN_MCE_LOG) += mcelog.o
obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback/
obj-$(CONFIG_XEN_PRIVCMD) += xen-privcmd.o
+obj-$(CONFIG_XEN_STUB) += xen-stub.o
+obj-$(CONFIG_XEN_ACPI_HOTPLUG_MEMORY) += xen-acpi-memhotplug.o
+obj-$(CONFIG_XEN_ACPI_HOTPLUG_CPU) += xen-acpi-cpuhotplug.o
obj-$(CONFIG_XEN_ACPI_PROCESSOR) += xen-acpi-processor.o
xen-evtchn-y := evtchn.o
xen-gntdev-y := gntdev.o
diff --git a/drivers/xen/acpi.c b/drivers/xen/acpi.c
index 119d42a2bf5..90307c0b630 100644
--- a/drivers/xen/acpi.c
+++ b/drivers/xen/acpi.c
@@ -35,28 +35,43 @@
#include <asm/xen/hypercall.h>
#include <asm/xen/hypervisor.h>
-int xen_acpi_notify_hypervisor_state(u8 sleep_state,
- u32 pm1a_cnt, u32 pm1b_cnt)
+static int xen_acpi_notify_hypervisor_state(u8 sleep_state,
+ u32 val_a, u32 val_b,
+ bool extended)
{
+ unsigned int bits = extended ? 8 : 16;
+
struct xen_platform_op op = {
.cmd = XENPF_enter_acpi_sleep,
.interface_version = XENPF_INTERFACE_VERSION,
- .u = {
- .enter_acpi_sleep = {
- .pm1a_cnt_val = (u16)pm1a_cnt,
- .pm1b_cnt_val = (u16)pm1b_cnt,
- .sleep_state = sleep_state,
- },
+ .u.enter_acpi_sleep = {
+ .val_a = (u16)val_a,
+ .val_b = (u16)val_b,
+ .sleep_state = sleep_state,
+ .flags = extended ? XENPF_ACPI_SLEEP_EXTENDED : 0,
},
};
- if ((pm1a_cnt & 0xffff0000) || (pm1b_cnt & 0xffff0000)) {
- WARN(1, "Using more than 16bits of PM1A/B 0x%x/0x%x!"
- "Email xen-devel@lists.xensource.com Thank you.\n", \
- pm1a_cnt, pm1b_cnt);
+ if (WARN((val_a & (~0 << bits)) || (val_b & (~0 << bits)),
+ "Using more than %u bits of sleep control values %#x/%#x!"
+ "Email xen-devel@lists.xen.org - Thank you.\n", \
+ bits, val_a, val_b))
return -1;
- }
HYPERVISOR_dom0_op(&op);
return 1;
}
+
+int xen_acpi_notify_hypervisor_sleep(u8 sleep_state,
+ u32 pm1a_cnt, u32 pm1b_cnt)
+{
+ return xen_acpi_notify_hypervisor_state(sleep_state, pm1a_cnt,
+ pm1b_cnt, false);
+}
+
+int xen_acpi_notify_hypervisor_extended_sleep(u8 sleep_state,
+ u32 val_a, u32 val_b)
+{
+ return xen_acpi_notify_hypervisor_state(sleep_state, val_a,
+ val_b, true);
+}
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index a56776dbe09..5c660c77f03 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -36,6 +36,9 @@
* IN THE SOFTWARE.
*/
+#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
+
+#include <linux/cpu.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/errno.h>
@@ -50,6 +53,7 @@
#include <linux/notifier.h>
#include <linux/memory.h>
#include <linux/memory_hotplug.h>
+#include <linux/percpu-defs.h>
#include <asm/page.h>
#include <asm/pgalloc.h>
@@ -88,14 +92,8 @@ EXPORT_SYMBOL_GPL(balloon_stats);
/* We increase/decrease in batches which fit in a page */
static xen_pfn_t frame_list[PAGE_SIZE / sizeof(unsigned long)];
+static DEFINE_PER_CPU(struct page *, balloon_scratch_page);
-#ifdef CONFIG_HIGHMEM
-#define inc_totalhigh_pages() (totalhigh_pages++)
-#define dec_totalhigh_pages() (totalhigh_pages--)
-#else
-#define inc_totalhigh_pages() do {} while (0)
-#define dec_totalhigh_pages() do {} while (0)
-#endif
/* List of ballooned pages, threaded through the mem_map array. */
static LIST_HEAD(ballooned_pages);
@@ -132,9 +130,7 @@ static void __balloon_append(struct page *page)
static void balloon_append(struct page *page)
{
__balloon_append(page);
- if (PageHighMem(page))
- dec_totalhigh_pages();
- totalram_pages--;
+ adjust_managed_page_count(page, -1);
}
/* balloon_retrieve: rescue a page from the balloon, if it is not empty. */
@@ -151,24 +147,16 @@ static struct page *balloon_retrieve(bool prefer_highmem)
page = list_entry(ballooned_pages.next, struct page, lru);
list_del(&page->lru);
- if (PageHighMem(page)) {
+ if (PageHighMem(page))
balloon_stats.balloon_high--;
- inc_totalhigh_pages();
- } else
+ else
balloon_stats.balloon_low--;
- totalram_pages++;
+ adjust_managed_page_count(page, 1);
return page;
}
-static struct page *balloon_first_page(void)
-{
- if (list_empty(&ballooned_pages))
- return NULL;
- return list_entry(ballooned_pages.next, struct page, lru);
-}
-
static struct page *balloon_next_page(struct page *page)
{
struct list_head *next = page->lru.next;
@@ -242,7 +230,7 @@ static enum bp_state reserve_additional_memory(long credit)
rc = add_memory(nid, hotplug_start_paddr, balloon_hotplug << PAGE_SHIFT);
if (rc) {
- pr_info("xen_balloon: %s: add_memory() failed: %i\n", __func__, rc);
+ pr_info("%s: add_memory() failed: %i\n", __func__, rc);
return BP_EAGAIN;
}
@@ -333,7 +321,7 @@ static enum bp_state increase_reservation(unsigned long nr_pages)
if (nr_pages > ARRAY_SIZE(frame_list))
nr_pages = ARRAY_SIZE(frame_list);
- page = balloon_first_page();
+ page = list_first_entry_or_null(&ballooned_pages, struct page, lru);
for (i = 0; i < nr_pages; i++) {
if (!page) {
nr_pages = i;
@@ -354,27 +342,25 @@ static enum bp_state increase_reservation(unsigned long nr_pages)
BUG_ON(page == NULL);
pfn = page_to_pfn(page);
- BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) &&
- phys_to_machine_mapping_valid(pfn));
-
- set_phys_to_machine(pfn, frame_list[i]);
#ifdef CONFIG_XEN_HAVE_PVMMU
- /* Link back into the page tables if not highmem. */
- if (xen_pv_domain() && !PageHighMem(page)) {
- int ret;
- ret = HYPERVISOR_update_va_mapping(
- (unsigned long)__va(pfn << PAGE_SHIFT),
- mfn_pte(frame_list[i], PAGE_KERNEL),
- 0);
- BUG_ON(ret);
+ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+ set_phys_to_machine(pfn, frame_list[i]);
+
+ /* Link back into the page tables if not highmem. */
+ if (!PageHighMem(page)) {
+ int ret;
+ ret = HYPERVISOR_update_va_mapping(
+ (unsigned long)__va(pfn << PAGE_SHIFT),
+ mfn_pte(frame_list[i], PAGE_KERNEL),
+ 0);
+ BUG_ON(ret);
+ }
}
#endif
/* Relinquish the page back to the allocator. */
- ClearPageReserved(page);
- init_page_count(page);
- __free_page(page);
+ __free_reserved_page(page);
}
balloon_stats.current_pages += rc;
@@ -407,38 +393,59 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
nr_pages = ARRAY_SIZE(frame_list);
for (i = 0; i < nr_pages; i++) {
- if ((page = alloc_page(gfp)) == NULL) {
+ page = alloc_page(gfp);
+ if (page == NULL) {
nr_pages = i;
state = BP_EAGAIN;
break;
}
+ scrub_page(page);
- pfn = page_to_pfn(page);
- frame_list[i] = pfn_to_mfn(pfn);
+ frame_list[i] = page_to_pfn(page);
+ }
- scrub_page(page);
+ /*
+ * Ensure that ballooned highmem pages don't have kmaps.
+ *
+ * Do this before changing the p2m as kmap_flush_unused()
+ * reads PTEs to obtain pages (and hence needs the original
+ * p2m entry).
+ */
+ kmap_flush_unused();
+
+ /* Update direct mapping, invalidate P2M, and add to balloon. */
+ for (i = 0; i < nr_pages; i++) {
+ pfn = frame_list[i];
+ frame_list[i] = pfn_to_mfn(pfn);
+ page = pfn_to_page(pfn);
#ifdef CONFIG_XEN_HAVE_PVMMU
- if (xen_pv_domain() && !PageHighMem(page)) {
- ret = HYPERVISOR_update_va_mapping(
- (unsigned long)__va(pfn << PAGE_SHIFT),
- __pte_ma(0), 0);
- BUG_ON(ret);
+ /*
+ * Ballooned out frames are effectively replaced with
+ * a scratch frame. Ensure direct mappings and the
+ * p2m are consistent.
+ */
+ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+ if (!PageHighMem(page)) {
+ struct page *scratch_page = get_balloon_scratch_page();
+
+ ret = HYPERVISOR_update_va_mapping(
+ (unsigned long)__va(pfn << PAGE_SHIFT),
+ pfn_pte(page_to_pfn(scratch_page),
+ PAGE_KERNEL_RO), 0);
+ BUG_ON(ret);
+
+ put_balloon_scratch_page();
+ }
+ __set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
}
#endif
+
+ balloon_append(page);
}
- /* Ensure that ballooned highmem pages don't have kmaps. */
- kmap_flush_unused();
flush_tlb_all();
- /* No more mappings: invalidate P2M and add to balloon. */
- for (i = 0; i < nr_pages; i++) {
- pfn = mfn_to_pfn(frame_list[i]);
- __set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
- balloon_append(pfn_to_page(pfn));
- }
-
set_xen_guest_handle(reservation.extent_start, frame_list);
reservation.nr_extents = nr_pages;
ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
@@ -490,6 +497,18 @@ static void balloon_process(struct work_struct *work)
mutex_unlock(&balloon_mutex);
}
+struct page *get_balloon_scratch_page(void)
+{
+ struct page *ret = get_cpu_var(balloon_scratch_page);
+ BUG_ON(ret == NULL);
+ return ret;
+}
+
+void put_balloon_scratch_page(void)
+{
+ put_cpu_var(balloon_scratch_page);
+}
+
/* Resets the Xen limit, sets new target, and kicks off processing. */
void balloon_set_new_target(unsigned long target)
{
@@ -583,18 +602,66 @@ static void __init balloon_add_region(unsigned long start_pfn,
}
}
+static int alloc_balloon_scratch_page(int cpu)
+{
+ if (per_cpu(balloon_scratch_page, cpu) != NULL)
+ return 0;
+
+ per_cpu(balloon_scratch_page, cpu) = alloc_page(GFP_KERNEL);
+ if (per_cpu(balloon_scratch_page, cpu) == NULL) {
+ pr_warn("Failed to allocate balloon_scratch_page for cpu %d\n", cpu);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+
+static int balloon_cpu_notify(struct notifier_block *self,
+ unsigned long action, void *hcpu)
+{
+ int cpu = (long)hcpu;
+ switch (action) {
+ case CPU_UP_PREPARE:
+ if (alloc_balloon_scratch_page(cpu))
+ return NOTIFY_BAD;
+ break;
+ default:
+ break;
+ }
+ return NOTIFY_OK;
+}
+
+static struct notifier_block balloon_cpu_notifier = {
+ .notifier_call = balloon_cpu_notify,
+};
+
static int __init balloon_init(void)
{
- int i;
+ int i, cpu;
if (!xen_domain())
return -ENODEV;
- pr_info("xen/balloon: Initialising balloon driver.\n");
+ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+ register_cpu_notifier(&balloon_cpu_notifier);
+
+ get_online_cpus();
+ for_each_online_cpu(cpu) {
+ if (alloc_balloon_scratch_page(cpu)) {
+ put_online_cpus();
+ unregister_cpu_notifier(&balloon_cpu_notifier);
+ return -ENOMEM;
+ }
+ }
+ put_online_cpus();
+ }
+
+ pr_info("Initialising balloon driver\n");
balloon_stats.current_pages = xen_pv_domain()
? min(xen_start_info->nr_pages - xen_released_pages, max_pfn)
- : max_pfn;
+ : get_num_physpages();
balloon_stats.target_pages = balloon_stats.current_pages;
balloon_stats.balloon_low = 0;
balloon_stats.balloon_high = 0;
@@ -626,4 +693,15 @@ static int __init balloon_init(void)
subsys_initcall(balloon_init);
+static int __init balloon_clear(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu)
+ per_cpu(balloon_scratch_page, cpu) = NULL;
+
+ return 0;
+}
+early_initcall(balloon_clear);
+
MODULE_LICENSE("GPL");
diff --git a/drivers/xen/cpu_hotplug.c b/drivers/xen/cpu_hotplug.c
index 084041d42c9..cc6513a176b 100644
--- a/drivers/xen/cpu_hotplug.c
+++ b/drivers/xen/cpu_hotplug.c
@@ -1,3 +1,5 @@
+#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
+
#include <linux/notifier.h>
#include <xen/xen.h>
@@ -31,7 +33,7 @@ static int vcpu_online(unsigned int cpu)
err = xenbus_scanf(XBT_NIL, dir, "availability", "%15s", state);
if (err != 1) {
if (!xen_initial_domain())
- printk(KERN_ERR "XENBUS: Unable to read cpu state\n");
+ pr_err("Unable to read cpu state\n");
return err;
}
@@ -40,7 +42,7 @@ static int vcpu_online(unsigned int cpu)
else if (strcmp(state, "offline") == 0)
return 0;
- printk(KERN_ERR "XENBUS: unknown state(%s) on CPU%d\n", state, cpu);
+ pr_err("unknown state(%s) on CPU%d\n", state, cpu);
return -EINVAL;
}
static void vcpu_hotplug(unsigned int cpu)
diff --git a/drivers/xen/dbgp.c b/drivers/xen/dbgp.c
index f3ccc80a455..8145a59fd9f 100644
--- a/drivers/xen/dbgp.c
+++ b/drivers/xen/dbgp.c
@@ -19,7 +19,7 @@ static int xen_dbgp_op(struct usb_hcd *hcd, int op)
dbgp.op = op;
#ifdef CONFIG_PCI
- if (ctrlr->bus == &pci_bus_type) {
+ if (dev_is_pci(ctrlr)) {
const struct pci_dev *pdev = to_pci_dev(ctrlr);
dbgp.u.pci.seg = pci_domain_nr(pdev->bus);
diff --git a/drivers/xen/events/Makefile b/drivers/xen/events/Makefile
new file mode 100644
index 00000000000..62be55cd981
--- /dev/null
+++ b/drivers/xen/events/Makefile
@@ -0,0 +1,5 @@
+obj-y += events.o
+
+events-y += events_base.o
+events-y += events_2l.o
+events-y += events_fifo.o
diff --git a/drivers/xen/events/events_2l.c b/drivers/xen/events/events_2l.c
new file mode 100644
index 00000000000..5db43fc100a
--- /dev/null
+++ b/drivers/xen/events/events_2l.c
@@ -0,0 +1,365 @@
+/*
+ * Xen event channels (2-level ABI)
+ *
+ * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
+ */
+
+#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
+
+#include <linux/linkage.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/module.h>
+
+#include <asm/sync_bitops.h>
+#include <asm/xen/hypercall.h>
+#include <asm/xen/hypervisor.h>
+
+#include <xen/xen.h>
+#include <xen/xen-ops.h>
+#include <xen/events.h>
+#include <xen/interface/xen.h>
+#include <xen/interface/event_channel.h>
+
+#include "events_internal.h"
+
+/*
+ * Note sizeof(xen_ulong_t) can be more than sizeof(unsigned long). Be
+ * careful to only use bitops which allow for this (e.g
+ * test_bit/find_first_bit and friends but not __ffs) and to pass
+ * BITS_PER_EVTCHN_WORD as the bitmask length.
+ */
+#define BITS_PER_EVTCHN_WORD (sizeof(xen_ulong_t)*8)
+/*
+ * Make a bitmask (i.e. unsigned long *) of a xen_ulong_t
+ * array. Primarily to avoid long lines (hence the terse name).
+ */
+#define BM(x) (unsigned long *)(x)
+/* Find the first set bit in a evtchn mask */
+#define EVTCHN_FIRST_BIT(w) find_first_bit(BM(&(w)), BITS_PER_EVTCHN_WORD)
+
+static DEFINE_PER_CPU(xen_ulong_t [EVTCHN_2L_NR_CHANNELS/BITS_PER_EVTCHN_WORD],
+ cpu_evtchn_mask);
+
+static unsigned evtchn_2l_max_channels(void)
+{
+ return EVTCHN_2L_NR_CHANNELS;
+}
+
+static void evtchn_2l_bind_to_cpu(struct irq_info *info, unsigned cpu)
+{
+ clear_bit(info->evtchn, BM(per_cpu(cpu_evtchn_mask, info->cpu)));
+ set_bit(info->evtchn, BM(per_cpu(cpu_evtchn_mask, cpu)));
+}
+
+static void evtchn_2l_clear_pending(unsigned port)
+{
+ struct shared_info *s = HYPERVISOR_shared_info;
+ sync_clear_bit(port, BM(&s->evtchn_pending[0]));
+}
+
+static void evtchn_2l_set_pending(unsigned port)
+{
+ struct shared_info *s = HYPERVISOR_shared_info;
+ sync_set_bit(port, BM(&s->evtchn_pending[0]));
+}
+
+static bool evtchn_2l_is_pending(unsigned port)
+{
+ struct shared_info *s = HYPERVISOR_shared_info;
+ return sync_test_bit(port, BM(&s->evtchn_pending[0]));
+}
+
+static bool evtchn_2l_test_and_set_mask(unsigned port)
+{
+ struct shared_info *s = HYPERVISOR_shared_info;
+ return sync_test_and_set_bit(port, BM(&s->evtchn_mask[0]));
+}
+
+static void evtchn_2l_mask(unsigned port)
+{
+ struct shared_info *s = HYPERVISOR_shared_info;
+ sync_set_bit(port, BM(&s->evtchn_mask[0]));
+}
+
+static void evtchn_2l_unmask(unsigned port)
+{
+ struct shared_info *s = HYPERVISOR_shared_info;
+ unsigned int cpu = get_cpu();
+ int do_hypercall = 0, evtchn_pending = 0;
+
+ BUG_ON(!irqs_disabled());
+
+ if (unlikely((cpu != cpu_from_evtchn(port))))
+ do_hypercall = 1;
+ else {
+ /*
+ * Need to clear the mask before checking pending to
+ * avoid a race with an event becoming pending.
+ *
+ * EVTCHNOP_unmask will only trigger an upcall if the
+ * mask bit was set, so if a hypercall is needed
+ * remask the event.
+ */
+ sync_clear_bit(port, BM(&s->evtchn_mask[0]));
+ evtchn_pending = sync_test_bit(port, BM(&s->evtchn_pending[0]));
+
+ if (unlikely(evtchn_pending && xen_hvm_domain())) {
+ sync_set_bit(port, BM(&s->evtchn_mask[0]));
+ do_hypercall = 1;
+ }
+ }
+
+ /* Slow path (hypercall) if this is a non-local port or if this is
+ * an hvm domain and an event is pending (hvm domains don't have
+ * their own implementation of irq_enable). */
+ if (do_hypercall) {
+ struct evtchn_unmask unmask = { .port = port };
+ (void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask);
+ } else {
+ struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
+
+ /*
+ * The following is basically the equivalent of
+ * 'hw_resend_irq'. Just like a real IO-APIC we 'lose
+ * the interrupt edge' if the channel is masked.
+ */
+ if (evtchn_pending &&
+ !sync_test_and_set_bit(port / BITS_PER_EVTCHN_WORD,
+ BM(&vcpu_info->evtchn_pending_sel)))
+ vcpu_info->evtchn_upcall_pending = 1;
+ }
+
+ put_cpu();
+}
+
+static DEFINE_PER_CPU(unsigned int, current_word_idx);
+static DEFINE_PER_CPU(unsigned int, current_bit_idx);
+
+/*
+ * Mask out the i least significant bits of w
+ */
+#define MASK_LSBS(w, i) (w & ((~((xen_ulong_t)0UL)) << i))
+
+static inline xen_ulong_t active_evtchns(unsigned int cpu,
+ struct shared_info *sh,
+ unsigned int idx)
+{
+ return sh->evtchn_pending[idx] &
+ per_cpu(cpu_evtchn_mask, cpu)[idx] &
+ ~sh->evtchn_mask[idx];
+}
+
+/*
+ * Search the CPU's pending events bitmasks. For each one found, map
+ * the event number to an irq, and feed it into do_IRQ() for handling.
+ *
+ * Xen uses a two-level bitmap to speed searching. The first level is
+ * a bitset of words which contain pending event bits. The second
+ * level is a bitset of pending events themselves.
+ */
+static void evtchn_2l_handle_events(unsigned cpu)
+{
+ int irq;
+ xen_ulong_t pending_words;
+ xen_ulong_t pending_bits;
+ int start_word_idx, start_bit_idx;
+ int word_idx, bit_idx;
+ int i;
+ struct shared_info *s = HYPERVISOR_shared_info;
+ struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
+
+ /* Timer interrupt has highest priority. */
+ irq = irq_from_virq(cpu, VIRQ_TIMER);
+ if (irq != -1) {
+ unsigned int evtchn = evtchn_from_irq(irq);
+ word_idx = evtchn / BITS_PER_LONG;
+ bit_idx = evtchn % BITS_PER_LONG;
+ if (active_evtchns(cpu, s, word_idx) & (1ULL << bit_idx))
+ generic_handle_irq(irq);
+ }
+
+ /*
+ * Master flag must be cleared /before/ clearing
+ * selector flag. xchg_xen_ulong must contain an
+ * appropriate barrier.
+ */
+ pending_words = xchg_xen_ulong(&vcpu_info->evtchn_pending_sel, 0);
+
+ start_word_idx = __this_cpu_read(current_word_idx);
+ start_bit_idx = __this_cpu_read(current_bit_idx);
+
+ word_idx = start_word_idx;
+
+ for (i = 0; pending_words != 0; i++) {
+ xen_ulong_t words;
+
+ words = MASK_LSBS(pending_words, word_idx);
+
+ /*
+ * If we masked out all events, wrap to beginning.
+ */
+ if (words == 0) {
+ word_idx = 0;
+ bit_idx = 0;
+ continue;
+ }
+ word_idx = EVTCHN_FIRST_BIT(words);
+
+ pending_bits = active_evtchns(cpu, s, word_idx);
+ bit_idx = 0; /* usually scan entire word from start */
+ /*
+ * We scan the starting word in two parts.
+ *
+ * 1st time: start in the middle, scanning the
+ * upper bits.
+ *
+ * 2nd time: scan the whole word (not just the
+ * parts skipped in the first pass) -- if an
+ * event in the previously scanned bits is
+ * pending again it would just be scanned on
+ * the next loop anyway.
+ */
+ if (word_idx == start_word_idx) {
+ if (i == 0)
+ bit_idx = start_bit_idx;
+ }
+
+ do {
+ xen_ulong_t bits;
+ int port;
+
+ bits = MASK_LSBS(pending_bits, bit_idx);
+
+ /* If we masked out all events, move on. */
+ if (bits == 0)
+ break;
+
+ bit_idx = EVTCHN_FIRST_BIT(bits);
+
+ /* Process port. */
+ port = (word_idx * BITS_PER_EVTCHN_WORD) + bit_idx;
+ irq = get_evtchn_to_irq(port);
+
+ if (irq != -1)
+ generic_handle_irq(irq);
+
+ bit_idx = (bit_idx + 1) % BITS_PER_EVTCHN_WORD;
+
+ /* Next caller starts at last processed + 1 */
+ __this_cpu_write(current_word_idx,
+ bit_idx ? word_idx :
+ (word_idx+1) % BITS_PER_EVTCHN_WORD);
+ __this_cpu_write(current_bit_idx, bit_idx);
+ } while (bit_idx != 0);
+
+ /* Scan start_l1i twice; all others once. */
+ if ((word_idx != start_word_idx) || (i != 0))
+ pending_words &= ~(1UL << word_idx);
+
+ word_idx = (word_idx + 1) % BITS_PER_EVTCHN_WORD;
+ }
+}
+
+irqreturn_t xen_debug_interrupt(int irq, void *dev_id)
+{
+ struct shared_info *sh = HYPERVISOR_shared_info;
+ int cpu = smp_processor_id();
+ xen_ulong_t *cpu_evtchn = per_cpu(cpu_evtchn_mask, cpu);
+ int i;
+ unsigned long flags;
+ static DEFINE_SPINLOCK(debug_lock);
+ struct vcpu_info *v;
+
+ spin_lock_irqsave(&debug_lock, flags);
+
+ printk("\nvcpu %d\n ", cpu);
+
+ for_each_online_cpu(i) {
+ int pending;
+ v = per_cpu(xen_vcpu, i);
+ pending = (get_irq_regs() && i == cpu)
+ ? xen_irqs_disabled(get_irq_regs())
+ : v->evtchn_upcall_mask;
+ printk("%d: masked=%d pending=%d event_sel %0*"PRI_xen_ulong"\n ", i,
+ pending, v->evtchn_upcall_pending,
+ (int)(sizeof(v->evtchn_pending_sel)*2),
+ v->evtchn_pending_sel);
+ }
+ v = per_cpu(xen_vcpu, cpu);
+
+ printk("\npending:\n ");
+ for (i = ARRAY_SIZE(sh->evtchn_pending)-1; i >= 0; i--)
+ printk("%0*"PRI_xen_ulong"%s",
+ (int)sizeof(sh->evtchn_pending[0])*2,
+ sh->evtchn_pending[i],
+ i % 8 == 0 ? "\n " : " ");
+ printk("\nglobal mask:\n ");
+ for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--)
+ printk("%0*"PRI_xen_ulong"%s",
+ (int)(sizeof(sh->evtchn_mask[0])*2),
+ sh->evtchn_mask[i],
+ i % 8 == 0 ? "\n " : " ");
+
+ printk("\nglobally unmasked:\n ");
+ for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--)
+ printk("%0*"PRI_xen_ulong"%s",
+ (int)(sizeof(sh->evtchn_mask[0])*2),
+ sh->evtchn_pending[i] & ~sh->evtchn_mask[i],
+ i % 8 == 0 ? "\n " : " ");
+
+ printk("\nlocal cpu%d mask:\n ", cpu);
+ for (i = (EVTCHN_2L_NR_CHANNELS/BITS_PER_EVTCHN_WORD)-1; i >= 0; i--)
+ printk("%0*"PRI_xen_ulong"%s", (int)(sizeof(cpu_evtchn[0])*2),
+ cpu_evtchn[i],
+ i % 8 == 0 ? "\n " : " ");
+
+ printk("\nlocally unmasked:\n ");
+ for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--) {
+ xen_ulong_t pending = sh->evtchn_pending[i]
+ & ~sh->evtchn_mask[i]
+ & cpu_evtchn[i];
+ printk("%0*"PRI_xen_ulong"%s",
+ (int)(sizeof(sh->evtchn_mask[0])*2),
+ pending, i % 8 == 0 ? "\n " : " ");
+ }
+
+ printk("\npending list:\n");
+ for (i = 0; i < EVTCHN_2L_NR_CHANNELS; i++) {
+ if (sync_test_bit(i, BM(sh->evtchn_pending))) {
+ int word_idx = i / BITS_PER_EVTCHN_WORD;
+ printk(" %d: event %d -> irq %d%s%s%s\n",
+ cpu_from_evtchn(i), i,
+ get_evtchn_to_irq(i),
+ sync_test_bit(word_idx, BM(&v->evtchn_pending_sel))
+ ? "" : " l2-clear",
+ !sync_test_bit(i, BM(sh->evtchn_mask))
+ ? "" : " globally-masked",
+ sync_test_bit(i, BM(cpu_evtchn))
+ ? "" : " locally-masked");
+ }
+ }
+
+ spin_unlock_irqrestore(&debug_lock, flags);
+
+ return IRQ_HANDLED;
+}
+
+static const struct evtchn_ops evtchn_ops_2l = {
+ .max_channels = evtchn_2l_max_channels,
+ .nr_channels = evtchn_2l_max_channels,
+ .bind_to_cpu = evtchn_2l_bind_to_cpu,
+ .clear_pending = evtchn_2l_clear_pending,
+ .set_pending = evtchn_2l_set_pending,
+ .is_pending = evtchn_2l_is_pending,
+ .test_and_set_mask = evtchn_2l_test_and_set_mask,
+ .mask = evtchn_2l_mask,
+ .unmask = evtchn_2l_unmask,
+ .handle_events = evtchn_2l_handle_events,
+};
+
+void __init xen_evtchn_2l_init(void)
+{
+ pr_info("Using 2-level ABI\n");
+ evtchn_ops = &evtchn_ops_2l;
+}
diff --git a/drivers/xen/events.c b/drivers/xen/events/events_base.c
index 0be4df39e95..c919d3d5c84 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events/events_base.c
@@ -21,6 +21,8 @@
* Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
*/
+#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
+
#include <linux/linkage.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
@@ -54,8 +56,13 @@
#include <xen/interface/hvm/params.h>
#include <xen/interface/physdev.h>
#include <xen/interface/sched.h>
+#include <xen/interface/vcpu.h>
#include <asm/hw_irq.h>
+#include "events_internal.h"
+
+const struct evtchn_ops *evtchn_ops;
+
/*
* This lock protects updates to the following mapping and reference-count
* arrays. The lock does not need to be acquired to read the mapping tables.
@@ -70,58 +77,15 @@ static DEFINE_PER_CPU(int [NR_VIRQS], virq_to_irq) = {[0 ... NR_VIRQS-1] = -1};
/* IRQ <-> IPI mapping */
static DEFINE_PER_CPU(int [XEN_NR_IPIS], ipi_to_irq) = {[0 ... XEN_NR_IPIS-1] = -1};
-/* Interrupt types. */
-enum xen_irq_type {
- IRQT_UNBOUND = 0,
- IRQT_PIRQ,
- IRQT_VIRQ,
- IRQT_IPI,
- IRQT_EVTCHN
-};
-
-/*
- * Packed IRQ information:
- * type - enum xen_irq_type
- * event channel - irq->event channel mapping
- * cpu - cpu this event channel is bound to
- * index - type-specific information:
- * PIRQ - vector, with MSB being "needs EIO", or physical IRQ of the HVM
- * guest, or GSI (real passthrough IRQ) of the device.
- * VIRQ - virq number
- * IPI - IPI vector
- * EVTCHN -
- */
-struct irq_info {
- struct list_head list;
- int refcnt;
- enum xen_irq_type type; /* type */
- unsigned irq;
- unsigned short evtchn; /* event channel */
- unsigned short cpu; /* cpu bound */
-
- union {
- unsigned short virq;
- enum ipi_vector ipi;
- struct {
- unsigned short pirq;
- unsigned short gsi;
- unsigned char vector;
- unsigned char flags;
- uint16_t domid;
- } pirq;
- } u;
-};
-#define PIRQ_NEEDS_EOI (1 << 0)
-#define PIRQ_SHAREABLE (1 << 1)
-
-static int *evtchn_to_irq;
+int **evtchn_to_irq;
#ifdef CONFIG_X86
static unsigned long *pirq_eoi_map;
#endif
static bool (*pirq_needs_eoi)(unsigned irq);
-static DEFINE_PER_CPU(unsigned long [NR_EVENT_CHANNELS/BITS_PER_LONG],
- cpu_evtchn_mask);
+#define EVTCHN_ROW(e) (e / (PAGE_SIZE/sizeof(**evtchn_to_irq)))
+#define EVTCHN_COL(e) (e % (PAGE_SIZE/sizeof(**evtchn_to_irq)))
+#define EVTCHN_PER_ROW (PAGE_SIZE / sizeof(**evtchn_to_irq))
/* Xen will never allocate port zero for any purpose. */
#define VALID_EVTCHN(chn) ((chn) != 0)
@@ -132,19 +96,75 @@ static struct irq_chip xen_pirq_chip;
static void enable_dynirq(struct irq_data *data);
static void disable_dynirq(struct irq_data *data);
+static void clear_evtchn_to_irq_row(unsigned row)
+{
+ unsigned col;
+
+ for (col = 0; col < EVTCHN_PER_ROW; col++)
+ evtchn_to_irq[row][col] = -1;
+}
+
+static void clear_evtchn_to_irq_all(void)
+{
+ unsigned row;
+
+ for (row = 0; row < EVTCHN_ROW(xen_evtchn_max_channels()); row++) {
+ if (evtchn_to_irq[row] == NULL)
+ continue;
+ clear_evtchn_to_irq_row(row);
+ }
+}
+
+static int set_evtchn_to_irq(unsigned evtchn, unsigned irq)
+{
+ unsigned row;
+ unsigned col;
+
+ if (evtchn >= xen_evtchn_max_channels())
+ return -EINVAL;
+
+ row = EVTCHN_ROW(evtchn);
+ col = EVTCHN_COL(evtchn);
+
+ if (evtchn_to_irq[row] == NULL) {
+ /* Unallocated irq entries return -1 anyway */
+ if (irq == -1)
+ return 0;
+
+ evtchn_to_irq[row] = (int *)get_zeroed_page(GFP_KERNEL);
+ if (evtchn_to_irq[row] == NULL)
+ return -ENOMEM;
+
+ clear_evtchn_to_irq_row(row);
+ }
+
+ evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)] = irq;
+ return 0;
+}
+
+int get_evtchn_to_irq(unsigned evtchn)
+{
+ if (evtchn >= xen_evtchn_max_channels())
+ return -1;
+ if (evtchn_to_irq[EVTCHN_ROW(evtchn)] == NULL)
+ return -1;
+ return evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)];
+}
+
/* Get info for IRQ */
-static struct irq_info *info_for_irq(unsigned irq)
+struct irq_info *info_for_irq(unsigned irq)
{
return irq_get_handler_data(irq);
}
/* Constructors for packed IRQ information. */
-static void xen_irq_info_common_init(struct irq_info *info,
+static int xen_irq_info_common_setup(struct irq_info *info,
unsigned irq,
enum xen_irq_type type,
- unsigned short evtchn,
+ unsigned evtchn,
unsigned short cpu)
{
+ int ret;
BUG_ON(info->type != IRQT_UNBOUND && info->type != type);
@@ -153,68 +173,78 @@ static void xen_irq_info_common_init(struct irq_info *info,
info->evtchn = evtchn;
info->cpu = cpu;
- evtchn_to_irq[evtchn] = irq;
+ ret = set_evtchn_to_irq(evtchn, irq);
+ if (ret < 0)
+ return ret;
+
+ irq_clear_status_flags(irq, IRQ_NOREQUEST|IRQ_NOAUTOEN);
+
+ return xen_evtchn_port_setup(info);
}
-static void xen_irq_info_evtchn_init(unsigned irq,
- unsigned short evtchn)
+static int xen_irq_info_evtchn_setup(unsigned irq,
+ unsigned evtchn)
{
struct irq_info *info = info_for_irq(irq);
- xen_irq_info_common_init(info, irq, IRQT_EVTCHN, evtchn, 0);
+ return xen_irq_info_common_setup(info, irq, IRQT_EVTCHN, evtchn, 0);
}
-static void xen_irq_info_ipi_init(unsigned cpu,
+static int xen_irq_info_ipi_setup(unsigned cpu,
unsigned irq,
- unsigned short evtchn,
+ unsigned evtchn,
enum ipi_vector ipi)
{
struct irq_info *info = info_for_irq(irq);
- xen_irq_info_common_init(info, irq, IRQT_IPI, evtchn, 0);
-
info->u.ipi = ipi;
per_cpu(ipi_to_irq, cpu)[ipi] = irq;
+
+ return xen_irq_info_common_setup(info, irq, IRQT_IPI, evtchn, 0);
}
-static void xen_irq_info_virq_init(unsigned cpu,
+static int xen_irq_info_virq_setup(unsigned cpu,
unsigned irq,
- unsigned short evtchn,
- unsigned short virq)
+ unsigned evtchn,
+ unsigned virq)
{
struct irq_info *info = info_for_irq(irq);
- xen_irq_info_common_init(info, irq, IRQT_VIRQ, evtchn, 0);
-
info->u.virq = virq;
per_cpu(virq_to_irq, cpu)[virq] = irq;
+
+ return xen_irq_info_common_setup(info, irq, IRQT_VIRQ, evtchn, 0);
}
-static void xen_irq_info_pirq_init(unsigned irq,
- unsigned short evtchn,
- unsigned short pirq,
- unsigned short gsi,
- unsigned short vector,
+static int xen_irq_info_pirq_setup(unsigned irq,
+ unsigned evtchn,
+ unsigned pirq,
+ unsigned gsi,
uint16_t domid,
unsigned char flags)
{
struct irq_info *info = info_for_irq(irq);
- xen_irq_info_common_init(info, irq, IRQT_PIRQ, evtchn, 0);
-
info->u.pirq.pirq = pirq;
info->u.pirq.gsi = gsi;
- info->u.pirq.vector = vector;
info->u.pirq.domid = domid;
info->u.pirq.flags = flags;
+
+ return xen_irq_info_common_setup(info, irq, IRQT_PIRQ, evtchn, 0);
+}
+
+static void xen_irq_info_cleanup(struct irq_info *info)
+{
+ set_evtchn_to_irq(info->evtchn, -1);
+ info->evtchn = 0;
}
/*
* Accessors for packed IRQ information.
*/
-static unsigned int evtchn_from_irq(unsigned irq)
+unsigned int evtchn_from_irq(unsigned irq)
{
if (unlikely(WARN(irq < 0 || irq >= nr_irqs, "Invalid irq %d!\n", irq)))
return 0;
@@ -224,10 +254,15 @@ static unsigned int evtchn_from_irq(unsigned irq)
unsigned irq_from_evtchn(unsigned int evtchn)
{
- return evtchn_to_irq[evtchn];
+ return get_evtchn_to_irq(evtchn);
}
EXPORT_SYMBOL_GPL(irq_from_evtchn);
+int irq_from_virq(unsigned int cpu, unsigned int virq)
+{
+ return per_cpu(virq_to_irq, cpu)[virq];
+}
+
static enum ipi_vector ipi_from_irq(unsigned irq)
{
struct irq_info *info = info_for_irq(irq);
@@ -263,14 +298,14 @@ static enum xen_irq_type type_from_irq(unsigned irq)
return info_for_irq(irq)->type;
}
-static unsigned cpu_from_irq(unsigned irq)
+unsigned cpu_from_irq(unsigned irq)
{
return info_for_irq(irq)->cpu;
}
-static unsigned int cpu_from_evtchn(unsigned int evtchn)
+unsigned int cpu_from_evtchn(unsigned int evtchn)
{
- int irq = evtchn_to_irq[evtchn];
+ int irq = get_evtchn_to_irq(evtchn);
unsigned ret = 0;
if (irq != -1)
@@ -294,67 +329,28 @@ static bool pirq_needs_eoi_flag(unsigned irq)
return info->u.pirq.flags & PIRQ_NEEDS_EOI;
}
-static inline unsigned long active_evtchns(unsigned int cpu,
- struct shared_info *sh,
- unsigned int idx)
-{
- return sh->evtchn_pending[idx] &
- per_cpu(cpu_evtchn_mask, cpu)[idx] &
- ~sh->evtchn_mask[idx];
-}
-
static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu)
{
- int irq = evtchn_to_irq[chn];
+ int irq = get_evtchn_to_irq(chn);
+ struct irq_info *info = info_for_irq(irq);
BUG_ON(irq == -1);
#ifdef CONFIG_SMP
- cpumask_copy(irq_to_desc(irq)->irq_data.affinity, cpumask_of(cpu));
+ cpumask_copy(irq_get_irq_data(irq)->affinity, cpumask_of(cpu));
#endif
+ xen_evtchn_port_bind_to_cpu(info, cpu);
- clear_bit(chn, per_cpu(cpu_evtchn_mask, cpu_from_irq(irq)));
- set_bit(chn, per_cpu(cpu_evtchn_mask, cpu));
-
- info_for_irq(irq)->cpu = cpu;
-}
-
-static void init_evtchn_cpu_bindings(void)
-{
- int i;
-#ifdef CONFIG_SMP
- struct irq_info *info;
-
- /* By default all event channels notify CPU#0. */
- list_for_each_entry(info, &xen_irq_list_head, list) {
- struct irq_desc *desc = irq_to_desc(info->irq);
- cpumask_copy(desc->irq_data.affinity, cpumask_of(0));
- }
-#endif
-
- for_each_possible_cpu(i)
- memset(per_cpu(cpu_evtchn_mask, i),
- (i == 0) ? ~0 : 0, sizeof(*per_cpu(cpu_evtchn_mask, i)));
+ info->cpu = cpu;
}
-static inline void clear_evtchn(int port)
+static void xen_evtchn_mask_all(void)
{
- struct shared_info *s = HYPERVISOR_shared_info;
- sync_clear_bit(port, &s->evtchn_pending[0]);
-}
+ unsigned int evtchn;
-static inline void set_evtchn(int port)
-{
- struct shared_info *s = HYPERVISOR_shared_info;
- sync_set_bit(port, &s->evtchn_pending[0]);
-}
-
-static inline int test_evtchn(int port)
-{
- struct shared_info *s = HYPERVISOR_shared_info;
- return sync_test_bit(port, &s->evtchn_pending[0]);
+ for (evtchn = 0; evtchn < xen_evtchn_nr_channels(); evtchn++)
+ mask_evtchn(evtchn);
}
-
/**
* notify_remote_via_irq - send event to remote end of event channel via irq
* @irq: irq of event channel to send event to
@@ -372,61 +368,12 @@ void notify_remote_via_irq(int irq)
}
EXPORT_SYMBOL_GPL(notify_remote_via_irq);
-static void mask_evtchn(int port)
-{
- struct shared_info *s = HYPERVISOR_shared_info;
- sync_set_bit(port, &s->evtchn_mask[0]);
-}
-
-static void unmask_evtchn(int port)
-{
- struct shared_info *s = HYPERVISOR_shared_info;
- unsigned int cpu = get_cpu();
- int do_hypercall = 0, evtchn_pending = 0;
-
- BUG_ON(!irqs_disabled());
-
- if (unlikely((cpu != cpu_from_evtchn(port))))
- do_hypercall = 1;
- else
- evtchn_pending = sync_test_bit(port, &s->evtchn_pending[0]);
-
- if (unlikely(evtchn_pending && xen_hvm_domain()))
- do_hypercall = 1;
-
- /* Slow path (hypercall) if this is a non-local port or if this is
- * an hvm domain and an event is pending (hvm domains don't have
- * their own implementation of irq_enable). */
- if (do_hypercall) {
- struct evtchn_unmask unmask = { .port = port };
- (void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask);
- } else {
- struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
-
- sync_clear_bit(port, &s->evtchn_mask[0]);
-
- /*
- * The following is basically the equivalent of
- * 'hw_resend_irq'. Just like a real IO-APIC we 'lose
- * the interrupt edge' if the channel is masked.
- */
- if (evtchn_pending &&
- !sync_test_and_set_bit(port / BITS_PER_LONG,
- &vcpu_info->evtchn_pending_sel))
- vcpu_info->evtchn_upcall_pending = 1;
- }
-
- put_cpu();
-}
-
static void xen_irq_init(unsigned irq)
{
struct irq_info *info;
#ifdef CONFIG_SMP
- struct irq_desc *desc = irq_to_desc(irq);
-
/* By default all event channels notify CPU#0. */
- cpumask_copy(desc->irq_data.affinity, cpumask_of(0));
+ cpumask_copy(irq_get_irq_data(irq)->affinity, cpumask_of(0));
#endif
info = kzalloc(sizeof(*info), GFP_KERNEL);
@@ -441,29 +388,22 @@ static void xen_irq_init(unsigned irq)
list_add_tail(&info->list, &xen_irq_list_head);
}
-static int __must_check xen_allocate_irq_dynamic(void)
+static int __must_check xen_allocate_irqs_dynamic(int nvec)
{
- int first = 0;
- int irq;
+ int i, irq = irq_alloc_descs(-1, 0, nvec, -1);
-#ifdef CONFIG_X86_IO_APIC
- /*
- * For an HVM guest or domain 0 which see "real" (emulated or
- * actual respectively) GSIs we allocate dynamic IRQs
- * e.g. those corresponding to event channels or MSIs
- * etc. from the range above those "real" GSIs to avoid
- * collisions.
- */
- if (xen_initial_domain() || xen_hvm_domain())
- first = get_nr_irqs_gsi();
-#endif
+ if (irq >= 0) {
+ for (i = 0; i < nvec; i++)
+ xen_irq_init(irq + i);
+ }
- irq = irq_alloc_desc_from(first, -1);
+ return irq;
+}
- if (irq >= 0)
- xen_irq_init(irq);
+static inline int __must_check xen_allocate_irq_dynamic(void)
+{
- return irq;
+ return xen_allocate_irqs_dynamic(1);
}
static int __must_check xen_allocate_irq_gsi(unsigned gsi)
@@ -494,6 +434,9 @@ static void xen_free_irq(unsigned irq)
{
struct irq_info *info = irq_get_handler_data(irq);
+ if (WARN_ON(!info))
+ return;
+
list_del(&info->list);
irq_set_handler_data(irq, NULL);
@@ -509,6 +452,15 @@ static void xen_free_irq(unsigned irq)
irq_free_desc(irq);
}
+static void xen_evtchn_close(unsigned int port)
+{
+ struct evtchn_close close;
+
+ close.port = port;
+ if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
+ BUG();
+}
+
static void pirq_query_unmask(int irq)
{
struct physdev_irq_status_query irq_status;
@@ -525,13 +477,6 @@ static void pirq_query_unmask(int irq)
info->u.pirq.flags |= PIRQ_NEEDS_EOI;
}
-static bool probing_irq(int irq)
-{
- struct irq_desc *desc = irq_to_desc(irq);
-
- return desc && desc->action == NULL;
-}
-
static void eoi_pirq(struct irq_data *data)
{
int evtchn = evtchn_from_irq(data->irq);
@@ -573,16 +518,20 @@ static unsigned int __startup_pirq(unsigned int irq)
BIND_PIRQ__WILL_SHARE : 0;
rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &bind_pirq);
if (rc != 0) {
- if (!probing_irq(irq))
- printk(KERN_INFO "Failed to obtain physical IRQ %d\n",
- irq);
+ pr_warn("Failed to obtain physical IRQ %d\n", irq);
return 0;
}
evtchn = bind_pirq.port;
pirq_query_unmask(irq);
- evtchn_to_irq[evtchn] = irq;
+ rc = set_evtchn_to_irq(evtchn, irq);
+ if (rc != 0) {
+ pr_err("irq%d: Failed to set port to irq mapping (%d)\n",
+ irq, rc);
+ xen_evtchn_close(evtchn);
+ return 0;
+ }
bind_evtchn_to_cpu(evtchn, 0);
info->evtchn = evtchn;
@@ -600,10 +549,9 @@ static unsigned int startup_pirq(struct irq_data *data)
static void shutdown_pirq(struct irq_data *data)
{
- struct evtchn_close close;
unsigned int irq = data->irq;
struct irq_info *info = info_for_irq(irq);
- int evtchn = evtchn_from_irq(irq);
+ unsigned evtchn = evtchn_from_irq(irq);
BUG_ON(info->type != IRQT_PIRQ);
@@ -611,14 +559,8 @@ static void shutdown_pirq(struct irq_data *data)
return;
mask_evtchn(evtchn);
-
- close.port = evtchn;
- if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
- BUG();
-
- bind_evtchn_to_cpu(evtchn, 0);
- evtchn_to_irq[evtchn] = -1;
- info->evtchn = 0;
+ xen_evtchn_close(evtchn);
+ xen_irq_info_cleanup(info);
}
static void enable_pirq(struct irq_data *data)
@@ -647,6 +589,41 @@ int xen_irq_from_gsi(unsigned gsi)
}
EXPORT_SYMBOL_GPL(xen_irq_from_gsi);
+static void __unbind_from_irq(unsigned int irq)
+{
+ int evtchn = evtchn_from_irq(irq);
+ struct irq_info *info = irq_get_handler_data(irq);
+
+ if (info->refcnt > 0) {
+ info->refcnt--;
+ if (info->refcnt != 0)
+ return;
+ }
+
+ if (VALID_EVTCHN(evtchn)) {
+ unsigned int cpu = cpu_from_irq(irq);
+
+ xen_evtchn_close(evtchn);
+
+ switch (type_from_irq(irq)) {
+ case IRQT_VIRQ:
+ per_cpu(virq_to_irq, cpu)[virq_from_irq(irq)] = -1;
+ break;
+ case IRQT_IPI:
+ per_cpu(ipi_to_irq, cpu)[ipi_from_irq(irq)] = -1;
+ break;
+ default:
+ break;
+ }
+
+ xen_irq_info_cleanup(info);
+ }
+
+ BUG_ON(info_for_irq(irq)->type == IRQT_UNBOUND);
+
+ xen_free_irq(irq);
+}
+
/*
* Do not make any assumptions regarding the relationship between the
* IRQ number returned here and the Xen pirq argument.
@@ -662,13 +639,14 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi,
{
int irq = -1;
struct physdev_irq irq_op;
+ int ret;
mutex_lock(&irq_mapping_update_lock);
irq = xen_irq_from_gsi(gsi);
if (irq != -1) {
- printk(KERN_INFO "xen_map_pirq_gsi: returning irq %d for gsi %u\n",
- irq, gsi);
+ pr_info("%s: returning irq %d for gsi %u\n",
+ __func__, irq, gsi);
goto out;
}
@@ -689,8 +667,13 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi,
goto out;
}
- xen_irq_info_pirq_init(irq, 0, pirq, gsi, irq_op.vector, DOMID_SELF,
+ ret = xen_irq_info_pirq_setup(irq, 0, pirq, gsi, DOMID_SELF,
shareable ? PIRQ_SHAREABLE : 0);
+ if (ret < 0) {
+ __unbind_from_irq(irq);
+ irq = ret;
+ goto out;
+ }
pirq_query_unmask(irq);
/* We try to use the handler with the appropriate semantic for the
@@ -737,21 +720,25 @@ int xen_allocate_pirq_msi(struct pci_dev *dev, struct msi_desc *msidesc)
}
int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
- int pirq, int vector, const char *name,
- domid_t domid)
+ int pirq, int nvec, const char *name, domid_t domid)
{
- int irq, ret;
+ int i, irq, ret;
mutex_lock(&irq_mapping_update_lock);
- irq = xen_allocate_irq_dynamic();
+ irq = xen_allocate_irqs_dynamic(nvec);
if (irq < 0)
goto out;
- irq_set_chip_and_handler_name(irq, &xen_pirq_chip, handle_edge_irq,
- name);
+ for (i = 0; i < nvec; i++) {
+ irq_set_chip_and_handler_name(irq + i, &xen_pirq_chip, handle_edge_irq, name);
+
+ ret = xen_irq_info_pirq_setup(irq + i, 0, pirq + i, 0, domid,
+ i == 0 ? 0 : PIRQ_MSI_GROUP);
+ if (ret < 0)
+ goto error_irq;
+ }
- xen_irq_info_pirq_init(irq, 0, pirq, 0, vector, domid, 0);
ret = irq_set_msi_desc(irq, msidesc);
if (ret < 0)
goto error_irq;
@@ -759,26 +746,27 @@ out:
mutex_unlock(&irq_mapping_update_lock);
return irq;
error_irq:
+ for (; i >= 0; i--)
+ __unbind_from_irq(irq + i);
mutex_unlock(&irq_mapping_update_lock);
- xen_free_irq(irq);
return ret;
}
#endif
int xen_destroy_irq(int irq)
{
- struct irq_desc *desc;
struct physdev_unmap_pirq unmap_irq;
struct irq_info *info = info_for_irq(irq);
int rc = -ENOENT;
mutex_lock(&irq_mapping_update_lock);
- desc = irq_to_desc(irq);
- if (!desc)
- goto out;
-
- if (xen_initial_domain()) {
+ /*
+ * If trying to remove a vector in a MSI group different
+ * than the first one skip the PIRQ unmap unless this vector
+ * is the first one in the group.
+ */
+ if (xen_initial_domain() && !(info->u.pirq.flags & PIRQ_MSI_GROUP)) {
unmap_irq.pirq = info->u.pirq.pirq;
unmap_irq.domid = info->u.pirq.domid;
rc = HYPERVISOR_physdev_op(PHYSDEVOP_unmap_pirq, &unmap_irq);
@@ -787,10 +775,10 @@ int xen_destroy_irq(int irq)
* (free_domain_pirqs).
*/
if ((rc == -ESRCH && info->u.pirq.domid != DOMID_SELF))
- printk(KERN_INFO "domain %d does not have %d anymore\n",
+ pr_info("domain %d does not have %d anymore\n",
info->u.pirq.domid, info->u.pirq.pirq);
else if (rc) {
- printk(KERN_WARNING "unmap irq failed %d\n", rc);
+ pr_warn("unmap irq failed %d\n", rc);
goto out;
}
}
@@ -830,28 +818,39 @@ int xen_pirq_from_irq(unsigned irq)
return pirq_from_irq(irq);
}
EXPORT_SYMBOL_GPL(xen_pirq_from_irq);
+
int bind_evtchn_to_irq(unsigned int evtchn)
{
int irq;
+ int ret;
+
+ if (evtchn >= xen_evtchn_max_channels())
+ return -ENOMEM;
mutex_lock(&irq_mapping_update_lock);
- irq = evtchn_to_irq[evtchn];
+ irq = get_evtchn_to_irq(evtchn);
if (irq == -1) {
irq = xen_allocate_irq_dynamic();
- if (irq == -1)
+ if (irq < 0)
goto out;
irq_set_chip_and_handler_name(irq, &xen_dynamic_chip,
handle_edge_irq, "event");
- xen_irq_info_evtchn_init(irq, evtchn);
+ ret = xen_irq_info_evtchn_setup(irq, evtchn);
+ if (ret < 0) {
+ __unbind_from_irq(irq);
+ irq = ret;
+ goto out;
+ }
+ /* New interdomain events are bound to VCPU 0. */
+ bind_evtchn_to_cpu(evtchn, 0);
} else {
struct irq_info *info = info_for_irq(irq);
WARN_ON(info == NULL || info->type != IRQT_EVTCHN);
}
- irq_clear_status_flags(irq, IRQ_NOREQUEST|IRQ_NOAUTOEN);
out:
mutex_unlock(&irq_mapping_update_lock);
@@ -864,6 +863,7 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
{
struct evtchn_bind_ipi bind_ipi;
int evtchn, irq;
+ int ret;
mutex_lock(&irq_mapping_update_lock);
@@ -883,8 +883,12 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
BUG();
evtchn = bind_ipi.port;
- xen_irq_info_ipi_init(cpu, irq, evtchn, ipi);
-
+ ret = xen_irq_info_ipi_setup(cpu, irq, evtchn, ipi);
+ if (ret < 0) {
+ __unbind_from_irq(irq);
+ irq = ret;
+ goto out;
+ }
bind_evtchn_to_cpu(evtchn, cpu);
} else {
struct irq_info *info = info_for_irq(irq);
@@ -917,7 +921,7 @@ static int find_virq(unsigned int virq, unsigned int cpu)
int port, rc = -ENOENT;
memset(&status, 0, sizeof(status));
- for (port = 0; port <= NR_EVENT_CHANNELS; port++) {
+ for (port = 0; port < xen_evtchn_max_channels(); port++) {
status.dom = DOMID_SELF;
status.port = port;
rc = HYPERVISOR_event_channel_op(EVTCHNOP_status, &status);
@@ -933,6 +937,19 @@ static int find_virq(unsigned int virq, unsigned int cpu)
return rc;
}
+/**
+ * xen_evtchn_nr_channels - number of usable event channel ports
+ *
+ * This may be less than the maximum supported by the current
+ * hypervisor ABI. Use xen_evtchn_max_channels() for the maximum
+ * supported.
+ */
+unsigned xen_evtchn_nr_channels(void)
+{
+ return evtchn_ops->nr_channels();
+}
+EXPORT_SYMBOL_GPL(xen_evtchn_nr_channels);
+
int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
{
struct evtchn_bind_virq bind_virq;
@@ -944,7 +961,7 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
if (irq == -1) {
irq = xen_allocate_irq_dynamic();
- if (irq == -1)
+ if (irq < 0)
goto out;
irq_set_chip_and_handler_name(irq, &xen_percpu_chip,
@@ -963,7 +980,12 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
evtchn = ret;
}
- xen_irq_info_virq_init(cpu, irq, evtchn, virq);
+ ret = xen_irq_info_virq_setup(cpu, irq, evtchn, virq);
+ if (ret < 0) {
+ __unbind_from_irq(irq);
+ irq = ret;
+ goto out;
+ }
bind_evtchn_to_cpu(evtchn, cpu);
} else {
@@ -979,47 +1001,8 @@ out:
static void unbind_from_irq(unsigned int irq)
{
- struct evtchn_close close;
- int evtchn = evtchn_from_irq(irq);
- struct irq_info *info = irq_get_handler_data(irq);
-
mutex_lock(&irq_mapping_update_lock);
-
- if (info->refcnt > 0) {
- info->refcnt--;
- if (info->refcnt != 0)
- goto done;
- }
-
- if (VALID_EVTCHN(evtchn)) {
- close.port = evtchn;
- if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
- BUG();
-
- switch (type_from_irq(irq)) {
- case IRQT_VIRQ:
- per_cpu(virq_to_irq, cpu_from_evtchn(evtchn))
- [virq_from_irq(irq)] = -1;
- break;
- case IRQT_IPI:
- per_cpu(ipi_to_irq, cpu_from_evtchn(evtchn))
- [ipi_from_irq(irq)] = -1;
- break;
- default:
- break;
- }
-
- /* Closed ports are implicitly re-bound to VCPU0. */
- bind_evtchn_to_cpu(evtchn, 0);
-
- evtchn_to_irq[evtchn] = -1;
- }
-
- BUG_ON(info_for_irq(irq)->type == IRQT_UNBOUND);
-
- xen_free_irq(irq);
-
- done:
+ __unbind_from_irq(irq);
mutex_unlock(&irq_mapping_update_lock);
}
@@ -1110,14 +1093,35 @@ int bind_ipi_to_irqhandler(enum ipi_vector ipi,
void unbind_from_irqhandler(unsigned int irq, void *dev_id)
{
+ struct irq_info *info = irq_get_handler_data(irq);
+
+ if (WARN_ON(!info))
+ return;
free_irq(irq, dev_id);
unbind_from_irq(irq);
}
EXPORT_SYMBOL_GPL(unbind_from_irqhandler);
+/**
+ * xen_set_irq_priority() - set an event channel priority.
+ * @irq:irq bound to an event channel.
+ * @priority: priority between XEN_IRQ_PRIORITY_MAX and XEN_IRQ_PRIORITY_MIN.
+ */
+int xen_set_irq_priority(unsigned irq, unsigned priority)
+{
+ struct evtchn_set_priority set_priority;
+
+ set_priority.port = evtchn_from_irq(irq);
+ set_priority.priority = priority;
+
+ return HYPERVISOR_event_channel_op(EVTCHNOP_set_priority,
+ &set_priority);
+}
+EXPORT_SYMBOL_GPL(xen_set_irq_priority);
+
int evtchn_make_refcounted(unsigned int evtchn)
{
- int irq = evtchn_to_irq[evtchn];
+ int irq = get_evtchn_to_irq(evtchn);
struct irq_info *info;
if (irq == -1)
@@ -1142,12 +1146,12 @@ int evtchn_get(unsigned int evtchn)
struct irq_info *info;
int err = -ENOENT;
- if (evtchn >= NR_EVENT_CHANNELS)
+ if (evtchn >= xen_evtchn_max_channels())
return -EINVAL;
mutex_lock(&irq_mapping_update_lock);
- irq = evtchn_to_irq[evtchn];
+ irq = get_evtchn_to_irq(evtchn);
if (irq == -1)
goto done;
@@ -1171,7 +1175,7 @@ EXPORT_SYMBOL_GPL(evtchn_get);
void evtchn_put(unsigned int evtchn)
{
- int irq = evtchn_to_irq[evtchn];
+ int irq = get_evtchn_to_irq(evtchn);
if (WARN_ON(irq == -1))
return;
unbind_from_irq(irq);
@@ -1180,205 +1184,36 @@ EXPORT_SYMBOL_GPL(evtchn_put);
void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
{
- int irq = per_cpu(ipi_to_irq, cpu)[vector];
- BUG_ON(irq < 0);
- notify_remote_via_irq(irq);
-}
-
-irqreturn_t xen_debug_interrupt(int irq, void *dev_id)
-{
- struct shared_info *sh = HYPERVISOR_shared_info;
- int cpu = smp_processor_id();
- unsigned long *cpu_evtchn = per_cpu(cpu_evtchn_mask, cpu);
- int i;
- unsigned long flags;
- static DEFINE_SPINLOCK(debug_lock);
- struct vcpu_info *v;
-
- spin_lock_irqsave(&debug_lock, flags);
-
- printk("\nvcpu %d\n ", cpu);
-
- for_each_online_cpu(i) {
- int pending;
- v = per_cpu(xen_vcpu, i);
- pending = (get_irq_regs() && i == cpu)
- ? xen_irqs_disabled(get_irq_regs())
- : v->evtchn_upcall_mask;
- printk("%d: masked=%d pending=%d event_sel %0*lx\n ", i,
- pending, v->evtchn_upcall_pending,
- (int)(sizeof(v->evtchn_pending_sel)*2),
- v->evtchn_pending_sel);
- }
- v = per_cpu(xen_vcpu, cpu);
-
- printk("\npending:\n ");
- for (i = ARRAY_SIZE(sh->evtchn_pending)-1; i >= 0; i--)
- printk("%0*lx%s", (int)sizeof(sh->evtchn_pending[0])*2,
- sh->evtchn_pending[i],
- i % 8 == 0 ? "\n " : " ");
- printk("\nglobal mask:\n ");
- for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--)
- printk("%0*lx%s",
- (int)(sizeof(sh->evtchn_mask[0])*2),
- sh->evtchn_mask[i],
- i % 8 == 0 ? "\n " : " ");
-
- printk("\nglobally unmasked:\n ");
- for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--)
- printk("%0*lx%s", (int)(sizeof(sh->evtchn_mask[0])*2),
- sh->evtchn_pending[i] & ~sh->evtchn_mask[i],
- i % 8 == 0 ? "\n " : " ");
-
- printk("\nlocal cpu%d mask:\n ", cpu);
- for (i = (NR_EVENT_CHANNELS/BITS_PER_LONG)-1; i >= 0; i--)
- printk("%0*lx%s", (int)(sizeof(cpu_evtchn[0])*2),
- cpu_evtchn[i],
- i % 8 == 0 ? "\n " : " ");
-
- printk("\nlocally unmasked:\n ");
- for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--) {
- unsigned long pending = sh->evtchn_pending[i]
- & ~sh->evtchn_mask[i]
- & cpu_evtchn[i];
- printk("%0*lx%s", (int)(sizeof(sh->evtchn_mask[0])*2),
- pending, i % 8 == 0 ? "\n " : " ");
- }
+ int irq;
- printk("\npending list:\n");
- for (i = 0; i < NR_EVENT_CHANNELS; i++) {
- if (sync_test_bit(i, sh->evtchn_pending)) {
- int word_idx = i / BITS_PER_LONG;
- printk(" %d: event %d -> irq %d%s%s%s\n",
- cpu_from_evtchn(i), i,
- evtchn_to_irq[i],
- sync_test_bit(word_idx, &v->evtchn_pending_sel)
- ? "" : " l2-clear",
- !sync_test_bit(i, sh->evtchn_mask)
- ? "" : " globally-masked",
- sync_test_bit(i, cpu_evtchn)
- ? "" : " locally-masked");
- }
+#ifdef CONFIG_X86
+ if (unlikely(vector == XEN_NMI_VECTOR)) {
+ int rc = HYPERVISOR_vcpu_op(VCPUOP_send_nmi, cpu, NULL);
+ if (rc < 0)
+ printk(KERN_WARNING "Sending nmi to CPU%d failed (rc:%d)\n", cpu, rc);
+ return;
}
-
- spin_unlock_irqrestore(&debug_lock, flags);
-
- return IRQ_HANDLED;
+#endif
+ irq = per_cpu(ipi_to_irq, cpu)[vector];
+ BUG_ON(irq < 0);
+ notify_remote_via_irq(irq);
}
static DEFINE_PER_CPU(unsigned, xed_nesting_count);
-static DEFINE_PER_CPU(unsigned int, current_word_idx);
-static DEFINE_PER_CPU(unsigned int, current_bit_idx);
-
-/*
- * Mask out the i least significant bits of w
- */
-#define MASK_LSBS(w, i) (w & ((~0UL) << i))
-/*
- * Search the CPUs pending events bitmasks. For each one found, map
- * the event number to an irq, and feed it into do_IRQ() for
- * handling.
- *
- * Xen uses a two-level bitmap to speed searching. The first level is
- * a bitset of words which contain pending event bits. The second
- * level is a bitset of pending events themselves.
- */
static void __xen_evtchn_do_upcall(void)
{
- int start_word_idx, start_bit_idx;
- int word_idx, bit_idx;
- int i;
- int cpu = get_cpu();
- struct shared_info *s = HYPERVISOR_shared_info;
struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
+ int cpu = get_cpu();
unsigned count;
do {
- unsigned long pending_words;
-
vcpu_info->evtchn_upcall_pending = 0;
if (__this_cpu_inc_return(xed_nesting_count) - 1)
goto out;
-#ifndef CONFIG_X86 /* No need for a barrier -- XCHG is a barrier on x86. */
- /* Clear master flag /before/ clearing selector flag. */
- wmb();
-#endif
- pending_words = xchg(&vcpu_info->evtchn_pending_sel, 0);
-
- start_word_idx = __this_cpu_read(current_word_idx);
- start_bit_idx = __this_cpu_read(current_bit_idx);
-
- word_idx = start_word_idx;
-
- for (i = 0; pending_words != 0; i++) {
- unsigned long pending_bits;
- unsigned long words;
-
- words = MASK_LSBS(pending_words, word_idx);
-
- /*
- * If we masked out all events, wrap to beginning.
- */
- if (words == 0) {
- word_idx = 0;
- bit_idx = 0;
- continue;
- }
- word_idx = __ffs(words);
-
- pending_bits = active_evtchns(cpu, s, word_idx);
- bit_idx = 0; /* usually scan entire word from start */
- if (word_idx == start_word_idx) {
- /* We scan the starting word in two parts */
- if (i == 0)
- /* 1st time: start in the middle */
- bit_idx = start_bit_idx;
- else
- /* 2nd time: mask bits done already */
- bit_idx &= (1UL << start_bit_idx) - 1;
- }
-
- do {
- unsigned long bits;
- int port, irq;
- struct irq_desc *desc;
-
- bits = MASK_LSBS(pending_bits, bit_idx);
-
- /* If we masked out all events, move on. */
- if (bits == 0)
- break;
-
- bit_idx = __ffs(bits);
-
- /* Process port. */
- port = (word_idx * BITS_PER_LONG) + bit_idx;
- irq = evtchn_to_irq[port];
-
- if (irq != -1) {
- desc = irq_to_desc(irq);
- if (desc)
- generic_handle_irq_desc(irq, desc);
- }
-
- bit_idx = (bit_idx + 1) % BITS_PER_LONG;
-
- /* Next caller starts at last processed + 1 */
- __this_cpu_write(current_word_idx,
- bit_idx ? word_idx :
- (word_idx+1) % BITS_PER_LONG);
- __this_cpu_write(current_bit_idx, bit_idx);
- } while (bit_idx != 0);
-
- /* Scan start_l1i twice; all others once. */
- if ((word_idx != start_word_idx) || (i != 0))
- pending_words &= ~(1UL << word_idx);
-
- word_idx = (word_idx + 1) % BITS_PER_LONG;
- }
+ xen_evtchn_handle_events(cpu);
BUG_ON(!irqs_disabled());
@@ -1398,6 +1233,7 @@ void xen_evtchn_do_upcall(struct pt_regs *regs)
irq_enter();
#ifdef CONFIG_X86
exit_idle();
+ inc_irq_stat(irq_hv_callback_count);
#endif
__xen_evtchn_do_upcall();
@@ -1417,6 +1253,9 @@ void rebind_evtchn_irq(int evtchn, int irq)
{
struct irq_info *info = info_for_irq(irq);
+ if (WARN_ON(!info))
+ return;
+
/* Make sure the irq is masked, since the new event channel
will also be masked. */
disable_irq(irq);
@@ -1424,12 +1263,12 @@ void rebind_evtchn_irq(int evtchn, int irq)
mutex_lock(&irq_mapping_update_lock);
/* After resume the irq<->evtchn mappings are all cleared out */
- BUG_ON(evtchn_to_irq[evtchn] != -1);
+ BUG_ON(get_evtchn_to_irq(evtchn) != -1);
/* Expect irq to have been bound before,
so there should be a proper type */
BUG_ON(info->type == IRQT_UNBOUND);
- xen_irq_info_evtchn_init(irq, evtchn);
+ (void)xen_irq_info_evtchn_setup(irq, evtchn);
mutex_unlock(&irq_mapping_update_lock);
@@ -1445,6 +1284,7 @@ static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
{
struct evtchn_bind_vcpu bind_vcpu;
int evtchn = evtchn_from_irq(irq);
+ int masked;
if (!VALID_EVTCHN(evtchn))
return -1;
@@ -1461,6 +1301,12 @@ static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
bind_vcpu.vcpu = tcpu;
/*
+ * Mask the event while changing the VCPU binding to prevent
+ * it being delivered on an unexpected VCPU.
+ */
+ masked = test_and_set_mask(evtchn);
+
+ /*
* If this fails, it usually just indicates that we're dealing with a
* virq or IPI channel, which don't actually need to be rebound. Ignore
* it, but don't do the xenlinux-level rebind in that case.
@@ -1468,33 +1314,20 @@ static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0)
bind_evtchn_to_cpu(evtchn, tcpu);
+ if (!masked)
+ unmask_evtchn(evtchn);
+
return 0;
}
static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest,
bool force)
{
- unsigned tcpu = cpumask_first(dest);
+ unsigned tcpu = cpumask_first_and(dest, cpu_online_mask);
return rebind_irq_to_cpu(data->irq, tcpu);
}
-int resend_irq_on_evtchn(unsigned int irq)
-{
- int masked, evtchn = evtchn_from_irq(irq);
- struct shared_info *s = HYPERVISOR_shared_info;
-
- if (!VALID_EVTCHN(evtchn))
- return 1;
-
- masked = sync_test_and_set_bit(evtchn, s->evtchn_mask);
- sync_set_bit(evtchn, s->evtchn_pending);
- if (!masked)
- unmask_evtchn(evtchn);
-
- return 1;
-}
-
static void enable_dynirq(struct irq_data *data)
{
int evtchn = evtchn_from_irq(data->irq);
@@ -1529,21 +1362,18 @@ static void mask_ack_dynirq(struct irq_data *data)
static int retrigger_dynirq(struct irq_data *data)
{
- int evtchn = evtchn_from_irq(data->irq);
- struct shared_info *sh = HYPERVISOR_shared_info;
- int ret = 0;
+ unsigned int evtchn = evtchn_from_irq(data->irq);
+ int masked;
- if (VALID_EVTCHN(evtchn)) {
- int masked;
+ if (!VALID_EVTCHN(evtchn))
+ return 0;
- masked = sync_test_and_set_bit(evtchn, sh->evtchn_mask);
- sync_set_bit(evtchn, sh->evtchn_pending);
- if (!masked)
- unmask_evtchn(evtchn);
- ret = 1;
- }
+ masked = test_and_set_mask(evtchn);
+ set_evtchn(evtchn);
+ if (!masked)
+ unmask_evtchn(evtchn);
- return ret;
+ return 1;
}
static void restore_pirqs(void)
@@ -1572,8 +1402,8 @@ static void restore_pirqs(void)
rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
if (rc) {
- printk(KERN_WARNING "xen map irq failed gsi=%d irq=%d pirq=%d rc=%d\n",
- gsi, irq, pirq, rc);
+ pr_warn("xen map irq failed gsi=%d irq=%d pirq=%d rc=%d\n",
+ gsi, irq, pirq, rc);
xen_free_irq(irq);
continue;
}
@@ -1604,7 +1434,7 @@ static void restore_cpu_virqs(unsigned int cpu)
evtchn = bind_virq.port;
/* Record the new mapping. */
- xen_irq_info_virq_init(cpu, irq, evtchn, virq);
+ (void)xen_irq_info_virq_setup(cpu, irq, evtchn, virq);
bind_evtchn_to_cpu(evtchn, cpu);
}
}
@@ -1628,7 +1458,7 @@ static void restore_cpu_ipis(unsigned int cpu)
evtchn = bind_ipi.port;
/* Record the new mapping. */
- xen_irq_info_ipi_init(cpu, irq, evtchn, ipi);
+ (void)xen_irq_info_ipi_setup(cpu, irq, evtchn, ipi);
bind_evtchn_to_cpu(evtchn, cpu);
}
}
@@ -1690,7 +1520,12 @@ void xen_poll_irq(int irq)
int xen_test_irq_shared(int irq)
{
struct irq_info *info = info_for_irq(irq);
- struct physdev_irq_status_query irq_status = { .irq = info->u.pirq.pirq };
+ struct physdev_irq_status_query irq_status;
+
+ if (WARN_ON(!info))
+ return -ENOENT;
+
+ irq_status.irq = info->u.pirq.pirq;
if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status))
return 0;
@@ -1700,21 +1535,18 @@ EXPORT_SYMBOL_GPL(xen_test_irq_shared);
void xen_irq_resume(void)
{
- unsigned int cpu, evtchn;
+ unsigned int cpu;
struct irq_info *info;
- init_evtchn_cpu_bindings();
-
/* New event-channel space is not 'live' yet. */
- for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++)
- mask_evtchn(evtchn);
+ xen_evtchn_mask_all();
+ xen_evtchn_resume();
/* No IRQ <-> event-channel mappings. */
list_for_each_entry(info, &xen_irq_list_head, list)
info->evtchn = 0; /* zap event-channel binding */
- for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++)
- evtchn_to_irq[evtchn] = -1;
+ clear_evtchn_to_irq_all();
for_each_possible_cpu(cpu) {
restore_cpu_virqs(cpu);
@@ -1787,46 +1619,58 @@ void xen_callback_vector(void)
int rc;
uint64_t callback_via;
if (xen_have_vector_callback) {
- callback_via = HVM_CALLBACK_VECTOR(XEN_HVM_EVTCHN_CALLBACK);
+ callback_via = HVM_CALLBACK_VECTOR(HYPERVISOR_CALLBACK_VECTOR);
rc = xen_set_callback_via(callback_via);
if (rc) {
- printk(KERN_ERR "Request for Xen HVM callback vector"
- " failed.\n");
+ pr_err("Request for Xen HVM callback vector failed\n");
xen_have_vector_callback = 0;
return;
}
- printk(KERN_INFO "Xen HVM callback vector for event delivery is "
- "enabled\n");
+ pr_info("Xen HVM callback vector for event delivery is enabled\n");
/* in the restore case the vector has already been allocated */
- if (!test_bit(XEN_HVM_EVTCHN_CALLBACK, used_vectors))
- alloc_intr_gate(XEN_HVM_EVTCHN_CALLBACK, xen_hvm_callback_vector);
+ if (!test_bit(HYPERVISOR_CALLBACK_VECTOR, used_vectors))
+ alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR,
+ xen_hvm_callback_vector);
}
}
#else
void xen_callback_vector(void) {}
#endif
+#undef MODULE_PARAM_PREFIX
+#define MODULE_PARAM_PREFIX "xen."
+
+static bool fifo_events = true;
+module_param(fifo_events, bool, 0);
+
void __init xen_init_IRQ(void)
{
- int i;
+ int ret = -EINVAL;
- evtchn_to_irq = kcalloc(NR_EVENT_CHANNELS, sizeof(*evtchn_to_irq),
- GFP_KERNEL);
- BUG_ON(!evtchn_to_irq);
- for (i = 0; i < NR_EVENT_CHANNELS; i++)
- evtchn_to_irq[i] = -1;
+ if (fifo_events)
+ ret = xen_evtchn_fifo_init();
+ if (ret < 0)
+ xen_evtchn_2l_init();
- init_evtchn_cpu_bindings();
+ evtchn_to_irq = kcalloc(EVTCHN_ROW(xen_evtchn_max_channels()),
+ sizeof(*evtchn_to_irq), GFP_KERNEL);
+ BUG_ON(!evtchn_to_irq);
/* No event channels are 'live' right now. */
- for (i = 0; i < NR_EVENT_CHANNELS; i++)
- mask_evtchn(i);
+ xen_evtchn_mask_all();
pirq_needs_eoi = pirq_needs_eoi_flag;
#ifdef CONFIG_X86
- if (xen_hvm_domain()) {
+ if (xen_pv_domain()) {
+ irq_ctx_init(smp_processor_id());
+ if (xen_initial_domain())
+ pci_xen_initial_domain();
+ }
+ if (xen_feature(XENFEAT_hvm_callback_vector))
xen_callback_vector();
+
+ if (xen_hvm_domain()) {
native_init_IRQ();
/* pci_xen_hvm_init must be called after native_init_IRQ so that
* __acpi_register_gsi can point at the right function */
@@ -1835,13 +1679,10 @@ void __init xen_init_IRQ(void)
int rc;
struct physdev_pirq_eoi_gmfn eoi_gmfn;
- irq_ctx_init(smp_processor_id());
- if (xen_initial_domain())
- pci_xen_initial_domain();
-
pirq_eoi_map = (void *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
eoi_gmfn.gmfn = virt_to_mfn(pirq_eoi_map);
rc = HYPERVISOR_physdev_op(PHYSDEVOP_pirq_eoi_gmfn_v2, &eoi_gmfn);
+ /* TODO: No PVH support for PIRQ EOI */
if (rc != 0) {
free_page((unsigned long) pirq_eoi_map);
pirq_eoi_map = NULL;
diff --git a/drivers/xen/events/events_fifo.c b/drivers/xen/events/events_fifo.c
new file mode 100644
index 00000000000..84b4bfb8434
--- /dev/null
+++ b/drivers/xen/events/events_fifo.c
@@ -0,0 +1,443 @@
+/*
+ * Xen event channels (FIFO-based ABI)
+ *
+ * Copyright (C) 2013 Citrix Systems R&D ltd.
+ *
+ * This source code is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * Or, when distributed separately from the Linux kernel or
+ * incorporated into other software packages, subject to the following
+ * license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
+
+#include <linux/linkage.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/smp.h>
+#include <linux/percpu.h>
+#include <linux/cpu.h>
+
+#include <asm/sync_bitops.h>
+#include <asm/xen/hypercall.h>
+#include <asm/xen/hypervisor.h>
+#include <asm/xen/page.h>
+
+#include <xen/xen.h>
+#include <xen/xen-ops.h>
+#include <xen/events.h>
+#include <xen/interface/xen.h>
+#include <xen/interface/event_channel.h>
+
+#include "events_internal.h"
+
+#define EVENT_WORDS_PER_PAGE (PAGE_SIZE / sizeof(event_word_t))
+#define MAX_EVENT_ARRAY_PAGES (EVTCHN_FIFO_NR_CHANNELS / EVENT_WORDS_PER_PAGE)
+
+struct evtchn_fifo_queue {
+ uint32_t head[EVTCHN_FIFO_MAX_QUEUES];
+};
+
+static DEFINE_PER_CPU(struct evtchn_fifo_control_block *, cpu_control_block);
+static DEFINE_PER_CPU(struct evtchn_fifo_queue, cpu_queue);
+static event_word_t *event_array[MAX_EVENT_ARRAY_PAGES] __read_mostly;
+static unsigned event_array_pages __read_mostly;
+
+/*
+ * sync_set_bit() and friends must be unsigned long aligned on non-x86
+ * platforms.
+ */
+#if !defined(CONFIG_X86) && BITS_PER_LONG > 32
+
+#define BM(w) (unsigned long *)((unsigned long)w & ~0x7UL)
+#define EVTCHN_FIFO_BIT(b, w) \
+ (((unsigned long)w & 0x4UL) ? (EVTCHN_FIFO_ ##b + 32) : EVTCHN_FIFO_ ##b)
+
+#else
+
+#define BM(w) ((unsigned long *)(w))
+#define EVTCHN_FIFO_BIT(b, w) EVTCHN_FIFO_ ##b
+
+#endif
+
+static inline event_word_t *event_word_from_port(unsigned port)
+{
+ unsigned i = port / EVENT_WORDS_PER_PAGE;
+
+ return event_array[i] + port % EVENT_WORDS_PER_PAGE;
+}
+
+static unsigned evtchn_fifo_max_channels(void)
+{
+ return EVTCHN_FIFO_NR_CHANNELS;
+}
+
+static unsigned evtchn_fifo_nr_channels(void)
+{
+ return event_array_pages * EVENT_WORDS_PER_PAGE;
+}
+
+static void free_unused_array_pages(void)
+{
+ unsigned i;
+
+ for (i = event_array_pages; i < MAX_EVENT_ARRAY_PAGES; i++) {
+ if (!event_array[i])
+ break;
+ free_page((unsigned long)event_array[i]);
+ event_array[i] = NULL;
+ }
+}
+
+static void init_array_page(event_word_t *array_page)
+{
+ unsigned i;
+
+ for (i = 0; i < EVENT_WORDS_PER_PAGE; i++)
+ array_page[i] = 1 << EVTCHN_FIFO_MASKED;
+}
+
+static int evtchn_fifo_setup(struct irq_info *info)
+{
+ unsigned port = info->evtchn;
+ unsigned new_array_pages;
+ int ret;
+
+ new_array_pages = port / EVENT_WORDS_PER_PAGE + 1;
+
+ if (new_array_pages > MAX_EVENT_ARRAY_PAGES)
+ return -EINVAL;
+
+ while (event_array_pages < new_array_pages) {
+ void *array_page;
+ struct evtchn_expand_array expand_array;
+
+ /* Might already have a page if we've resumed. */
+ array_page = event_array[event_array_pages];
+ if (!array_page) {
+ array_page = (void *)__get_free_page(GFP_KERNEL);
+ if (array_page == NULL) {
+ ret = -ENOMEM;
+ goto error;
+ }
+ event_array[event_array_pages] = array_page;
+ }
+
+ /* Mask all events in this page before adding it. */
+ init_array_page(array_page);
+
+ expand_array.array_gfn = virt_to_mfn(array_page);
+
+ ret = HYPERVISOR_event_channel_op(EVTCHNOP_expand_array, &expand_array);
+ if (ret < 0)
+ goto error;
+
+ event_array_pages++;
+ }
+ return 0;
+
+ error:
+ if (event_array_pages == 0)
+ panic("xen: unable to expand event array with initial page (%d)\n", ret);
+ else
+ pr_err("unable to expand event array (%d)\n", ret);
+ free_unused_array_pages();
+ return ret;
+}
+
+static void evtchn_fifo_bind_to_cpu(struct irq_info *info, unsigned cpu)
+{
+ /* no-op */
+}
+
+static void evtchn_fifo_clear_pending(unsigned port)
+{
+ event_word_t *word = event_word_from_port(port);
+ sync_clear_bit(EVTCHN_FIFO_BIT(PENDING, word), BM(word));
+}
+
+static void evtchn_fifo_set_pending(unsigned port)
+{
+ event_word_t *word = event_word_from_port(port);
+ sync_set_bit(EVTCHN_FIFO_BIT(PENDING, word), BM(word));
+}
+
+static bool evtchn_fifo_is_pending(unsigned port)
+{
+ event_word_t *word = event_word_from_port(port);
+ return sync_test_bit(EVTCHN_FIFO_BIT(PENDING, word), BM(word));
+}
+
+static bool evtchn_fifo_test_and_set_mask(unsigned port)
+{
+ event_word_t *word = event_word_from_port(port);
+ return sync_test_and_set_bit(EVTCHN_FIFO_BIT(MASKED, word), BM(word));
+}
+
+static void evtchn_fifo_mask(unsigned port)
+{
+ event_word_t *word = event_word_from_port(port);
+ sync_set_bit(EVTCHN_FIFO_BIT(MASKED, word), BM(word));
+}
+
+static bool evtchn_fifo_is_masked(unsigned port)
+{
+ event_word_t *word = event_word_from_port(port);
+ return sync_test_bit(EVTCHN_FIFO_BIT(MASKED, word), BM(word));
+}
+/*
+ * Clear MASKED, spinning if BUSY is set.
+ */
+static void clear_masked(volatile event_word_t *word)
+{
+ event_word_t new, old, w;
+
+ w = *word;
+
+ do {
+ old = w & ~(1 << EVTCHN_FIFO_BUSY);
+ new = old & ~(1 << EVTCHN_FIFO_MASKED);
+ w = sync_cmpxchg(word, old, new);
+ } while (w != old);
+}
+
+static void evtchn_fifo_unmask(unsigned port)
+{
+ event_word_t *word = event_word_from_port(port);
+
+ BUG_ON(!irqs_disabled());
+
+ clear_masked(word);
+ if (evtchn_fifo_is_pending(port)) {
+ struct evtchn_unmask unmask = { .port = port };
+ (void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask);
+ }
+}
+
+static uint32_t clear_linked(volatile event_word_t *word)
+{
+ event_word_t new, old, w;
+
+ w = *word;
+
+ do {
+ old = w;
+ new = (w & ~((1 << EVTCHN_FIFO_LINKED)
+ | EVTCHN_FIFO_LINK_MASK));
+ } while ((w = sync_cmpxchg(word, old, new)) != old);
+
+ return w & EVTCHN_FIFO_LINK_MASK;
+}
+
+static void handle_irq_for_port(unsigned port)
+{
+ int irq;
+
+ irq = get_evtchn_to_irq(port);
+ if (irq != -1)
+ generic_handle_irq(irq);
+}
+
+static void consume_one_event(unsigned cpu,
+ struct evtchn_fifo_control_block *control_block,
+ unsigned priority, unsigned long *ready)
+{
+ struct evtchn_fifo_queue *q = &per_cpu(cpu_queue, cpu);
+ uint32_t head;
+ unsigned port;
+ event_word_t *word;
+
+ head = q->head[priority];
+
+ /*
+ * Reached the tail last time? Read the new HEAD from the
+ * control block.
+ */
+ if (head == 0) {
+ rmb(); /* Ensure word is up-to-date before reading head. */
+ head = control_block->head[priority];
+ }
+
+ port = head;
+ word = event_word_from_port(port);
+ head = clear_linked(word);
+
+ /*
+ * If the link is non-zero, there are more events in the
+ * queue, otherwise the queue is empty.
+ *
+ * If the queue is empty, clear this priority from our local
+ * copy of the ready word.
+ */
+ if (head == 0)
+ clear_bit(priority, ready);
+
+ if (evtchn_fifo_is_pending(port) && !evtchn_fifo_is_masked(port))
+ handle_irq_for_port(port);
+
+ q->head[priority] = head;
+}
+
+static void evtchn_fifo_handle_events(unsigned cpu)
+{
+ struct evtchn_fifo_control_block *control_block;
+ unsigned long ready;
+ unsigned q;
+
+ control_block = per_cpu(cpu_control_block, cpu);
+
+ ready = xchg(&control_block->ready, 0);
+
+ while (ready) {
+ q = find_first_bit(BM(&ready), EVTCHN_FIFO_MAX_QUEUES);
+ consume_one_event(cpu, control_block, q, &ready);
+ ready |= xchg(&control_block->ready, 0);
+ }
+}
+
+static void evtchn_fifo_resume(void)
+{
+ unsigned cpu;
+
+ for_each_possible_cpu(cpu) {
+ void *control_block = per_cpu(cpu_control_block, cpu);
+ struct evtchn_init_control init_control;
+ int ret;
+
+ if (!control_block)
+ continue;
+
+ /*
+ * If this CPU is offline, take the opportunity to
+ * free the control block while it is not being
+ * used.
+ */
+ if (!cpu_online(cpu)) {
+ free_page((unsigned long)control_block);
+ per_cpu(cpu_control_block, cpu) = NULL;
+ continue;
+ }
+
+ init_control.control_gfn = virt_to_mfn(control_block);
+ init_control.offset = 0;
+ init_control.vcpu = cpu;
+
+ ret = HYPERVISOR_event_channel_op(EVTCHNOP_init_control,
+ &init_control);
+ if (ret < 0)
+ BUG();
+ }
+
+ /*
+ * The event array starts out as empty again and is extended
+ * as normal when events are bound. The existing pages will
+ * be reused.
+ */
+ event_array_pages = 0;
+}
+
+static const struct evtchn_ops evtchn_ops_fifo = {
+ .max_channels = evtchn_fifo_max_channels,
+ .nr_channels = evtchn_fifo_nr_channels,
+ .setup = evtchn_fifo_setup,
+ .bind_to_cpu = evtchn_fifo_bind_to_cpu,
+ .clear_pending = evtchn_fifo_clear_pending,
+ .set_pending = evtchn_fifo_set_pending,
+ .is_pending = evtchn_fifo_is_pending,
+ .test_and_set_mask = evtchn_fifo_test_and_set_mask,
+ .mask = evtchn_fifo_mask,
+ .unmask = evtchn_fifo_unmask,
+ .handle_events = evtchn_fifo_handle_events,
+ .resume = evtchn_fifo_resume,
+};
+
+static int evtchn_fifo_init_control_block(unsigned cpu)
+{
+ struct page *control_block = NULL;
+ struct evtchn_init_control init_control;
+ int ret = -ENOMEM;
+
+ control_block = alloc_page(GFP_KERNEL|__GFP_ZERO);
+ if (control_block == NULL)
+ goto error;
+
+ init_control.control_gfn = virt_to_mfn(page_address(control_block));
+ init_control.offset = 0;
+ init_control.vcpu = cpu;
+
+ ret = HYPERVISOR_event_channel_op(EVTCHNOP_init_control, &init_control);
+ if (ret < 0)
+ goto error;
+
+ per_cpu(cpu_control_block, cpu) = page_address(control_block);
+
+ return 0;
+
+ error:
+ __free_page(control_block);
+ return ret;
+}
+
+static int evtchn_fifo_cpu_notification(struct notifier_block *self,
+ unsigned long action,
+ void *hcpu)
+{
+ int cpu = (long)hcpu;
+ int ret = 0;
+
+ switch (action) {
+ case CPU_UP_PREPARE:
+ if (!per_cpu(cpu_control_block, cpu))
+ ret = evtchn_fifo_init_control_block(cpu);
+ break;
+ default:
+ break;
+ }
+ return ret < 0 ? NOTIFY_BAD : NOTIFY_OK;
+}
+
+static struct notifier_block evtchn_fifo_cpu_notifier = {
+ .notifier_call = evtchn_fifo_cpu_notification,
+};
+
+int __init xen_evtchn_fifo_init(void)
+{
+ int cpu = get_cpu();
+ int ret;
+
+ ret = evtchn_fifo_init_control_block(cpu);
+ if (ret < 0)
+ goto out;
+
+ pr_info("Using FIFO-based ABI\n");
+
+ evtchn_ops = &evtchn_ops_fifo;
+
+ register_cpu_notifier(&evtchn_fifo_cpu_notifier);
+out:
+ put_cpu();
+ return ret;
+}
diff --git a/drivers/xen/events/events_internal.h b/drivers/xen/events/events_internal.h
new file mode 100644
index 00000000000..50c2050a1e3
--- /dev/null
+++ b/drivers/xen/events/events_internal.h
@@ -0,0 +1,151 @@
+/*
+ * Xen Event Channels (internal header)
+ *
+ * Copyright (C) 2013 Citrix Systems R&D Ltd.
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2 or later. See the file COPYING for more details.
+ */
+#ifndef __EVENTS_INTERNAL_H__
+#define __EVENTS_INTERNAL_H__
+
+/* Interrupt types. */
+enum xen_irq_type {
+ IRQT_UNBOUND = 0,
+ IRQT_PIRQ,
+ IRQT_VIRQ,
+ IRQT_IPI,
+ IRQT_EVTCHN
+};
+
+/*
+ * Packed IRQ information:
+ * type - enum xen_irq_type
+ * event channel - irq->event channel mapping
+ * cpu - cpu this event channel is bound to
+ * index - type-specific information:
+ * PIRQ - vector, with MSB being "needs EIO", or physical IRQ of the HVM
+ * guest, or GSI (real passthrough IRQ) of the device.
+ * VIRQ - virq number
+ * IPI - IPI vector
+ * EVTCHN -
+ */
+struct irq_info {
+ struct list_head list;
+ int refcnt;
+ enum xen_irq_type type; /* type */
+ unsigned irq;
+ unsigned int evtchn; /* event channel */
+ unsigned short cpu; /* cpu bound */
+
+ union {
+ unsigned short virq;
+ enum ipi_vector ipi;
+ struct {
+ unsigned short pirq;
+ unsigned short gsi;
+ unsigned char vector;
+ unsigned char flags;
+ uint16_t domid;
+ } pirq;
+ } u;
+};
+
+#define PIRQ_NEEDS_EOI (1 << 0)
+#define PIRQ_SHAREABLE (1 << 1)
+#define PIRQ_MSI_GROUP (1 << 2)
+
+struct evtchn_ops {
+ unsigned (*max_channels)(void);
+ unsigned (*nr_channels)(void);
+
+ int (*setup)(struct irq_info *info);
+ void (*bind_to_cpu)(struct irq_info *info, unsigned cpu);
+
+ void (*clear_pending)(unsigned port);
+ void (*set_pending)(unsigned port);
+ bool (*is_pending)(unsigned port);
+ bool (*test_and_set_mask)(unsigned port);
+ void (*mask)(unsigned port);
+ void (*unmask)(unsigned port);
+
+ void (*handle_events)(unsigned cpu);
+ void (*resume)(void);
+};
+
+extern const struct evtchn_ops *evtchn_ops;
+
+extern int **evtchn_to_irq;
+int get_evtchn_to_irq(unsigned int evtchn);
+
+struct irq_info *info_for_irq(unsigned irq);
+unsigned cpu_from_irq(unsigned irq);
+unsigned cpu_from_evtchn(unsigned int evtchn);
+
+static inline unsigned xen_evtchn_max_channels(void)
+{
+ return evtchn_ops->max_channels();
+}
+
+/*
+ * Do any ABI specific setup for a bound event channel before it can
+ * be unmasked and used.
+ */
+static inline int xen_evtchn_port_setup(struct irq_info *info)
+{
+ if (evtchn_ops->setup)
+ return evtchn_ops->setup(info);
+ return 0;
+}
+
+static inline void xen_evtchn_port_bind_to_cpu(struct irq_info *info,
+ unsigned cpu)
+{
+ evtchn_ops->bind_to_cpu(info, cpu);
+}
+
+static inline void clear_evtchn(unsigned port)
+{
+ evtchn_ops->clear_pending(port);
+}
+
+static inline void set_evtchn(unsigned port)
+{
+ evtchn_ops->set_pending(port);
+}
+
+static inline bool test_evtchn(unsigned port)
+{
+ return evtchn_ops->is_pending(port);
+}
+
+static inline bool test_and_set_mask(unsigned port)
+{
+ return evtchn_ops->test_and_set_mask(port);
+}
+
+static inline void mask_evtchn(unsigned port)
+{
+ return evtchn_ops->mask(port);
+}
+
+static inline void unmask_evtchn(unsigned port)
+{
+ return evtchn_ops->unmask(port);
+}
+
+static inline void xen_evtchn_handle_events(unsigned cpu)
+{
+ return evtchn_ops->handle_events(cpu);
+}
+
+static inline void xen_evtchn_resume(void)
+{
+ if (evtchn_ops->resume)
+ evtchn_ops->resume();
+}
+
+void xen_evtchn_2l_init(void);
+int xen_evtchn_fifo_init(void);
+
+#endif /* #ifndef __EVENTS_INTERNAL_H__ */
diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c
index b1f60a0c0be..00f40f051d9 100644
--- a/drivers/xen/evtchn.c
+++ b/drivers/xen/evtchn.c
@@ -31,6 +31,8 @@
* IN THE SOFTWARE.
*/
+#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/sched.h>
@@ -55,6 +57,7 @@
struct per_user_data {
struct mutex bind_mutex; /* serialize bind/unbind operations */
+ struct rb_root evtchns;
/* Notification ring, accessed via /dev/xen/evtchn. */
#define EVTCHN_RING_SIZE (PAGE_SIZE / sizeof(evtchn_port_t))
@@ -62,6 +65,7 @@ struct per_user_data {
evtchn_port_t *ring;
unsigned int ring_cons, ring_prod, ring_overflow;
struct mutex ring_cons_mutex; /* protect against concurrent readers */
+ spinlock_t ring_prod_lock; /* product against concurrent interrupts */
/* Processes wait on this queue when ring is empty. */
wait_queue_head_t evtchn_wait;
@@ -69,54 +73,79 @@ struct per_user_data {
const char *name;
};
-/*
- * Who's bound to each port? This is logically an array of struct
- * per_user_data *, but we encode the current enabled-state in bit 0.
- */
-static unsigned long *port_user;
-static DEFINE_SPINLOCK(port_user_lock); /* protects port_user[] and ring_prod */
+struct user_evtchn {
+ struct rb_node node;
+ struct per_user_data *user;
+ unsigned port;
+ bool enabled;
+};
-static inline struct per_user_data *get_port_user(unsigned port)
+static int add_evtchn(struct per_user_data *u, struct user_evtchn *evtchn)
{
- return (struct per_user_data *)(port_user[port] & ~1);
-}
+ struct rb_node **new = &(u->evtchns.rb_node), *parent = NULL;
-static inline void set_port_user(unsigned port, struct per_user_data *u)
-{
- port_user[port] = (unsigned long)u;
+ while (*new) {
+ struct user_evtchn *this;
+
+ this = container_of(*new, struct user_evtchn, node);
+
+ parent = *new;
+ if (this->port < evtchn->port)
+ new = &((*new)->rb_left);
+ else if (this->port > evtchn->port)
+ new = &((*new)->rb_right);
+ else
+ return -EEXIST;
+ }
+
+ /* Add new node and rebalance tree. */
+ rb_link_node(&evtchn->node, parent, new);
+ rb_insert_color(&evtchn->node, &u->evtchns);
+
+ return 0;
}
-static inline bool get_port_enabled(unsigned port)
+static void del_evtchn(struct per_user_data *u, struct user_evtchn *evtchn)
{
- return port_user[port] & 1;
+ rb_erase(&evtchn->node, &u->evtchns);
+ kfree(evtchn);
}
-static inline void set_port_enabled(unsigned port, bool enabled)
+static struct user_evtchn *find_evtchn(struct per_user_data *u, unsigned port)
{
- if (enabled)
- port_user[port] |= 1;
- else
- port_user[port] &= ~1;
+ struct rb_node *node = u->evtchns.rb_node;
+
+ while (node) {
+ struct user_evtchn *evtchn;
+
+ evtchn = container_of(node, struct user_evtchn, node);
+
+ if (evtchn->port < port)
+ node = node->rb_left;
+ else if (evtchn->port > port)
+ node = node->rb_right;
+ else
+ return evtchn;
+ }
+ return NULL;
}
static irqreturn_t evtchn_interrupt(int irq, void *data)
{
- unsigned int port = (unsigned long)data;
- struct per_user_data *u;
-
- spin_lock(&port_user_lock);
-
- u = get_port_user(port);
+ struct user_evtchn *evtchn = data;
+ struct per_user_data *u = evtchn->user;
- WARN(!get_port_enabled(port),
+ WARN(!evtchn->enabled,
"Interrupt for port %d, but apparently not enabled; per-user %p\n",
- port, u);
+ evtchn->port, u);
disable_irq_nosync(irq);
- set_port_enabled(port, false);
+ evtchn->enabled = false;
+
+ spin_lock(&u->ring_prod_lock);
if ((u->ring_prod - u->ring_cons) < EVTCHN_RING_SIZE) {
- u->ring[EVTCHN_RING_MASK(u->ring_prod)] = port;
+ u->ring[EVTCHN_RING_MASK(u->ring_prod)] = evtchn->port;
wmb(); /* Ensure ring contents visible */
if (u->ring_cons == u->ring_prod++) {
wake_up_interruptible(&u->evtchn_wait);
@@ -126,7 +155,7 @@ static irqreturn_t evtchn_interrupt(int irq, void *data)
} else
u->ring_overflow = 1;
- spin_unlock(&port_user_lock);
+ spin_unlock(&u->ring_prod_lock);
return IRQ_HANDLED;
}
@@ -227,20 +256,20 @@ static ssize_t evtchn_write(struct file *file, const char __user *buf,
if (copy_from_user(kbuf, buf, count) != 0)
goto out;
- spin_lock_irq(&port_user_lock);
+ mutex_lock(&u->bind_mutex);
for (i = 0; i < (count/sizeof(evtchn_port_t)); i++) {
unsigned port = kbuf[i];
+ struct user_evtchn *evtchn;
- if (port < NR_EVENT_CHANNELS &&
- get_port_user(port) == u &&
- !get_port_enabled(port)) {
- set_port_enabled(port, true);
+ evtchn = find_evtchn(u, port);
+ if (evtchn && !evtchn->enabled) {
+ evtchn->enabled = true;
enable_irq(irq_from_evtchn(port));
}
}
- spin_unlock_irq(&port_user_lock);
+ mutex_unlock(&u->bind_mutex);
rc = count;
@@ -251,6 +280,8 @@ static ssize_t evtchn_write(struct file *file, const char __user *buf,
static int evtchn_bind_to_user(struct per_user_data *u, int port)
{
+ struct user_evtchn *evtchn;
+ struct evtchn_close close;
int rc = 0;
/*
@@ -261,25 +292,46 @@ static int evtchn_bind_to_user(struct per_user_data *u, int port)
* interrupt handler yet, and our caller has already
* serialized bind operations.)
*/
- BUG_ON(get_port_user(port) != NULL);
- set_port_user(port, u);
- set_port_enabled(port, true); /* start enabled */
- rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, IRQF_DISABLED,
- u->name, (void *)(unsigned long)port);
- if (rc >= 0)
- rc = evtchn_make_refcounted(port);
+ evtchn = kzalloc(sizeof(*evtchn), GFP_KERNEL);
+ if (!evtchn)
+ return -ENOMEM;
+
+ evtchn->user = u;
+ evtchn->port = port;
+ evtchn->enabled = true; /* start enabled */
+ rc = add_evtchn(u, evtchn);
+ if (rc < 0)
+ goto err;
+
+ rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, 0,
+ u->name, evtchn);
+ if (rc < 0)
+ goto err;
+
+ rc = evtchn_make_refcounted(port);
+ return rc;
+
+err:
+ /* bind failed, should close the port now */
+ close.port = port;
+ if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
+ BUG();
+ del_evtchn(u, evtchn);
return rc;
}
-static void evtchn_unbind_from_user(struct per_user_data *u, int port)
+static void evtchn_unbind_from_user(struct per_user_data *u,
+ struct user_evtchn *evtchn)
{
- int irq = irq_from_evtchn(port);
+ int irq = irq_from_evtchn(evtchn->port);
+
+ BUG_ON(irq < 0);
- unbind_from_irqhandler(irq, (void *)(unsigned long)port);
+ unbind_from_irqhandler(irq, evtchn);
- set_port_user(port, NULL);
+ del_evtchn(u, evtchn);
}
static long evtchn_ioctl(struct file *file,
@@ -358,45 +410,38 @@ static long evtchn_ioctl(struct file *file,
case IOCTL_EVTCHN_UNBIND: {
struct ioctl_evtchn_unbind unbind;
+ struct user_evtchn *evtchn;
rc = -EFAULT;
if (copy_from_user(&unbind, uarg, sizeof(unbind)))
break;
rc = -EINVAL;
- if (unbind.port >= NR_EVENT_CHANNELS)
+ if (unbind.port >= xen_evtchn_nr_channels())
break;
- spin_lock_irq(&port_user_lock);
-
rc = -ENOTCONN;
- if (get_port_user(unbind.port) != u) {
- spin_unlock_irq(&port_user_lock);
+ evtchn = find_evtchn(u, unbind.port);
+ if (!evtchn)
break;
- }
disable_irq(irq_from_evtchn(unbind.port));
-
- spin_unlock_irq(&port_user_lock);
-
- evtchn_unbind_from_user(u, unbind.port);
-
+ evtchn_unbind_from_user(u, evtchn);
rc = 0;
break;
}
case IOCTL_EVTCHN_NOTIFY: {
struct ioctl_evtchn_notify notify;
+ struct user_evtchn *evtchn;
rc = -EFAULT;
if (copy_from_user(&notify, uarg, sizeof(notify)))
break;
- if (notify.port >= NR_EVENT_CHANNELS) {
- rc = -EINVAL;
- } else if (get_port_user(notify.port) != u) {
- rc = -ENOTCONN;
- } else {
+ rc = -ENOTCONN;
+ evtchn = find_evtchn(u, notify.port);
+ if (evtchn) {
notify_remote_via_evtchn(notify.port);
rc = 0;
}
@@ -406,9 +451,9 @@ static long evtchn_ioctl(struct file *file,
case IOCTL_EVTCHN_RESET: {
/* Initialise the ring to empty. Clear errors. */
mutex_lock(&u->ring_cons_mutex);
- spin_lock_irq(&port_user_lock);
+ spin_lock_irq(&u->ring_prod_lock);
u->ring_cons = u->ring_prod = u->ring_overflow = 0;
- spin_unlock_irq(&port_user_lock);
+ spin_unlock_irq(&u->ring_prod_lock);
mutex_unlock(&u->ring_cons_mutex);
rc = 0;
break;
@@ -467,6 +512,7 @@ static int evtchn_open(struct inode *inode, struct file *filp)
mutex_init(&u->bind_mutex);
mutex_init(&u->ring_cons_mutex);
+ spin_lock_init(&u->ring_prod_lock);
filp->private_data = u;
@@ -475,29 +521,18 @@ static int evtchn_open(struct inode *inode, struct file *filp)
static int evtchn_release(struct inode *inode, struct file *filp)
{
- int i;
struct per_user_data *u = filp->private_data;
+ struct rb_node *node;
- spin_lock_irq(&port_user_lock);
+ while ((node = u->evtchns.rb_node)) {
+ struct user_evtchn *evtchn;
- free_page((unsigned long)u->ring);
-
- for (i = 0; i < NR_EVENT_CHANNELS; i++) {
- if (get_port_user(i) != u)
- continue;
-
- disable_irq(irq_from_evtchn(i));
- }
-
- spin_unlock_irq(&port_user_lock);
-
- for (i = 0; i < NR_EVENT_CHANNELS; i++) {
- if (get_port_user(i) != u)
- continue;
-
- evtchn_unbind_from_user(get_port_user(i), i);
+ evtchn = rb_entry(node, struct user_evtchn, node);
+ disable_irq(irq_from_evtchn(evtchn->port));
+ evtchn_unbind_from_user(u, evtchn);
}
+ free_page((unsigned long)u->ring);
kfree(u->name);
kfree(u);
@@ -528,29 +563,20 @@ static int __init evtchn_init(void)
if (!xen_domain())
return -ENODEV;
- port_user = kcalloc(NR_EVENT_CHANNELS, sizeof(*port_user), GFP_KERNEL);
- if (port_user == NULL)
- return -ENOMEM;
-
- spin_lock_init(&port_user_lock);
-
- /* Create '/dev/misc/evtchn'. */
+ /* Create '/dev/xen/evtchn'. */
err = misc_register(&evtchn_miscdev);
if (err != 0) {
- printk(KERN_ALERT "Could not register /dev/misc/evtchn\n");
+ pr_err("Could not register /dev/xen/evtchn\n");
return err;
}
- printk(KERN_INFO "Event-channel device installed.\n");
+ pr_info("Event-channel device installed\n");
return 0;
}
static void __exit evtchn_cleanup(void)
{
- kfree(port_user);
- port_user = NULL;
-
misc_deregister(&evtchn_miscdev);
}
diff --git a/drivers/xen/fallback.c b/drivers/xen/fallback.c
index 0ef7c4d40f8..b04fb64c5a9 100644
--- a/drivers/xen/fallback.c
+++ b/drivers/xen/fallback.c
@@ -44,7 +44,7 @@ int xen_event_channel_op_compat(int cmd, void *arg)
}
EXPORT_SYMBOL_GPL(xen_event_channel_op_compat);
-int HYPERVISOR_physdev_op_compat(int cmd, void *arg)
+int xen_physdev_op_compat(int cmd, void *arg)
{
struct physdev_op op;
int rc;
@@ -78,3 +78,4 @@ int HYPERVISOR_physdev_op_compat(int cmd, void *arg)
return rc;
}
+EXPORT_SYMBOL_GPL(xen_physdev_op_compat);
diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c
index 4097987b330..787d1794541 100644
--- a/drivers/xen/gntalloc.c
+++ b/drivers/xen/gntalloc.c
@@ -48,6 +48,8 @@
* grant operation.
*/
+#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
+
#include <linux/atomic.h>
#include <linux/module.h>
#include <linux/miscdevice.h>
@@ -507,7 +509,7 @@ static int gntalloc_mmap(struct file *filp, struct vm_area_struct *vma)
int rv, i;
if (!(vma->vm_flags & VM_SHARED)) {
- printk(KERN_ERR "%s: Mapping must be shared.\n", __func__);
+ pr_err("%s: Mapping must be shared\n", __func__);
return -EINVAL;
}
@@ -584,7 +586,7 @@ static int __init gntalloc_init(void)
err = misc_register(&gntalloc_miscdev);
if (err != 0) {
- printk(KERN_ERR "Could not register misc gntalloc device\n");
+ pr_err("Could not register misc gntalloc device\n");
return err;
}
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 3c8803feba2..073b4a19a8b 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -19,6 +19,8 @@
#undef DEBUG
+#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
@@ -270,19 +272,12 @@ static int map_grant_pages(struct grant_map *map)
* with find_grant_ptes.
*/
for (i = 0; i < map->count; i++) {
- unsigned level;
unsigned long address = (unsigned long)
pfn_to_kaddr(page_to_pfn(map->pages[i]));
- pte_t *ptep;
- u64 pte_maddr = 0;
BUG_ON(PageHighMem(map->pages[i]));
- ptep = lookup_address(address, &level);
- pte_maddr = arbitrary_virt_to_machine(ptep).maddr;
- gnttab_set_map_op(&map->kmap_ops[i], pte_maddr,
- map->flags |
- GNTMAP_host_map |
- GNTMAP_contains_pte,
+ gnttab_set_map_op(&map->kmap_ops[i], address,
+ map->flags | GNTMAP_host_map,
map->grants[i].ref,
map->grants[i].domid);
}
@@ -760,7 +755,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
if (use_ptemod && map->vma)
goto unlock_out;
if (use_ptemod && priv->mm != vma->vm_mm) {
- printk(KERN_WARNING "Huh? Other mm?\n");
+ pr_warn("Huh? Other mm?\n");
goto unlock_out;
}
@@ -795,7 +790,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
vma->vm_end - vma->vm_start,
find_grant_ptes, map);
if (err) {
- printk(KERN_WARNING "find_grant_ptes() failure.\n");
+ pr_warn("find_grant_ptes() failure.\n");
goto out_put_map;
}
}
@@ -851,11 +846,11 @@ static int __init gntdev_init(void)
if (!xen_domain())
return -ENODEV;
- use_ptemod = xen_pv_domain();
+ use_ptemod = !xen_feature(XENFEAT_auto_translated_physmap);
err = misc_register(&gntdev_miscdev);
if (err != 0) {
- printk(KERN_ERR "Could not register gntdev device\n");
+ pr_err("Could not register gntdev device\n");
return err;
}
return 0;
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 157c0ccda3e..eeba7544f0c 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -31,6 +31,8 @@
* IN THE SOFTWARE.
*/
+#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/mm.h>
@@ -47,6 +49,7 @@
#include <xen/grant_table.h>
#include <xen/interface/memory.h>
#include <xen/hvc-console.h>
+#include <xen/swiotlb-xen.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/interface.h>
@@ -59,12 +62,10 @@
static grant_ref_t **gnttab_list;
static unsigned int nr_grant_frames;
-static unsigned int boot_max_nr_grant_frames;
static int gnttab_free_count;
static grant_ref_t gnttab_free_head;
static DEFINE_SPINLOCK(gnttab_list_lock);
-unsigned long xen_hvm_resume_frames;
-EXPORT_SYMBOL_GPL(xen_hvm_resume_frames);
+struct grant_frames xen_auto_xlat_grant_frames;
static union {
struct grant_entry_v1 *v1;
@@ -508,8 +509,7 @@ static void gnttab_handle_deferred(unsigned long unused)
entry = NULL;
} else {
if (!--entry->warn_delay)
- pr_info("g.e. %#x still pending\n",
- entry->ref);
+ pr_info("g.e. %#x still pending\n", entry->ref);
if (!first)
first = entry;
}
@@ -729,9 +729,18 @@ void gnttab_request_free_callback(struct gnttab_free_callback *callback,
void (*fn)(void *), void *arg, u16 count)
{
unsigned long flags;
+ struct gnttab_free_callback *cb;
+
spin_lock_irqsave(&gnttab_list_lock, flags);
- if (callback->next)
- goto out;
+
+ /* Check if the callback is already on the list */
+ cb = gnttab_free_callback_list;
+ while (cb) {
+ if (cb == callback)
+ goto out;
+ cb = cb->next;
+ }
+
callback->fn = fn;
callback->arg = arg;
callback->count = count;
@@ -816,6 +825,11 @@ static unsigned int __max_nr_grant_frames(void)
unsigned int gnttab_max_grant_frames(void)
{
unsigned int xen_max = __max_nr_grant_frames();
+ static unsigned int boot_max_nr_grant_frames;
+
+ /* First time, initialize it properly. */
+ if (!boot_max_nr_grant_frames)
+ boot_max_nr_grant_frames = __max_nr_grant_frames();
if (xen_max > boot_max_nr_grant_frames)
return boot_max_nr_grant_frames;
@@ -823,6 +837,51 @@ unsigned int gnttab_max_grant_frames(void)
}
EXPORT_SYMBOL_GPL(gnttab_max_grant_frames);
+int gnttab_setup_auto_xlat_frames(phys_addr_t addr)
+{
+ xen_pfn_t *pfn;
+ unsigned int max_nr_gframes = __max_nr_grant_frames();
+ unsigned int i;
+ void *vaddr;
+
+ if (xen_auto_xlat_grant_frames.count)
+ return -EINVAL;
+
+ vaddr = xen_remap(addr, PAGE_SIZE * max_nr_gframes);
+ if (vaddr == NULL) {
+ pr_warn("Failed to ioremap gnttab share frames (addr=%pa)!\n",
+ &addr);
+ return -ENOMEM;
+ }
+ pfn = kcalloc(max_nr_gframes, sizeof(pfn[0]), GFP_KERNEL);
+ if (!pfn) {
+ xen_unmap(vaddr);
+ return -ENOMEM;
+ }
+ for (i = 0; i < max_nr_gframes; i++)
+ pfn[i] = PFN_DOWN(addr) + i;
+
+ xen_auto_xlat_grant_frames.vaddr = vaddr;
+ xen_auto_xlat_grant_frames.pfn = pfn;
+ xen_auto_xlat_grant_frames.count = max_nr_gframes;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(gnttab_setup_auto_xlat_frames);
+
+void gnttab_free_auto_xlat_frames(void)
+{
+ if (!xen_auto_xlat_grant_frames.count)
+ return;
+ kfree(xen_auto_xlat_grant_frames.pfn);
+ xen_unmap(xen_auto_xlat_grant_frames.vaddr);
+
+ xen_auto_xlat_grant_frames.pfn = NULL;
+ xen_auto_xlat_grant_frames.count = 0;
+ xen_auto_xlat_grant_frames.vaddr = NULL;
+}
+EXPORT_SYMBOL_GPL(gnttab_free_auto_xlat_frames);
+
/* Handling of paged out grant targets (GNTST_eagain) */
#define MAX_DELAY 256
static inline void
@@ -838,7 +897,7 @@ gnttab_retry_eagain_gop(unsigned int cmd, void *gop, int16_t *status,
} while ((*status == GNTST_eagain) && (delay < MAX_DELAY));
if (delay >= MAX_DELAY) {
- printk(KERN_ERR "%s: %s eagain grant\n", func, current->comm);
+ pr_err("%s: %s eagain grant\n", func, current->comm);
*status = GNTST_bad_page;
}
}
@@ -874,9 +933,6 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
struct page **pages, unsigned int count)
{
int i, ret;
- bool lazy = false;
- pte_t *pte;
- unsigned long mfn;
ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map_ops, count);
if (ret)
@@ -888,36 +944,7 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
gnttab_retry_eagain_gop(GNTTABOP_map_grant_ref, map_ops + i,
&map_ops[i].status, __func__);
- if (xen_feature(XENFEAT_auto_translated_physmap))
- return ret;
-
- if (!in_interrupt() && paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) {
- arch_enter_lazy_mmu_mode();
- lazy = true;
- }
-
- for (i = 0; i < count; i++) {
- /* Do not add to override if the map failed. */
- if (map_ops[i].status)
- continue;
-
- if (map_ops[i].flags & GNTMAP_contains_pte) {
- pte = (pte_t *) (mfn_to_virt(PFN_DOWN(map_ops[i].host_addr)) +
- (map_ops[i].host_addr & ~PAGE_MASK));
- mfn = pte_mfn(*pte);
- } else {
- mfn = PFN_DOWN(map_ops[i].dev_bus_addr);
- }
- ret = m2p_add_override(mfn, pages[i], kmap_ops ?
- &kmap_ops[i] : NULL);
- if (ret)
- return ret;
- }
-
- if (lazy)
- arch_leave_lazy_mmu_mode();
-
- return ret;
+ return set_foreign_p2m_mapping(map_ops, kmap_ops, pages, count);
}
EXPORT_SYMBOL_GPL(gnttab_map_refs);
@@ -925,32 +952,13 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
struct gnttab_map_grant_ref *kmap_ops,
struct page **pages, unsigned int count)
{
- int i, ret;
- bool lazy = false;
+ int ret;
ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap_ops, count);
if (ret)
return ret;
- if (xen_feature(XENFEAT_auto_translated_physmap))
- return ret;
-
- if (!in_interrupt() && paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) {
- arch_enter_lazy_mmu_mode();
- lazy = true;
- }
-
- for (i = 0; i < count; i++) {
- ret = m2p_remove_override(pages[i], kmap_ops ?
- &kmap_ops[i] : NULL);
- if (ret)
- return ret;
- }
-
- if (lazy)
- arch_leave_lazy_mmu_mode();
-
- return ret;
+ return clear_foreign_p2m_mapping(unmap_ops, kmap_ops, pages, count);
}
EXPORT_SYMBOL_GPL(gnttab_unmap_refs);
@@ -1033,10 +1041,11 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
unsigned int nr_gframes = end_idx + 1;
int rc;
- if (xen_hvm_domain()) {
+ if (xen_feature(XENFEAT_auto_translated_physmap)) {
struct xen_add_to_physmap xatp;
unsigned int i = end_idx;
rc = 0;
+ BUG_ON(xen_auto_xlat_grant_frames.count < nr_gframes);
/*
* Loop backwards, so that the first hypercall has the largest
* index, ensuring that the table will grow only once.
@@ -1045,11 +1054,11 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
xatp.domid = DOMID_SELF;
xatp.idx = i;
xatp.space = XENMAPSPACE_grant_table;
- xatp.gpfn = (xen_hvm_resume_frames >> PAGE_SHIFT) + i;
+ xatp.gpfn = xen_auto_xlat_grant_frames.pfn[i];
rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp);
if (rc != 0) {
- printk(KERN_WARNING
- "grant table add_to_physmap failed, err=%d\n", rc);
+ pr_warn("grant table add_to_physmap failed, err=%d\n",
+ rc);
break;
}
} while (i-- > start_idx);
@@ -1108,10 +1117,8 @@ static void gnttab_request_version(void)
int rc;
struct gnttab_set_version gsv;
- if (xen_hvm_domain())
- gsv.version = 1;
- else
- gsv.version = 2;
+ gsv.version = 1;
+
rc = HYPERVISOR_grant_table_op(GNTTABOP_set_version, &gsv, 1);
if (rc == 0 && gsv.version == 2) {
grant_table_version = 2;
@@ -1131,8 +1138,7 @@ static void gnttab_request_version(void)
grefs_per_grant_frame = PAGE_SIZE / sizeof(struct grant_entry_v1);
gnttab_interface = &gnttab_v1_ops;
}
- printk(KERN_INFO "Grant tables using version %d layout.\n",
- grant_table_version);
+ pr_info("Grant tables using version %d layout\n", grant_table_version);
}
static int gnttab_setup(void)
@@ -1143,22 +1149,15 @@ static int gnttab_setup(void)
if (max_nr_gframes < nr_grant_frames)
return -ENOSYS;
- if (xen_pv_domain())
- return gnttab_map(0, nr_grant_frames - 1);
-
- if (gnttab_shared.addr == NULL) {
- gnttab_shared.addr = ioremap(xen_hvm_resume_frames,
- PAGE_SIZE * max_nr_gframes);
+ if (xen_feature(XENFEAT_auto_translated_physmap) && gnttab_shared.addr == NULL) {
+ gnttab_shared.addr = xen_auto_xlat_grant_frames.vaddr;
if (gnttab_shared.addr == NULL) {
- printk(KERN_WARNING
- "Failed to ioremap gnttab share frames!");
+ pr_warn("gnttab share frames (addr=0x%08lx) is not mapped!\n",
+ (unsigned long)xen_auto_xlat_grant_frames.vaddr);
return -ENOMEM;
}
}
-
- gnttab_map(0, nr_grant_frames - 1);
-
- return 0;
+ return gnttab_map(0, nr_grant_frames - 1);
}
int gnttab_resume(void)
@@ -1169,7 +1168,8 @@ int gnttab_resume(void)
int gnttab_suspend(void)
{
- gnttab_interface->unmap_frames();
+ if (!xen_feature(XENFEAT_auto_translated_physmap))
+ gnttab_interface->unmap_frames();
return 0;
}
@@ -1195,19 +1195,20 @@ static int gnttab_expand(unsigned int req_entries)
int gnttab_init(void)
{
int i;
+ unsigned long max_nr_grant_frames;
unsigned int max_nr_glist_frames, nr_glist_frames;
unsigned int nr_init_grefs;
int ret;
gnttab_request_version();
+ max_nr_grant_frames = gnttab_max_grant_frames();
nr_grant_frames = 1;
- boot_max_nr_grant_frames = __max_nr_grant_frames();
/* Determine the maximum number of frames required for the
* grant reference free list on the current hypervisor.
*/
BUG_ON(grefs_per_grant_frame == 0);
- max_nr_glist_frames = (boot_max_nr_grant_frames *
+ max_nr_glist_frames = (max_nr_grant_frames *
grefs_per_grant_frame / RPP);
gnttab_list = kmalloc(max_nr_glist_frames * sizeof(grant_ref_t *),
@@ -1224,6 +1225,11 @@ int gnttab_init(void)
}
}
+ ret = arch_gnttab_init(max_nr_grant_frames,
+ nr_status_frames(max_nr_grant_frames));
+ if (ret < 0)
+ goto ini_nomem;
+
if (gnttab_setup() < 0) {
ret = -ENODEV;
goto ini_nomem;
@@ -1260,5 +1266,6 @@ static int __gnttab_init(void)
return gnttab_init();
}
-
-core_initcall(__gnttab_init);
+/* Starts after core_initcall so that xen_pvh_gnttab_setup can be called
+ * beforehand to initialize xen_auto_xlat_grant_frames. */
+core_initcall_sync(__gnttab_init);
diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
index 412b96cc530..5f1e1f3cd18 100644
--- a/drivers/xen/manage.c
+++ b/drivers/xen/manage.c
@@ -1,6 +1,9 @@
/*
* Handle extern requests for shutdown, reboot and sysrq
*/
+
+#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
+
#include <linux/kernel.h>
#include <linux/err.h>
#include <linux/slab.h>
@@ -38,30 +41,21 @@ static enum shutdown_state shutting_down = SHUTDOWN_INVALID;
struct suspend_info {
int cancelled;
- unsigned long arg; /* extra hypercall argument */
- void (*pre)(void);
- void (*post)(int cancelled);
};
-static void xen_hvm_post_suspend(int cancelled)
-{
- xen_arch_hvm_post_suspend(cancelled);
- gnttab_resume();
-}
+static RAW_NOTIFIER_HEAD(xen_resume_notifier);
-static void xen_pre_suspend(void)
+void xen_resume_notifier_register(struct notifier_block *nb)
{
- xen_mm_pin_all();
- gnttab_suspend();
- xen_arch_pre_suspend();
+ raw_notifier_chain_register(&xen_resume_notifier, nb);
}
+EXPORT_SYMBOL_GPL(xen_resume_notifier_register);
-static void xen_post_suspend(int cancelled)
+void xen_resume_notifier_unregister(struct notifier_block *nb)
{
- xen_arch_post_suspend(cancelled);
- gnttab_resume();
- xen_mm_unpin_all();
+ raw_notifier_chain_unregister(&xen_resume_notifier, nb);
}
+EXPORT_SYMBOL_GPL(xen_resume_notifier_unregister);
#ifdef CONFIG_HIBERNATE_CALLBACKS
static int xen_suspend(void *data)
@@ -73,27 +67,27 @@ static int xen_suspend(void *data)
err = syscore_suspend();
if (err) {
- printk(KERN_ERR "xen_suspend: system core suspend failed: %d\n",
- err);
+ pr_err("%s: system core suspend failed: %d\n", __func__, err);
return err;
}
- if (si->pre)
- si->pre();
+ gnttab_suspend();
+ xen_arch_pre_suspend();
/*
* This hypercall returns 1 if suspend was cancelled
* or the domain was merely checkpointed, and 0 if it
* is resuming in a new domain.
*/
- si->cancelled = HYPERVISOR_suspend(si->arg);
+ si->cancelled = HYPERVISOR_suspend(xen_pv_domain()
+ ? virt_to_mfn(xen_start_info)
+ : 0);
- if (si->post)
- si->post(si->cancelled);
+ xen_arch_post_suspend(si->cancelled);
+ gnttab_resume();
if (!si->cancelled) {
xen_irq_resume();
- xen_console_resume();
xen_timer_resume();
}
@@ -115,14 +109,14 @@ static void do_suspend(void)
during suspend. */
err = freeze_processes();
if (err) {
- printk(KERN_ERR "xen suspend: freeze failed %d\n", err);
+ pr_err("%s: freeze failed %d\n", __func__, err);
goto out;
}
#endif
err = dpm_suspend_start(PMSG_FREEZE);
if (err) {
- printk(KERN_ERR "xen suspend: dpm_suspend_start %d\n", err);
+ pr_err("%s: dpm_suspend_start %d\n", __func__, err);
goto out_thaw;
}
@@ -131,29 +125,25 @@ static void do_suspend(void)
err = dpm_suspend_end(PMSG_FREEZE);
if (err) {
- printk(KERN_ERR "dpm_suspend_end failed: %d\n", err);
+ pr_err("dpm_suspend_end failed: %d\n", err);
si.cancelled = 0;
goto out_resume;
}
si.cancelled = 1;
- if (xen_hvm_domain()) {
- si.arg = 0UL;
- si.pre = NULL;
- si.post = &xen_hvm_post_suspend;
- } else {
- si.arg = virt_to_mfn(xen_start_info);
- si.pre = &xen_pre_suspend;
- si.post = &xen_post_suspend;
- }
-
err = stop_machine(xen_suspend, &si, cpumask_of(0));
+ /* Resume console as early as possible. */
+ if (!si.cancelled)
+ xen_console_resume();
+
+ raw_notifier_call_chain(&xen_resume_notifier, 0, NULL);
+
dpm_resume_start(si.cancelled ? PMSG_THAW : PMSG_RESTORE);
if (err) {
- printk(KERN_ERR "failed to start xen_suspend: %d\n", err);
+ pr_err("failed to start xen_suspend: %d\n", err);
si.cancelled = 1;
}
@@ -166,9 +156,6 @@ out_resume:
dpm_resume_end(si.cancelled ? PMSG_THAW : PMSG_RESTORE);
- /* Make sure timer events get retriggered on all CPUs */
- clock_was_set();
-
out_thaw:
#ifdef CONFIG_PREEMPT
thaw_processes();
@@ -183,10 +170,32 @@ struct shutdown_handler {
void (*cb)(void);
};
+static int poweroff_nb(struct notifier_block *cb, unsigned long code, void *unused)
+{
+ switch (code) {
+ case SYS_DOWN:
+ case SYS_HALT:
+ case SYS_POWER_OFF:
+ shutting_down = SHUTDOWN_POWEROFF;
+ default:
+ break;
+ }
+ return NOTIFY_DONE;
+}
static void do_poweroff(void)
{
- shutting_down = SHUTDOWN_POWEROFF;
- orderly_poweroff(false);
+ switch (system_state) {
+ case SYSTEM_BOOTING:
+ orderly_poweroff(true);
+ break;
+ case SYSTEM_RUNNING:
+ orderly_poweroff(false);
+ break;
+ default:
+ /* Don't do it when we are halting/rebooting. */
+ pr_info("Ignoring Xen toolstack shutdown.\n");
+ break;
+ }
}
static void do_reboot(void)
@@ -245,7 +254,7 @@ static void shutdown_handler(struct xenbus_watch *watch,
if (handler->cb) {
handler->cb();
} else {
- printk(KERN_INFO "Ignoring shutdown request: %s\n", str);
+ pr_info("Ignoring shutdown request: %s\n", str);
shutting_down = SHUTDOWN_INVALID;
}
@@ -265,8 +274,7 @@ static void sysrq_handler(struct xenbus_watch *watch, const char **vec,
if (err)
return;
if (!xenbus_scanf(xbt, "control", "sysrq", "%c", &sysrq_key)) {
- printk(KERN_ERR "Unable to read sysrq code in "
- "control/sysrq\n");
+ pr_err("Unable to read sysrq code in control/sysrq\n");
xenbus_transaction_end(xbt, 1);
return;
}
@@ -293,20 +301,25 @@ static struct xenbus_watch shutdown_watch = {
.callback = shutdown_handler
};
+static struct notifier_block xen_reboot_nb = {
+ .notifier_call = poweroff_nb,
+};
+
static int setup_shutdown_watcher(void)
{
int err;
err = register_xenbus_watch(&shutdown_watch);
if (err) {
- printk(KERN_ERR "Failed to set shutdown watcher\n");
+ pr_err("Failed to set shutdown watcher\n");
return err;
}
+
#ifdef CONFIG_MAGIC_SYSRQ
err = register_xenbus_watch(&sysrq_watch);
if (err) {
- printk(KERN_ERR "Failed to set sysrq watcher\n");
+ pr_err("Failed to set sysrq watcher\n");
return err;
}
#endif
@@ -331,6 +344,7 @@ int xen_setup_shutdown_event(void)
if (!xen_domain())
return -ENODEV;
register_xenstore_notifier(&xenstore_notifier);
+ register_reboot_notifier(&xen_reboot_nb);
return 0;
}
diff --git a/drivers/xen/mcelog.c b/drivers/xen/mcelog.c
index 8feee08bcb4..6ab6a79c38a 100644
--- a/drivers/xen/mcelog.c
+++ b/drivers/xen/mcelog.c
@@ -32,6 +32,8 @@
* IN THE SOFTWARE.
*/
+#define pr_fmt(fmt) "xen_mcelog: " fmt
+
#include <linux/init.h>
#include <linux/types.h>
#include <linux/kernel.h>
@@ -51,8 +53,6 @@
#include <asm/xen/hypercall.h>
#include <asm/xen/hypervisor.h>
-#define XEN_MCELOG "xen_mcelog: "
-
static struct mc_info g_mi;
static struct mcinfo_logical_cpu *g_physinfo;
static uint32_t ncpus;
@@ -227,7 +227,7 @@ static int convert_log(struct mc_info *mi)
mic = NULL;
x86_mcinfo_lookup(&mic, mi, MC_TYPE_GLOBAL);
if (unlikely(!mic)) {
- pr_warning(XEN_MCELOG "Failed to find global error info\n");
+ pr_warn("Failed to find global error info\n");
return -ENODEV;
}
@@ -241,8 +241,7 @@ static int convert_log(struct mc_info *mi)
if (g_physinfo[i].mc_apicid == m.apicid)
break;
if (unlikely(i == ncpus)) {
- pr_warning(XEN_MCELOG "Failed to match cpu with apicid %d\n",
- m.apicid);
+ pr_warn("Failed to match cpu with apicid %d\n", m.apicid);
return -ENODEV;
}
@@ -254,7 +253,7 @@ static int convert_log(struct mc_info *mi)
mic = NULL;
x86_mcinfo_lookup(&mic, mi, MC_TYPE_BANK);
if (unlikely(!mic)) {
- pr_warning(XEN_MCELOG "Fail to find bank error info\n");
+ pr_warn("Fail to find bank error info\n");
return -ENODEV;
}
@@ -295,9 +294,8 @@ static int mc_queue_handle(uint32_t flags)
mc_op.u.mc_fetch.flags = flags;
ret = HYPERVISOR_mca(&mc_op);
if (ret) {
- pr_err(XEN_MCELOG "Failed to fetch %s error log\n",
- (flags == XEN_MC_URGENT) ?
- "urgnet" : "nonurgent");
+ pr_err("Failed to fetch %surgent error log\n",
+ flags == XEN_MC_URGENT ? "" : "non");
break;
}
@@ -307,15 +305,12 @@ static int mc_queue_handle(uint32_t flags)
else {
ret = convert_log(&g_mi);
if (ret)
- pr_warning(XEN_MCELOG
- "Failed to convert this error log, "
- "continue acking it anyway\n");
+ pr_warn("Failed to convert this error log, continue acking it anyway\n");
mc_op.u.mc_fetch.flags = flags | XEN_MC_ACK;
ret = HYPERVISOR_mca(&mc_op);
if (ret) {
- pr_err(XEN_MCELOG
- "Failed to ack previous error log\n");
+ pr_err("Failed to ack previous error log\n");
break;
}
}
@@ -334,15 +329,12 @@ static void xen_mce_work_fn(struct work_struct *work)
/* urgent mc_info */
err = mc_queue_handle(XEN_MC_URGENT);
if (err)
- pr_err(XEN_MCELOG
- "Failed to handle urgent mc_info queue, "
- "continue handling nonurgent mc_info queue anyway.\n");
+ pr_err("Failed to handle urgent mc_info queue, continue handling nonurgent mc_info queue anyway\n");
/* nonurgent mc_info */
err = mc_queue_handle(XEN_MC_NONURGENT);
if (err)
- pr_err(XEN_MCELOG
- "Failed to handle nonurgent mc_info queue.\n");
+ pr_err("Failed to handle nonurgent mc_info queue\n");
/* wake processes polling /dev/mcelog */
wake_up_interruptible(&xen_mce_chrdev_wait);
@@ -370,7 +362,7 @@ static int bind_virq_for_mce(void)
set_xen_guest_handle(mc_op.u.mc_physcpuinfo.info, g_physinfo);
ret = HYPERVISOR_mca(&mc_op);
if (ret) {
- pr_err(XEN_MCELOG "Failed to get CPU numbers\n");
+ pr_err("Failed to get CPU numbers\n");
return ret;
}
@@ -383,7 +375,7 @@ static int bind_virq_for_mce(void)
set_xen_guest_handle(mc_op.u.mc_physcpuinfo.info, g_physinfo);
ret = HYPERVISOR_mca(&mc_op);
if (ret) {
- pr_err(XEN_MCELOG "Failed to get CPU info\n");
+ pr_err("Failed to get CPU info\n");
kfree(g_physinfo);
return ret;
}
@@ -391,7 +383,7 @@ static int bind_virq_for_mce(void)
ret = bind_virq_to_irqhandler(VIRQ_MCA, 0,
xen_mce_interrupt, 0, "mce", NULL);
if (ret < 0) {
- pr_err(XEN_MCELOG "Failed to bind virq\n");
+ pr_err("Failed to bind virq\n");
kfree(g_physinfo);
return ret;
}
diff --git a/drivers/xen/pci.c b/drivers/xen/pci.c
index 18fff88254e..dd9c249ea31 100644
--- a/drivers/xen/pci.c
+++ b/drivers/xen/pci.c
@@ -26,6 +26,9 @@
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>
#include "../pci/pci.h"
+#ifdef CONFIG_PCI_MMCONFIG
+#include <asm/pci_x86.h>
+#endif
static bool __read_mostly pci_seg_supported = true;
@@ -58,12 +61,12 @@ static int xen_add_device(struct device *dev)
add.flags = XEN_PCI_DEV_EXTFN;
#ifdef CONFIG_ACPI
- handle = DEVICE_ACPI_HANDLE(&pci_dev->dev);
+ handle = ACPI_HANDLE(&pci_dev->dev);
if (!handle && pci_dev->bus->bridge)
- handle = DEVICE_ACPI_HANDLE(pci_dev->bus->bridge);
+ handle = ACPI_HANDLE(pci_dev->bus->bridge);
#ifdef CONFIG_PCI_IOV
if (!handle && pci_dev->is_virtfn)
- handle = DEVICE_ACPI_HANDLE(physfn->bus->bridge);
+ handle = ACPI_HANDLE(physfn->bus->bridge);
#endif
if (handle) {
acpi_status status;
@@ -192,3 +195,49 @@ static int __init register_xen_pci_notifier(void)
}
arch_initcall(register_xen_pci_notifier);
+
+#ifdef CONFIG_PCI_MMCONFIG
+static int __init xen_mcfg_late(void)
+{
+ struct pci_mmcfg_region *cfg;
+ int rc;
+
+ if (!xen_initial_domain())
+ return 0;
+
+ if ((pci_probe & PCI_PROBE_MMCONF) == 0)
+ return 0;
+
+ if (list_empty(&pci_mmcfg_list))
+ return 0;
+
+ /* Check whether they are in the right area. */
+ list_for_each_entry(cfg, &pci_mmcfg_list, list) {
+ struct physdev_pci_mmcfg_reserved r;
+
+ r.address = cfg->address;
+ r.segment = cfg->segment;
+ r.start_bus = cfg->start_bus;
+ r.end_bus = cfg->end_bus;
+ r.flags = XEN_PCI_MMCFG_RESERVED;
+
+ rc = HYPERVISOR_physdev_op(PHYSDEVOP_pci_mmcfg_reserved, &r);
+ switch (rc) {
+ case 0:
+ case -ENOSYS:
+ continue;
+
+ default:
+ pr_warn("Failed to report MMCONFIG reservation"
+ " state for %s to hypervisor"
+ " (%d)\n",
+ cfg->name, rc);
+ }
+ }
+ return 0;
+}
+/*
+ * Needs to be done after acpi_init which are subsys_initcall.
+ */
+subsys_initcall_sync(xen_mcfg_late);
+#endif
diff --git a/drivers/xen/pcpu.c b/drivers/xen/pcpu.c
index 067fcfa1723..0aac403d53f 100644
--- a/drivers/xen/pcpu.c
+++ b/drivers/xen/pcpu.c
@@ -31,6 +31,8 @@
* IN THE SOFTWARE.
*/
+#define pr_fmt(fmt) "xen_cpu: " fmt
+
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/cpu.h>
@@ -38,13 +40,13 @@
#include <linux/capability.h>
#include <xen/xen.h>
+#include <xen/acpi.h>
#include <xen/xenbus.h>
#include <xen/events.h>
#include <xen/interface/platform.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>
-#define XEN_PCPU "xen_cpu: "
/*
* @cpu_id: Xen physical cpu logic number
@@ -242,8 +244,7 @@ static struct pcpu *create_and_register_pcpu(struct xenpf_pcpuinfo *info)
err = register_pcpu(pcpu);
if (err) {
- pr_warning(XEN_PCPU "Failed to register pcpu%u\n",
- info->xen_cpuid);
+ pr_warn("Failed to register pcpu%u\n", info->xen_cpuid);
return ERR_PTR(-ENOENT);
}
@@ -278,8 +279,7 @@ static int sync_pcpu(uint32_t cpu, uint32_t *max_cpu)
* Only those at cpu present map has its sys interface.
*/
if (info->flags & XEN_PCPU_FLAGS_INVALID) {
- if (pcpu)
- unregister_and_remove_pcpu(pcpu);
+ unregister_and_remove_pcpu(pcpu);
return 0;
}
@@ -333,6 +333,41 @@ static irqreturn_t xen_pcpu_interrupt(int irq, void *dev_id)
return IRQ_HANDLED;
}
+/* Sync with Xen hypervisor after cpu hotadded */
+void xen_pcpu_hotplug_sync(void)
+{
+ schedule_work(&xen_pcpu_work);
+}
+EXPORT_SYMBOL_GPL(xen_pcpu_hotplug_sync);
+
+/*
+ * For hypervisor presented cpu, return logic cpu id;
+ * For hypervisor non-presented cpu, return -ENODEV.
+ */
+int xen_pcpu_id(uint32_t acpi_id)
+{
+ int cpu_id = 0, max_id = 0;
+ struct xen_platform_op op;
+
+ op.cmd = XENPF_get_cpuinfo;
+ while (cpu_id <= max_id) {
+ op.u.pcpu_info.xen_cpuid = cpu_id;
+ if (HYPERVISOR_dom0_op(&op)) {
+ cpu_id++;
+ continue;
+ }
+
+ if (acpi_id == op.u.pcpu_info.acpi_id)
+ return cpu_id;
+ if (op.u.pcpu_info.max_present > max_id)
+ max_id = op.u.pcpu_info.max_present;
+ cpu_id++;
+ }
+
+ return -ENODEV;
+}
+EXPORT_SYMBOL_GPL(xen_pcpu_id);
+
static int __init xen_pcpu_init(void)
{
int irq, ret;
@@ -344,19 +379,19 @@ static int __init xen_pcpu_init(void)
xen_pcpu_interrupt, 0,
"xen-pcpu", NULL);
if (irq < 0) {
- pr_warning(XEN_PCPU "Failed to bind pcpu virq\n");
+ pr_warn("Failed to bind pcpu virq\n");
return irq;
}
ret = subsys_system_register(&xen_pcpu_subsys, NULL);
if (ret) {
- pr_warning(XEN_PCPU "Failed to register pcpu subsys\n");
+ pr_warn("Failed to register pcpu subsys\n");
goto err1;
}
ret = xen_sync_pcpus();
if (ret) {
- pr_warning(XEN_PCPU "Failed to sync pcpu info\n");
+ pr_warn("Failed to sync pcpu info\n");
goto err2;
}
diff --git a/drivers/xen/platform-pci.c b/drivers/xen/platform-pci.c
index 99db9e1eb8b..3454973dc3b 100644
--- a/drivers/xen/platform-pci.c
+++ b/drivers/xen/platform-pci.c
@@ -45,7 +45,7 @@ static unsigned long platform_mmio_alloc;
static unsigned long platform_mmiolen;
static uint64_t callback_via;
-unsigned long alloc_xen_mmio(unsigned long len)
+static unsigned long alloc_xen_mmio(unsigned long len)
{
unsigned long addr;
@@ -84,7 +84,7 @@ static irqreturn_t do_hvm_evtchn_intr(int irq, void *dev_id)
static int xen_allocate_irq(struct pci_dev *pdev)
{
return request_irq(pdev->irq, do_hvm_evtchn_intr,
- IRQF_DISABLED | IRQF_NOBALANCING | IRQF_TRIGGER_RISING,
+ IRQF_NOBALANCING | IRQF_TRIGGER_RISING,
"xen-platform-pci", pdev);
}
@@ -108,6 +108,7 @@ static int platform_pci_init(struct pci_dev *pdev,
long ioaddr;
long mmio_addr, mmio_len;
unsigned int max_nr_gframes;
+ unsigned long grant_frames;
if (!xen_domain())
return -ENODEV;
@@ -154,13 +155,17 @@ static int platform_pci_init(struct pci_dev *pdev,
}
max_nr_gframes = gnttab_max_grant_frames();
- xen_hvm_resume_frames = alloc_xen_mmio(PAGE_SIZE * max_nr_gframes);
- ret = gnttab_init();
+ grant_frames = alloc_xen_mmio(PAGE_SIZE * max_nr_gframes);
+ ret = gnttab_setup_auto_xlat_frames(grant_frames);
if (ret)
goto out;
+ ret = gnttab_init();
+ if (ret)
+ goto grant_out;
xenbus_probe(NULL);
return 0;
-
+grant_out:
+ gnttab_free_auto_xlat_frames();
out:
pci_release_region(pdev, 0);
mem_out:
diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
index ca2b00e9d55..569a13b9e85 100644
--- a/drivers/xen/privcmd.c
+++ b/drivers/xen/privcmd.c
@@ -6,6 +6,8 @@
* Copyright (c) 2002-2004, K A Fraser, B Dragovic
*/
+#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
+
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sched.h>
@@ -41,9 +43,10 @@ MODULE_LICENSE("GPL");
#define PRIV_VMA_LOCKED ((void *)1)
-#ifndef HAVE_ARCH_PRIVCMD_MMAP
-static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma);
-#endif
+static int privcmd_vma_range_is_mapped(
+ struct vm_area_struct *vma,
+ unsigned long addr,
+ unsigned long nr_pages);
static long privcmd_ioctl_hypercall(void __user *udata)
{
@@ -223,9 +226,9 @@ static long privcmd_ioctl_mmap(void __user *udata)
vma = find_vma(mm, msg->va);
rc = -EINVAL;
- if (!vma || (msg->va != vma->vm_start) ||
- !privcmd_enforce_singleshot_mapping(vma))
+ if (!vma || (msg->va != vma->vm_start) || vma->vm_private_data)
goto out_up;
+ vma->vm_private_data = PRIV_VMA_LOCKED;
}
state.va = vma->vm_start;
@@ -356,7 +359,7 @@ static int alloc_empty_pages(struct vm_area_struct *vma, int numpgs)
kfree(pages);
return -ENOMEM;
}
- BUG_ON(vma->vm_private_data != PRIV_VMA_LOCKED);
+ BUG_ON(vma->vm_private_data != NULL);
vma->vm_private_data = pages;
return 0;
@@ -419,19 +422,43 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
vma = find_vma(mm, m.addr);
if (!vma ||
- vma->vm_ops != &privcmd_vm_ops ||
- (m.addr != vma->vm_start) ||
- ((m.addr + (nr_pages << PAGE_SHIFT)) != vma->vm_end) ||
- !privcmd_enforce_singleshot_mapping(vma)) {
- up_write(&mm->mmap_sem);
+ vma->vm_ops != &privcmd_vm_ops) {
ret = -EINVAL;
- goto out;
+ goto out_unlock;
}
- if (xen_feature(XENFEAT_auto_translated_physmap)) {
- ret = alloc_empty_pages(vma, m.num);
- if (ret < 0) {
- up_write(&mm->mmap_sem);
- goto out;
+
+ /*
+ * Caller must either:
+ *
+ * Map the whole VMA range, which will also allocate all the
+ * pages required for the auto_translated_physmap case.
+ *
+ * Or
+ *
+ * Map unmapped holes left from a previous map attempt (e.g.,
+ * because those foreign frames were previously paged out).
+ */
+ if (vma->vm_private_data == NULL) {
+ if (m.addr != vma->vm_start ||
+ m.addr + (nr_pages << PAGE_SHIFT) != vma->vm_end) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+ if (xen_feature(XENFEAT_auto_translated_physmap)) {
+ ret = alloc_empty_pages(vma, m.num);
+ if (ret < 0)
+ goto out_unlock;
+ } else
+ vma->vm_private_data = PRIV_VMA_LOCKED;
+ } else {
+ if (m.addr < vma->vm_start ||
+ m.addr + (nr_pages << PAGE_SHIFT) > vma->vm_end) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+ if (privcmd_vma_range_is_mapped(vma, m.addr, nr_pages)) {
+ ret = -EINVAL;
+ goto out_unlock;
}
}
@@ -464,8 +491,11 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
out:
free_page_list(&pagelist);
-
return ret;
+
+out_unlock:
+ up_write(&mm->mmap_sem);
+ goto out;
}
static long privcmd_ioctl(struct file *file,
@@ -503,12 +533,17 @@ static void privcmd_close(struct vm_area_struct *vma)
{
struct page **pages = vma->vm_private_data;
int numpgs = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+ int rc;
- if (!xen_feature(XENFEAT_auto_translated_physmap || !numpgs || !pages))
+ if (!xen_feature(XENFEAT_auto_translated_physmap) || !numpgs || !pages)
return;
- xen_unmap_domain_mfn_range(vma, numpgs, pages);
- free_xenballooned_pages(numpgs, pages);
+ rc = xen_unmap_domain_mfn_range(vma, numpgs, pages);
+ if (rc == 0)
+ free_xenballooned_pages(numpgs, pages);
+ else
+ pr_crit("unable to unmap MFN range: leaking %d pages. rc=%d\n",
+ numpgs, rc);
kfree(pages);
}
@@ -538,9 +573,24 @@ static int privcmd_mmap(struct file *file, struct vm_area_struct *vma)
return 0;
}
-static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma)
+/*
+ * For MMAPBATCH*. This allows asserting the singleshot mapping
+ * on a per pfn/pte basis. Mapping calls that fail with ENOENT
+ * can be then retried until success.
+ */
+static int is_mapped_fn(pte_t *pte, struct page *pmd_page,
+ unsigned long addr, void *data)
+{
+ return pte_none(*pte) ? 0 : -EBUSY;
+}
+
+static int privcmd_vma_range_is_mapped(
+ struct vm_area_struct *vma,
+ unsigned long addr,
+ unsigned long nr_pages)
{
- return !cmpxchg(&vma->vm_private_data, NULL, PRIV_VMA_LOCKED);
+ return apply_to_page_range(vma->vm_mm, addr, nr_pages << PAGE_SHIFT,
+ is_mapped_fn, NULL) != 0;
}
const struct file_operations xen_privcmd_fops = {
@@ -565,7 +615,7 @@ static int __init privcmd_init(void)
err = misc_register(&privcmd_dev);
if (err != 0) {
- printk(KERN_ERR "Could not register Xen privcmd device\n");
+ pr_err("Could not register Xen privcmd device\n");
return err;
}
return 0;
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index af47e759446..ebd8f218a78 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -33,6 +33,8 @@
*
*/
+#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
+
#include <linux/bootmem.h>
#include <linux/dma-mapping.h>
#include <linux/export.h>
@@ -40,12 +42,31 @@
#include <xen/page.h>
#include <xen/xen-ops.h>
#include <xen/hvc-console.h>
+
+#include <asm/dma-mapping.h>
+#include <asm/xen/page-coherent.h>
+
+#include <trace/events/swiotlb.h>
/*
* Used to do a quick range check in swiotlb_tbl_unmap_single and
* swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this
* API.
*/
+#ifndef CONFIG_X86
+static unsigned long dma_alloc_coherent_mask(struct device *dev,
+ gfp_t gfp)
+{
+ unsigned long dma_mask = 0;
+
+ dma_mask = dev->coherent_dma_mask;
+ if (!dma_mask)
+ dma_mask = (gfp & GFP_DMA) ? DMA_BIT_MASK(24) : DMA_BIT_MASK(32);
+
+ return dma_mask;
+}
+#endif
+
static char *xen_io_tlb_start, *xen_io_tlb_end;
static unsigned long xen_io_tlb_nslabs;
/*
@@ -54,17 +75,35 @@ static unsigned long xen_io_tlb_nslabs;
static u64 start_dma_addr;
-static dma_addr_t xen_phys_to_bus(phys_addr_t paddr)
+/*
+ * Both of these functions should avoid PFN_PHYS because phys_addr_t
+ * can be 32bit when dma_addr_t is 64bit leading to a loss in
+ * information if the shift is done before casting to 64bit.
+ */
+static inline dma_addr_t xen_phys_to_bus(phys_addr_t paddr)
{
- return phys_to_machine(XPADDR(paddr)).maddr;
+ unsigned long mfn = pfn_to_mfn(PFN_DOWN(paddr));
+ dma_addr_t dma = (dma_addr_t)mfn << PAGE_SHIFT;
+
+ dma |= paddr & ~PAGE_MASK;
+
+ return dma;
}
-static phys_addr_t xen_bus_to_phys(dma_addr_t baddr)
+static inline phys_addr_t xen_bus_to_phys(dma_addr_t baddr)
{
- return machine_to_phys(XMADDR(baddr)).paddr;
+ unsigned long pfn = mfn_to_pfn(PFN_DOWN(baddr));
+ dma_addr_t dma = (dma_addr_t)pfn << PAGE_SHIFT;
+ phys_addr_t paddr = dma;
+
+ BUG_ON(paddr != dma); /* truncation has occurred, should never happen */
+
+ paddr |= baddr & ~PAGE_MASK;
+
+ return paddr;
}
-static dma_addr_t xen_virt_to_bus(void *address)
+static inline dma_addr_t xen_virt_to_bus(void *address)
{
return xen_phys_to_bus(virt_to_phys(address));
}
@@ -87,7 +126,7 @@ static int check_pages_physically_contiguous(unsigned long pfn,
return 1;
}
-static int range_straddles_page_boundary(phys_addr_t p, size_t size)
+static inline int range_straddles_page_boundary(phys_addr_t p, size_t size)
{
unsigned long pfn = PFN_DOWN(p);
unsigned int offset = p & ~PAGE_MASK;
@@ -124,6 +163,8 @@ xen_swiotlb_fixup(void *buf, size_t size, unsigned long nslabs)
{
int i, rc;
int dma_bits;
+ dma_addr_t dma_handle;
+ phys_addr_t p = virt_to_phys(buf);
dma_bits = get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT) + PAGE_SHIFT;
@@ -133,9 +174,9 @@ xen_swiotlb_fixup(void *buf, size_t size, unsigned long nslabs)
do {
rc = xen_create_contiguous_region(
- (unsigned long)buf + (i << IO_TLB_SHIFT),
+ p + (i << IO_TLB_SHIFT),
get_order(slabs << IO_TLB_SHIFT),
- dma_bits);
+ dma_bits, &dma_handle);
} while (rc && dma_bits++ < max_dma_bits);
if (rc)
return rc;
@@ -202,8 +243,8 @@ retry:
order--;
}
if (order != get_order(bytes)) {
- pr_warn("Warning: only able to allocate %ld MB "
- "for software IO TLB\n", (PAGE_SIZE << order) >> 20);
+ pr_warn("Warning: only able to allocate %ld MB for software IO TLB\n",
+ (PAGE_SIZE << order) >> 20);
xen_io_tlb_nslabs = SLABS_PER_PAGE << order;
bytes = xen_io_tlb_nslabs << IO_TLB_SHIFT;
}
@@ -231,7 +272,9 @@ retry:
}
start_dma_addr = xen_virt_to_bus(xen_io_tlb_start);
if (early) {
- swiotlb_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs, verbose);
+ if (swiotlb_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs,
+ verbose))
+ panic("Cannot allocate SWIOTLB buffer");
rc = 0;
} else
rc = swiotlb_late_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs);
@@ -240,11 +283,11 @@ error:
if (repeat--) {
xen_io_tlb_nslabs = max(1024UL, /* Min is 2MB */
(xen_io_tlb_nslabs >> 1));
- printk(KERN_INFO "Xen-SWIOTLB: Lowering to %luMB\n",
- (xen_io_tlb_nslabs << IO_TLB_SHIFT) >> 20);
+ pr_info("Lowering to %luMB\n",
+ (xen_io_tlb_nslabs << IO_TLB_SHIFT) >> 20);
goto retry;
}
- pr_err("%s (rc:%d)", xen_swiotlb_error(m_ret), rc);
+ pr_err("%s (rc:%d)\n", xen_swiotlb_error(m_ret), rc);
if (early)
panic("%s (rc:%d)", xen_swiotlb_error(m_ret), rc);
else
@@ -259,7 +302,6 @@ xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
void *ret;
int order = get_order(size);
u64 dma_mask = DMA_BIT_MASK(32);
- unsigned long vstart;
phys_addr_t phys;
dma_addr_t dev_addr;
@@ -274,8 +316,12 @@ xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
if (dma_alloc_from_coherent(hwdev, size, dma_handle, &ret))
return ret;
- vstart = __get_free_pages(flags, order);
- ret = (void *)vstart;
+ /* On ARM this function returns an ioremap'ped virtual address for
+ * which virt_to_phys doesn't return the corresponding physical
+ * address. In fact on ARM virt_to_phys only works for kernel direct
+ * mapped RAM memory. Also see comment below.
+ */
+ ret = xen_alloc_coherent_pages(hwdev, size, dma_handle, flags, attrs);
if (!ret)
return ret;
@@ -283,18 +329,21 @@ xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
if (hwdev && hwdev->coherent_dma_mask)
dma_mask = dma_alloc_coherent_mask(hwdev, flags);
- phys = virt_to_phys(ret);
+ /* At this point dma_handle is the physical address, next we are
+ * going to set it to the machine address.
+ * Do not use virt_to_phys(ret) because on ARM it doesn't correspond
+ * to *dma_handle. */
+ phys = *dma_handle;
dev_addr = xen_phys_to_bus(phys);
if (((dev_addr + size - 1 <= dma_mask)) &&
!range_straddles_page_boundary(phys, size))
*dma_handle = dev_addr;
else {
- if (xen_create_contiguous_region(vstart, order,
- fls64(dma_mask)) != 0) {
- free_pages(vstart, order);
+ if (xen_create_contiguous_region(phys, order,
+ fls64(dma_mask), dma_handle) != 0) {
+ xen_free_coherent_pages(hwdev, size, ret, (dma_addr_t)phys, attrs);
return NULL;
}
- *dma_handle = virt_to_machine(ret).maddr;
}
memset(ret, 0, size);
return ret;
@@ -315,13 +364,15 @@ xen_swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
if (hwdev && hwdev->coherent_dma_mask)
dma_mask = hwdev->coherent_dma_mask;
- phys = virt_to_phys(vaddr);
+ /* do not use virt_to_phys because on ARM it doesn't return you the
+ * physical address */
+ phys = xen_bus_to_phys(dev_addr);
if (((dev_addr + size - 1 > dma_mask)) ||
range_straddles_page_boundary(phys, size))
- xen_destroy_contiguous_region((unsigned long)vaddr, order);
+ xen_destroy_contiguous_region(phys, order);
- free_pages((unsigned long)vaddr, order);
+ xen_free_coherent_pages(hwdev, size, vaddr, (dma_addr_t)phys, attrs);
}
EXPORT_SYMBOL_GPL(xen_swiotlb_free_coherent);
@@ -348,16 +399,25 @@ dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
* buffering it.
*/
if (dma_capable(dev, dev_addr, size) &&
- !range_straddles_page_boundary(phys, size) && !swiotlb_force)
+ !range_straddles_page_boundary(phys, size) && !swiotlb_force) {
+ /* we are not interested in the dma_addr returned by
+ * xen_dma_map_page, only in the potential cache flushes executed
+ * by the function. */
+ xen_dma_map_page(dev, page, offset, size, dir, attrs);
return dev_addr;
+ }
/*
* Oh well, have to allocate and map a bounce buffer.
*/
+ trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force);
+
map = swiotlb_tbl_map_single(dev, start_dma_addr, phys, size, dir);
if (map == SWIOTLB_MAP_ERROR)
return DMA_ERROR_CODE;
+ xen_dma_map_page(dev, pfn_to_page(map >> PAGE_SHIFT),
+ map & ~PAGE_MASK, size, dir, attrs);
dev_addr = xen_phys_to_bus(map);
/*
@@ -380,12 +440,15 @@ EXPORT_SYMBOL_GPL(xen_swiotlb_map_page);
* whatever the device wrote there.
*/
static void xen_unmap_single(struct device *hwdev, dma_addr_t dev_addr,
- size_t size, enum dma_data_direction dir)
+ size_t size, enum dma_data_direction dir,
+ struct dma_attrs *attrs)
{
phys_addr_t paddr = xen_bus_to_phys(dev_addr);
BUG_ON(dir == DMA_NONE);
+ xen_dma_unmap_page(hwdev, paddr, size, dir, attrs);
+
/* NOTE: We use dev_addr here, not paddr! */
if (is_xen_swiotlb_buffer(dev_addr)) {
swiotlb_tbl_unmap_single(hwdev, paddr, size, dir);
@@ -408,7 +471,7 @@ void xen_swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
size_t size, enum dma_data_direction dir,
struct dma_attrs *attrs)
{
- xen_unmap_single(hwdev, dev_addr, size, dir);
+ xen_unmap_single(hwdev, dev_addr, size, dir, attrs);
}
EXPORT_SYMBOL_GPL(xen_swiotlb_unmap_page);
@@ -431,11 +494,15 @@ xen_swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
BUG_ON(dir == DMA_NONE);
+ if (target == SYNC_FOR_CPU)
+ xen_dma_sync_single_for_cpu(hwdev, paddr, size, dir);
+
/* NOTE: We use dev_addr here, not paddr! */
- if (is_xen_swiotlb_buffer(dev_addr)) {
+ if (is_xen_swiotlb_buffer(dev_addr))
swiotlb_tbl_sync_single(hwdev, paddr, size, dir, target);
- return;
- }
+
+ if (target == SYNC_FOR_DEVICE)
+ xen_dma_sync_single_for_cpu(hwdev, paddr, size, dir);
if (dir != DMA_FROM_DEVICE)
return;
@@ -498,17 +565,32 @@ xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
sg->length,
dir);
if (map == SWIOTLB_MAP_ERROR) {
+ dev_warn(hwdev, "swiotlb buffer is full\n");
/* Don't panic here, we expect map_sg users
to do proper error handling. */
xen_swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir,
attrs);
- sgl[0].dma_length = 0;
- return DMA_ERROR_CODE;
+ sg_dma_len(sgl) = 0;
+ return 0;
}
+ xen_dma_map_page(hwdev, pfn_to_page(map >> PAGE_SHIFT),
+ map & ~PAGE_MASK,
+ sg->length,
+ dir,
+ attrs);
sg->dma_address = xen_phys_to_bus(map);
- } else
+ } else {
+ /* we are not interested in the dma_addr returned by
+ * xen_dma_map_page, only in the potential cache flushes executed
+ * by the function. */
+ xen_dma_map_page(hwdev, pfn_to_page(paddr >> PAGE_SHIFT),
+ paddr & ~PAGE_MASK,
+ sg->length,
+ dir,
+ attrs);
sg->dma_address = dev_addr;
- sg->dma_length = sg->length;
+ }
+ sg_dma_len(sg) = sg->length;
}
return nelems;
}
@@ -529,7 +611,7 @@ xen_swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
BUG_ON(dir == DMA_NONE);
for_each_sg(sgl, sg, nelems, i)
- xen_unmap_single(hwdev, sg->dma_address, sg->dma_length, dir);
+ xen_unmap_single(hwdev, sg->dma_address, sg_dma_len(sg), dir, attrs);
}
EXPORT_SYMBOL_GPL(xen_swiotlb_unmap_sg_attrs);
@@ -551,7 +633,7 @@ xen_swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl,
for_each_sg(sgl, sg, nelems, i)
xen_swiotlb_sync_single(hwdev, sg->dma_address,
- sg->dma_length, dir, target);
+ sg_dma_len(sg), dir, target);
}
void
@@ -589,3 +671,15 @@ xen_swiotlb_dma_supported(struct device *hwdev, u64 mask)
return xen_virt_to_bus(xen_io_tlb_end - 1) <= mask;
}
EXPORT_SYMBOL_GPL(xen_swiotlb_dma_supported);
+
+int
+xen_swiotlb_set_dma_mask(struct device *dev, u64 dma_mask)
+{
+ if (!dev->dma_mask || !xen_swiotlb_dma_supported(dev, dma_mask))
+ return -EIO;
+
+ *dev->dma_mask = dma_mask;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(xen_swiotlb_set_dma_mask);
diff --git a/drivers/xen/tmem.c b/drivers/xen/tmem.c
index 144564e5eb2..83b5c53bec6 100644
--- a/drivers/xen/tmem.c
+++ b/drivers/xen/tmem.c
@@ -5,16 +5,15 @@
* Author: Dan Magenheimer
*/
+#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/pagemap.h>
#include <linux/cleancache.h>
-
-/* temporary ifdef until include/linux/frontswap.h is upstream */
-#ifdef CONFIG_FRONTSWAP
#include <linux/frontswap.h>
-#endif
#include <xen/xen.h>
#include <xen/interface/xen.h>
@@ -23,6 +22,36 @@
#include <asm/xen/hypervisor.h>
#include <xen/tmem.h>
+#ifndef CONFIG_XEN_TMEM_MODULE
+bool __read_mostly tmem_enabled = false;
+
+static int __init enable_tmem(char *s)
+{
+ tmem_enabled = true;
+ return 1;
+}
+__setup("tmem", enable_tmem);
+#endif
+
+#ifdef CONFIG_CLEANCACHE
+static bool cleancache __read_mostly = true;
+module_param(cleancache, bool, S_IRUGO);
+static bool selfballooning __read_mostly = true;
+module_param(selfballooning, bool, S_IRUGO);
+#endif /* CONFIG_CLEANCACHE */
+
+#ifdef CONFIG_FRONTSWAP
+static bool frontswap __read_mostly = true;
+module_param(frontswap, bool, S_IRUGO);
+#else /* CONFIG_FRONTSWAP */
+#define frontswap (0)
+#endif /* CONFIG_FRONTSWAP */
+
+#ifdef CONFIG_XEN_SELFBALLOONING
+static bool selfshrinking __read_mostly = true;
+module_param(selfshrinking, bool, S_IRUGO);
+#endif /* CONFIG_XEN_SELFBALLOONING */
+
#define TMEM_CONTROL 0
#define TMEM_NEW_POOL 1
#define TMEM_DESTROY_POOL 2
@@ -128,14 +157,6 @@ static int xen_tmem_flush_object(u32 pool_id, struct tmem_oid oid)
return xen_tmem_op(TMEM_FLUSH_OBJECT, pool_id, oid, 0, 0, 0, 0, 0);
}
-bool __read_mostly tmem_enabled = false;
-
-static int __init enable_tmem(char *s)
-{
- tmem_enabled = true;
- return 1;
-}
-__setup("tmem", enable_tmem);
#ifdef CONFIG_CLEANCACHE
static int xen_tmem_destroy_pool(u32 pool_id)
@@ -227,16 +248,7 @@ static int tmem_cleancache_init_shared_fs(char *uuid, size_t pagesize)
return xen_tmem_new_pool(shared_uuid, TMEM_POOL_SHARED, pagesize);
}
-static bool __initdata use_cleancache = true;
-
-static int __init no_cleancache(char *s)
-{
- use_cleancache = false;
- return 1;
-}
-__setup("nocleancache", no_cleancache);
-
-static struct cleancache_ops __initdata tmem_cleancache_ops = {
+static struct cleancache_ops tmem_cleancache_ops = {
.put_page = tmem_cleancache_put_page,
.get_page = tmem_cleancache_get_page,
.invalidate_page = tmem_cleancache_flush_page,
@@ -353,16 +365,7 @@ static void tmem_frontswap_init(unsigned ignored)
xen_tmem_new_pool(private, TMEM_POOL_PERSIST, PAGE_SIZE);
}
-static bool __initdata use_frontswap = true;
-
-static int __init no_frontswap(char *s)
-{
- use_frontswap = false;
- return 1;
-}
-__setup("nofrontswap", no_frontswap);
-
-static struct frontswap_ops __initdata tmem_frontswap_ops = {
+static struct frontswap_ops tmem_frontswap_ops = {
.store = tmem_frontswap_store,
.load = tmem_frontswap_load,
.invalidate_page = tmem_frontswap_flush_page,
@@ -371,36 +374,53 @@ static struct frontswap_ops __initdata tmem_frontswap_ops = {
};
#endif
-static int __init xen_tmem_init(void)
+static int xen_tmem_init(void)
{
if (!xen_domain())
return 0;
#ifdef CONFIG_FRONTSWAP
- if (tmem_enabled && use_frontswap) {
+ if (tmem_enabled && frontswap) {
char *s = "";
- struct frontswap_ops old_ops =
- frontswap_register_ops(&tmem_frontswap_ops);
+ struct frontswap_ops *old_ops;
tmem_frontswap_poolid = -1;
- if (old_ops.init != NULL)
+ old_ops = frontswap_register_ops(&tmem_frontswap_ops);
+ if (IS_ERR(old_ops) || old_ops) {
+ if (IS_ERR(old_ops))
+ return PTR_ERR(old_ops);
s = " (WARNING: frontswap_ops overridden)";
- printk(KERN_INFO "frontswap enabled, RAM provided by "
- "Xen Transcendent Memory\n");
+ }
+ pr_info("frontswap enabled, RAM provided by Xen Transcendent Memory%s\n",
+ s);
}
#endif
#ifdef CONFIG_CLEANCACHE
BUG_ON(sizeof(struct cleancache_filekey) != sizeof(struct tmem_oid));
- if (tmem_enabled && use_cleancache) {
+ if (tmem_enabled && cleancache) {
char *s = "";
- struct cleancache_ops old_ops =
+ struct cleancache_ops *old_ops =
cleancache_register_ops(&tmem_cleancache_ops);
- if (old_ops.init_fs != NULL)
+ if (old_ops)
s = " (WARNING: cleancache_ops overridden)";
- printk(KERN_INFO "cleancache enabled, RAM provided by "
- "Xen Transcendent Memory%s\n", s);
+ pr_info("cleancache enabled, RAM provided by Xen Transcendent Memory%s\n",
+ s);
+ }
+#endif
+#ifdef CONFIG_XEN_SELFBALLOONING
+ /*
+ * There is no point of driving pages to the swap system if they
+ * aren't going anywhere in tmem universe.
+ */
+ if (!frontswap) {
+ selfshrinking = false;
+ selfballooning = false;
}
+ xen_selfballoon_init(selfballooning, selfshrinking);
#endif
return 0;
}
module_init(xen_tmem_init)
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Dan Magenheimer <dan.magenheimer@oracle.com>");
+MODULE_DESCRIPTION("Shim to Xen transcendent memory");
diff --git a/drivers/xen/xen-acpi-cpuhotplug.c b/drivers/xen/xen-acpi-cpuhotplug.c
new file mode 100644
index 00000000000..3e62ee4b3b6
--- /dev/null
+++ b/drivers/xen/xen-acpi-cpuhotplug.c
@@ -0,0 +1,462 @@
+/*
+ * Copyright (C) 2012 Intel Corporation
+ * Author: Liu Jinsong <jinsong.liu@intel.com>
+ * Author: Jiang Yunhong <yunhong.jiang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or (at
+ * your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for more
+ * details.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/cpu.h>
+#include <linux/acpi.h>
+#include <linux/uaccess.h>
+#include <acpi/processor.h>
+#include <xen/acpi.h>
+#include <xen/interface/platform.h>
+#include <asm/xen/hypercall.h>
+
+#define PREFIX "ACPI:xen_cpu_hotplug:"
+
+#define INSTALL_NOTIFY_HANDLER 0
+#define UNINSTALL_NOTIFY_HANDLER 1
+
+static acpi_status xen_acpi_cpu_hotadd(struct acpi_processor *pr);
+
+/* --------------------------------------------------------------------------
+ Driver Interface
+-------------------------------------------------------------------------- */
+
+static int xen_acpi_processor_enable(struct acpi_device *device)
+{
+ acpi_status status = 0;
+ unsigned long long value;
+ union acpi_object object = { 0 };
+ struct acpi_buffer buffer = { sizeof(union acpi_object), &object };
+ struct acpi_processor *pr;
+
+ pr = acpi_driver_data(device);
+ if (!pr) {
+ pr_err(PREFIX "Cannot find driver data\n");
+ return -EINVAL;
+ }
+
+ if (!strcmp(acpi_device_hid(device), ACPI_PROCESSOR_OBJECT_HID)) {
+ /* Declared with "Processor" statement; match ProcessorID */
+ status = acpi_evaluate_object(pr->handle, NULL, NULL, &buffer);
+ if (ACPI_FAILURE(status)) {
+ pr_err(PREFIX "Evaluating processor object\n");
+ return -ENODEV;
+ }
+
+ pr->acpi_id = object.processor.proc_id;
+ } else {
+ /* Declared with "Device" statement; match _UID */
+ status = acpi_evaluate_integer(pr->handle, METHOD_NAME__UID,
+ NULL, &value);
+ if (ACPI_FAILURE(status)) {
+ pr_err(PREFIX "Evaluating processor _UID\n");
+ return -ENODEV;
+ }
+
+ pr->acpi_id = value;
+ }
+
+ pr->id = xen_pcpu_id(pr->acpi_id);
+
+ if ((int)pr->id < 0)
+ /* This cpu is not presented at hypervisor, try to hotadd it */
+ if (ACPI_FAILURE(xen_acpi_cpu_hotadd(pr))) {
+ pr_err(PREFIX "Hotadd CPU (acpi_id = %d) failed.\n",
+ pr->acpi_id);
+ return -ENODEV;
+ }
+
+ return 0;
+}
+
+static int xen_acpi_processor_add(struct acpi_device *device)
+{
+ int ret;
+ struct acpi_processor *pr;
+
+ if (!device)
+ return -EINVAL;
+
+ pr = kzalloc(sizeof(struct acpi_processor), GFP_KERNEL);
+ if (!pr)
+ return -ENOMEM;
+
+ pr->handle = device->handle;
+ strcpy(acpi_device_name(device), ACPI_PROCESSOR_DEVICE_NAME);
+ strcpy(acpi_device_class(device), ACPI_PROCESSOR_CLASS);
+ device->driver_data = pr;
+
+ ret = xen_acpi_processor_enable(device);
+ if (ret)
+ pr_err(PREFIX "Error when enabling Xen processor\n");
+
+ return ret;
+}
+
+static int xen_acpi_processor_remove(struct acpi_device *device)
+{
+ struct acpi_processor *pr;
+
+ if (!device)
+ return -EINVAL;
+
+ pr = acpi_driver_data(device);
+ if (!pr)
+ return -EINVAL;
+
+ kfree(pr);
+ return 0;
+}
+
+/*--------------------------------------------------------------
+ Acpi processor hotplug support
+--------------------------------------------------------------*/
+
+static int is_processor_present(acpi_handle handle)
+{
+ acpi_status status;
+ unsigned long long sta = 0;
+
+
+ status = acpi_evaluate_integer(handle, "_STA", NULL, &sta);
+
+ if (ACPI_SUCCESS(status) && (sta & ACPI_STA_DEVICE_PRESENT))
+ return 1;
+
+ /*
+ * _STA is mandatory for a processor that supports hot plug
+ */
+ if (status == AE_NOT_FOUND)
+ pr_info(PREFIX "Processor does not support hot plug\n");
+ else
+ pr_info(PREFIX "Processor Device is not present");
+ return 0;
+}
+
+static int xen_apic_id(acpi_handle handle)
+{
+ struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+ union acpi_object *obj;
+ struct acpi_madt_local_apic *lapic;
+ int apic_id;
+
+ if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer)))
+ return -EINVAL;
+
+ if (!buffer.length || !buffer.pointer)
+ return -EINVAL;
+
+ obj = buffer.pointer;
+ if (obj->type != ACPI_TYPE_BUFFER ||
+ obj->buffer.length < sizeof(*lapic)) {
+ kfree(buffer.pointer);
+ return -EINVAL;
+ }
+
+ lapic = (struct acpi_madt_local_apic *)obj->buffer.pointer;
+
+ if (lapic->header.type != ACPI_MADT_TYPE_LOCAL_APIC ||
+ !(lapic->lapic_flags & ACPI_MADT_ENABLED)) {
+ kfree(buffer.pointer);
+ return -EINVAL;
+ }
+
+ apic_id = (uint32_t)lapic->id;
+ kfree(buffer.pointer);
+ buffer.length = ACPI_ALLOCATE_BUFFER;
+ buffer.pointer = NULL;
+
+ return apic_id;
+}
+
+static int xen_hotadd_cpu(struct acpi_processor *pr)
+{
+ int cpu_id, apic_id, pxm;
+ struct xen_platform_op op;
+
+ apic_id = xen_apic_id(pr->handle);
+ if (apic_id < 0) {
+ pr_err(PREFIX "Failed to get apic_id for acpi_id %d\n",
+ pr->acpi_id);
+ return -ENODEV;
+ }
+
+ pxm = xen_acpi_get_pxm(pr->handle);
+ if (pxm < 0) {
+ pr_err(PREFIX "Failed to get _PXM for acpi_id %d\n",
+ pr->acpi_id);
+ return pxm;
+ }
+
+ op.cmd = XENPF_cpu_hotadd;
+ op.u.cpu_add.apic_id = apic_id;
+ op.u.cpu_add.acpi_id = pr->acpi_id;
+ op.u.cpu_add.pxm = pxm;
+
+ cpu_id = HYPERVISOR_dom0_op(&op);
+ if (cpu_id < 0)
+ pr_err(PREFIX "Failed to hotadd CPU for acpi_id %d\n",
+ pr->acpi_id);
+
+ return cpu_id;
+}
+
+static acpi_status xen_acpi_cpu_hotadd(struct acpi_processor *pr)
+{
+ if (!is_processor_present(pr->handle))
+ return AE_ERROR;
+
+ pr->id = xen_hotadd_cpu(pr);
+ if ((int)pr->id < 0)
+ return AE_ERROR;
+
+ /*
+ * Sync with Xen hypervisor, providing new /sys/.../xen_cpuX
+ * interface after cpu hotadded.
+ */
+ xen_pcpu_hotplug_sync();
+
+ return AE_OK;
+}
+
+static int acpi_processor_device_remove(struct acpi_device *device)
+{
+ pr_debug(PREFIX "Xen does not support CPU hotremove\n");
+
+ return -ENOSYS;
+}
+
+static void acpi_processor_hotplug_notify(acpi_handle handle,
+ u32 event, void *data)
+{
+ struct acpi_processor *pr;
+ struct acpi_device *device = NULL;
+ u32 ost_code = ACPI_OST_SC_NON_SPECIFIC_FAILURE; /* default */
+ int result;
+
+ acpi_scan_lock_acquire();
+
+ switch (event) {
+ case ACPI_NOTIFY_BUS_CHECK:
+ case ACPI_NOTIFY_DEVICE_CHECK:
+ ACPI_DEBUG_PRINT((ACPI_DB_INFO,
+ "Processor driver received %s event\n",
+ (event == ACPI_NOTIFY_BUS_CHECK) ?
+ "ACPI_NOTIFY_BUS_CHECK" : "ACPI_NOTIFY_DEVICE_CHECK"));
+
+ if (!is_processor_present(handle))
+ break;
+
+ acpi_bus_get_device(handle, &device);
+ if (acpi_device_enumerated(device))
+ break;
+
+ result = acpi_bus_scan(handle);
+ if (result) {
+ pr_err(PREFIX "Unable to add the device\n");
+ break;
+ }
+ device = NULL;
+ acpi_bus_get_device(handle, &device);
+ if (!acpi_device_enumerated(device)) {
+ pr_err(PREFIX "Missing device object\n");
+ break;
+ }
+ ost_code = ACPI_OST_SC_SUCCESS;
+ break;
+
+ case ACPI_NOTIFY_EJECT_REQUEST:
+ ACPI_DEBUG_PRINT((ACPI_DB_INFO,
+ "received ACPI_NOTIFY_EJECT_REQUEST\n"));
+
+ if (acpi_bus_get_device(handle, &device)) {
+ pr_err(PREFIX "Device don't exist, dropping EJECT\n");
+ break;
+ }
+ pr = acpi_driver_data(device);
+ if (!pr) {
+ pr_err(PREFIX "Driver data is NULL, dropping EJECT\n");
+ break;
+ }
+
+ /*
+ * TBD: implement acpi_processor_device_remove if Xen support
+ * CPU hotremove in the future.
+ */
+ acpi_processor_device_remove(device);
+ break;
+
+ default:
+ ACPI_DEBUG_PRINT((ACPI_DB_INFO,
+ "Unsupported event [0x%x]\n", event));
+
+ /* non-hotplug event; possibly handled by other handler */
+ goto out;
+ }
+
+ (void) acpi_evaluate_ost(handle, event, ost_code, NULL);
+
+out:
+ acpi_scan_lock_release();
+}
+
+static acpi_status is_processor_device(acpi_handle handle)
+{
+ struct acpi_device_info *info;
+ char *hid;
+ acpi_status status;
+
+ status = acpi_get_object_info(handle, &info);
+ if (ACPI_FAILURE(status))
+ return status;
+
+ if (info->type == ACPI_TYPE_PROCESSOR) {
+ kfree(info);
+ return AE_OK; /* found a processor object */
+ }
+
+ if (!(info->valid & ACPI_VALID_HID)) {
+ kfree(info);
+ return AE_ERROR;
+ }
+
+ hid = info->hardware_id.string;
+ if ((hid == NULL) || strcmp(hid, ACPI_PROCESSOR_DEVICE_HID)) {
+ kfree(info);
+ return AE_ERROR;
+ }
+
+ kfree(info);
+ return AE_OK; /* found a processor device object */
+}
+
+static acpi_status
+processor_walk_namespace_cb(acpi_handle handle,
+ u32 lvl, void *context, void **rv)
+{
+ acpi_status status;
+ int *action = context;
+
+ status = is_processor_device(handle);
+ if (ACPI_FAILURE(status))
+ return AE_OK; /* not a processor; continue to walk */
+
+ switch (*action) {
+ case INSTALL_NOTIFY_HANDLER:
+ acpi_install_notify_handler(handle,
+ ACPI_SYSTEM_NOTIFY,
+ acpi_processor_hotplug_notify,
+ NULL);
+ break;
+ case UNINSTALL_NOTIFY_HANDLER:
+ acpi_remove_notify_handler(handle,
+ ACPI_SYSTEM_NOTIFY,
+ acpi_processor_hotplug_notify);
+ break;
+ default:
+ break;
+ }
+
+ /* found a processor; skip walking underneath */
+ return AE_CTRL_DEPTH;
+}
+
+static
+void acpi_processor_install_hotplug_notify(void)
+{
+ int action = INSTALL_NOTIFY_HANDLER;
+ acpi_walk_namespace(ACPI_TYPE_ANY,
+ ACPI_ROOT_OBJECT,
+ ACPI_UINT32_MAX,
+ processor_walk_namespace_cb, NULL, &action, NULL);
+}
+
+static
+void acpi_processor_uninstall_hotplug_notify(void)
+{
+ int action = UNINSTALL_NOTIFY_HANDLER;
+ acpi_walk_namespace(ACPI_TYPE_ANY,
+ ACPI_ROOT_OBJECT,
+ ACPI_UINT32_MAX,
+ processor_walk_namespace_cb, NULL, &action, NULL);
+}
+
+static const struct acpi_device_id processor_device_ids[] = {
+ {ACPI_PROCESSOR_OBJECT_HID, 0},
+ {ACPI_PROCESSOR_DEVICE_HID, 0},
+ {"", 0},
+};
+MODULE_DEVICE_TABLE(acpi, processor_device_ids);
+
+static struct acpi_driver xen_acpi_processor_driver = {
+ .name = "processor",
+ .class = ACPI_PROCESSOR_CLASS,
+ .ids = processor_device_ids,
+ .ops = {
+ .add = xen_acpi_processor_add,
+ .remove = xen_acpi_processor_remove,
+ },
+};
+
+static int __init xen_acpi_processor_init(void)
+{
+ int result = 0;
+
+ if (!xen_initial_domain())
+ return -ENODEV;
+
+ /* unregister the stub which only used to reserve driver space */
+ xen_stub_processor_exit();
+
+ result = acpi_bus_register_driver(&xen_acpi_processor_driver);
+ if (result < 0) {
+ xen_stub_processor_init();
+ return result;
+ }
+
+ acpi_processor_install_hotplug_notify();
+ return 0;
+}
+
+static void __exit xen_acpi_processor_exit(void)
+{
+ if (!xen_initial_domain())
+ return;
+
+ acpi_processor_uninstall_hotplug_notify();
+
+ acpi_bus_unregister_driver(&xen_acpi_processor_driver);
+
+ /*
+ * stub reserve space again to prevent any chance of native
+ * driver loading.
+ */
+ xen_stub_processor_init();
+ return;
+}
+
+module_init(xen_acpi_processor_init);
+module_exit(xen_acpi_processor_exit);
+ACPI_MODULE_NAME("xen-acpi-cpuhotplug");
+MODULE_AUTHOR("Liu Jinsong <jinsong.liu@intel.com>");
+MODULE_DESCRIPTION("Xen Hotplug CPU Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/xen/xen-acpi-memhotplug.c b/drivers/xen/xen-acpi-memhotplug.c
new file mode 100644
index 00000000000..34e40b733f9
--- /dev/null
+++ b/drivers/xen/xen-acpi-memhotplug.c
@@ -0,0 +1,485 @@
+/*
+ * Copyright (C) 2012 Intel Corporation
+ * Author: Liu Jinsong <jinsong.liu@intel.com>
+ * Author: Jiang Yunhong <yunhong.jiang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or (at
+ * your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for more
+ * details.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/acpi.h>
+#include <xen/acpi.h>
+#include <xen/interface/platform.h>
+#include <asm/xen/hypercall.h>
+
+#define PREFIX "ACPI:xen_memory_hotplug:"
+
+struct acpi_memory_info {
+ struct list_head list;
+ u64 start_addr; /* Memory Range start physical addr */
+ u64 length; /* Memory Range length */
+ unsigned short caching; /* memory cache attribute */
+ unsigned short write_protect; /* memory read/write attribute */
+ /* copied from buffer getting from _CRS */
+ unsigned int enabled:1;
+};
+
+struct acpi_memory_device {
+ struct acpi_device *device;
+ struct list_head res_list;
+};
+
+static bool acpi_hotmem_initialized __read_mostly;
+
+static int xen_hotadd_memory(int pxm, struct acpi_memory_info *info)
+{
+ int rc;
+ struct xen_platform_op op;
+
+ op.cmd = XENPF_mem_hotadd;
+ op.u.mem_add.spfn = info->start_addr >> PAGE_SHIFT;
+ op.u.mem_add.epfn = (info->start_addr + info->length) >> PAGE_SHIFT;
+ op.u.mem_add.pxm = pxm;
+
+ rc = HYPERVISOR_dom0_op(&op);
+ if (rc)
+ pr_err(PREFIX "Xen Hotplug Memory Add failed on "
+ "0x%lx -> 0x%lx, _PXM: %d, error: %d\n",
+ (unsigned long)info->start_addr,
+ (unsigned long)(info->start_addr + info->length),
+ pxm, rc);
+
+ return rc;
+}
+
+static int xen_acpi_memory_enable_device(struct acpi_memory_device *mem_device)
+{
+ int pxm, result;
+ int num_enabled = 0;
+ struct acpi_memory_info *info;
+
+ if (!mem_device)
+ return -EINVAL;
+
+ pxm = xen_acpi_get_pxm(mem_device->device->handle);
+ if (pxm < 0)
+ return pxm;
+
+ list_for_each_entry(info, &mem_device->res_list, list) {
+ if (info->enabled) { /* just sanity check...*/
+ num_enabled++;
+ continue;
+ }
+
+ if (!info->length)
+ continue;
+
+ result = xen_hotadd_memory(pxm, info);
+ if (result)
+ continue;
+ info->enabled = 1;
+ num_enabled++;
+ }
+
+ if (!num_enabled)
+ return -ENODEV;
+
+ return 0;
+}
+
+static acpi_status
+acpi_memory_get_resource(struct acpi_resource *resource, void *context)
+{
+ struct acpi_memory_device *mem_device = context;
+ struct acpi_resource_address64 address64;
+ struct acpi_memory_info *info, *new;
+ acpi_status status;
+
+ status = acpi_resource_to_address64(resource, &address64);
+ if (ACPI_FAILURE(status) ||
+ (address64.resource_type != ACPI_MEMORY_RANGE))
+ return AE_OK;
+
+ list_for_each_entry(info, &mem_device->res_list, list) {
+ if ((info->caching == address64.info.mem.caching) &&
+ (info->write_protect == address64.info.mem.write_protect) &&
+ (info->start_addr + info->length == address64.minimum)) {
+ info->length += address64.address_length;
+ return AE_OK;
+ }
+ }
+
+ new = kzalloc(sizeof(struct acpi_memory_info), GFP_KERNEL);
+ if (!new)
+ return AE_ERROR;
+
+ INIT_LIST_HEAD(&new->list);
+ new->caching = address64.info.mem.caching;
+ new->write_protect = address64.info.mem.write_protect;
+ new->start_addr = address64.minimum;
+ new->length = address64.address_length;
+ list_add_tail(&new->list, &mem_device->res_list);
+
+ return AE_OK;
+}
+
+static int
+acpi_memory_get_device_resources(struct acpi_memory_device *mem_device)
+{
+ acpi_status status;
+ struct acpi_memory_info *info, *n;
+
+ if (!list_empty(&mem_device->res_list))
+ return 0;
+
+ status = acpi_walk_resources(mem_device->device->handle,
+ METHOD_NAME__CRS, acpi_memory_get_resource, mem_device);
+
+ if (ACPI_FAILURE(status)) {
+ list_for_each_entry_safe(info, n, &mem_device->res_list, list)
+ kfree(info);
+ INIT_LIST_HEAD(&mem_device->res_list);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int acpi_memory_get_device(acpi_handle handle,
+ struct acpi_memory_device **mem_device)
+{
+ struct acpi_device *device = NULL;
+ int result = 0;
+
+ acpi_scan_lock_acquire();
+
+ acpi_bus_get_device(handle, &device);
+ if (acpi_device_enumerated(device))
+ goto end;
+
+ /*
+ * Now add the notified device. This creates the acpi_device
+ * and invokes .add function
+ */
+ result = acpi_bus_scan(handle);
+ if (result) {
+ pr_warn(PREFIX "ACPI namespace scan failed\n");
+ result = -EINVAL;
+ goto out;
+ }
+ device = NULL;
+ acpi_bus_get_device(handle, &device);
+ if (!acpi_device_enumerated(device)) {
+ pr_warn(PREFIX "Missing device object\n");
+ result = -EINVAL;
+ goto out;
+ }
+
+end:
+ *mem_device = acpi_driver_data(device);
+ if (!(*mem_device)) {
+ pr_err(PREFIX "driver data not found\n");
+ result = -ENODEV;
+ goto out;
+ }
+
+out:
+ acpi_scan_lock_release();
+ return result;
+}
+
+static int acpi_memory_check_device(struct acpi_memory_device *mem_device)
+{
+ unsigned long long current_status;
+
+ /* Get device present/absent information from the _STA */
+ if (ACPI_FAILURE(acpi_evaluate_integer(mem_device->device->handle,
+ "_STA", NULL, &current_status)))
+ return -ENODEV;
+ /*
+ * Check for device status. Device should be
+ * present/enabled/functioning.
+ */
+ if (!((current_status & ACPI_STA_DEVICE_PRESENT)
+ && (current_status & ACPI_STA_DEVICE_ENABLED)
+ && (current_status & ACPI_STA_DEVICE_FUNCTIONING)))
+ return -ENODEV;
+
+ return 0;
+}
+
+static int acpi_memory_disable_device(struct acpi_memory_device *mem_device)
+{
+ pr_debug(PREFIX "Xen does not support memory hotremove\n");
+
+ return -ENOSYS;
+}
+
+static void acpi_memory_device_notify(acpi_handle handle, u32 event, void *data)
+{
+ struct acpi_memory_device *mem_device;
+ struct acpi_device *device;
+ u32 ost_code = ACPI_OST_SC_NON_SPECIFIC_FAILURE; /* default */
+
+ switch (event) {
+ case ACPI_NOTIFY_BUS_CHECK:
+ ACPI_DEBUG_PRINT((ACPI_DB_INFO,
+ "\nReceived BUS CHECK notification for device\n"));
+ /* Fall Through */
+ case ACPI_NOTIFY_DEVICE_CHECK:
+ if (event == ACPI_NOTIFY_DEVICE_CHECK)
+ ACPI_DEBUG_PRINT((ACPI_DB_INFO,
+ "\nReceived DEVICE CHECK notification for device\n"));
+
+ if (acpi_memory_get_device(handle, &mem_device)) {
+ pr_err(PREFIX "Cannot find driver data\n");
+ break;
+ }
+
+ ost_code = ACPI_OST_SC_SUCCESS;
+ break;
+
+ case ACPI_NOTIFY_EJECT_REQUEST:
+ ACPI_DEBUG_PRINT((ACPI_DB_INFO,
+ "\nReceived EJECT REQUEST notification for device\n"));
+
+ acpi_scan_lock_acquire();
+ if (acpi_bus_get_device(handle, &device)) {
+ acpi_scan_lock_release();
+ pr_err(PREFIX "Device doesn't exist\n");
+ break;
+ }
+ mem_device = acpi_driver_data(device);
+ if (!mem_device) {
+ acpi_scan_lock_release();
+ pr_err(PREFIX "Driver Data is NULL\n");
+ break;
+ }
+
+ /*
+ * TBD: implement acpi_memory_disable_device and invoke
+ * acpi_bus_remove if Xen support hotremove in the future
+ */
+ acpi_memory_disable_device(mem_device);
+ acpi_scan_lock_release();
+ break;
+
+ default:
+ ACPI_DEBUG_PRINT((ACPI_DB_INFO,
+ "Unsupported event [0x%x]\n", event));
+ /* non-hotplug event; possibly handled by other handler */
+ return;
+ }
+
+ (void) acpi_evaluate_ost(handle, event, ost_code, NULL);
+ return;
+}
+
+static int xen_acpi_memory_device_add(struct acpi_device *device)
+{
+ int result;
+ struct acpi_memory_device *mem_device = NULL;
+
+
+ if (!device)
+ return -EINVAL;
+
+ mem_device = kzalloc(sizeof(struct acpi_memory_device), GFP_KERNEL);
+ if (!mem_device)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&mem_device->res_list);
+ mem_device->device = device;
+ sprintf(acpi_device_name(device), "%s", ACPI_MEMORY_DEVICE_NAME);
+ sprintf(acpi_device_class(device), "%s", ACPI_MEMORY_DEVICE_CLASS);
+ device->driver_data = mem_device;
+
+ /* Get the range from the _CRS */
+ result = acpi_memory_get_device_resources(mem_device);
+ if (result) {
+ kfree(mem_device);
+ return result;
+ }
+
+ /*
+ * For booting existed memory devices, early boot code has recognized
+ * memory area by EFI/E820. If DSDT shows these memory devices on boot,
+ * hotplug is not necessary for them.
+ * For hot-added memory devices during runtime, it need hypercall to
+ * Xen hypervisor to add memory.
+ */
+ if (!acpi_hotmem_initialized)
+ return 0;
+
+ if (!acpi_memory_check_device(mem_device))
+ result = xen_acpi_memory_enable_device(mem_device);
+
+ return result;
+}
+
+static int xen_acpi_memory_device_remove(struct acpi_device *device)
+{
+ struct acpi_memory_device *mem_device = NULL;
+
+ if (!device || !acpi_driver_data(device))
+ return -EINVAL;
+
+ mem_device = acpi_driver_data(device);
+ kfree(mem_device);
+
+ return 0;
+}
+
+/*
+ * Helper function to check for memory device
+ */
+static acpi_status is_memory_device(acpi_handle handle)
+{
+ char *hardware_id;
+ acpi_status status;
+ struct acpi_device_info *info;
+
+ status = acpi_get_object_info(handle, &info);
+ if (ACPI_FAILURE(status))
+ return status;
+
+ if (!(info->valid & ACPI_VALID_HID)) {
+ kfree(info);
+ return AE_ERROR;
+ }
+
+ hardware_id = info->hardware_id.string;
+ if ((hardware_id == NULL) ||
+ (strcmp(hardware_id, ACPI_MEMORY_DEVICE_HID)))
+ status = AE_ERROR;
+
+ kfree(info);
+ return status;
+}
+
+static acpi_status
+acpi_memory_register_notify_handler(acpi_handle handle,
+ u32 level, void *ctxt, void **retv)
+{
+ acpi_status status;
+
+ status = is_memory_device(handle);
+ if (ACPI_FAILURE(status))
+ return AE_OK; /* continue */
+
+ status = acpi_install_notify_handler(handle, ACPI_SYSTEM_NOTIFY,
+ acpi_memory_device_notify, NULL);
+ /* continue */
+ return AE_OK;
+}
+
+static acpi_status
+acpi_memory_deregister_notify_handler(acpi_handle handle,
+ u32 level, void *ctxt, void **retv)
+{
+ acpi_status status;
+
+ status = is_memory_device(handle);
+ if (ACPI_FAILURE(status))
+ return AE_OK; /* continue */
+
+ status = acpi_remove_notify_handler(handle,
+ ACPI_SYSTEM_NOTIFY,
+ acpi_memory_device_notify);
+
+ return AE_OK; /* continue */
+}
+
+static const struct acpi_device_id memory_device_ids[] = {
+ {ACPI_MEMORY_DEVICE_HID, 0},
+ {"", 0},
+};
+MODULE_DEVICE_TABLE(acpi, memory_device_ids);
+
+static struct acpi_driver xen_acpi_memory_device_driver = {
+ .name = "acpi_memhotplug",
+ .class = ACPI_MEMORY_DEVICE_CLASS,
+ .ids = memory_device_ids,
+ .ops = {
+ .add = xen_acpi_memory_device_add,
+ .remove = xen_acpi_memory_device_remove,
+ },
+};
+
+static int __init xen_acpi_memory_device_init(void)
+{
+ int result;
+ acpi_status status;
+
+ if (!xen_initial_domain())
+ return -ENODEV;
+
+ /* unregister the stub which only used to reserve driver space */
+ xen_stub_memory_device_exit();
+
+ result = acpi_bus_register_driver(&xen_acpi_memory_device_driver);
+ if (result < 0) {
+ xen_stub_memory_device_init();
+ return -ENODEV;
+ }
+
+ status = acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT,
+ ACPI_UINT32_MAX,
+ acpi_memory_register_notify_handler,
+ NULL, NULL, NULL);
+
+ if (ACPI_FAILURE(status)) {
+ pr_warn(PREFIX "walk_namespace failed\n");
+ acpi_bus_unregister_driver(&xen_acpi_memory_device_driver);
+ xen_stub_memory_device_init();
+ return -ENODEV;
+ }
+
+ acpi_hotmem_initialized = true;
+ return 0;
+}
+
+static void __exit xen_acpi_memory_device_exit(void)
+{
+ acpi_status status;
+
+ if (!xen_initial_domain())
+ return;
+
+ status = acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT,
+ ACPI_UINT32_MAX,
+ acpi_memory_deregister_notify_handler,
+ NULL, NULL, NULL);
+ if (ACPI_FAILURE(status))
+ pr_warn(PREFIX "walk_namespace failed\n");
+
+ acpi_bus_unregister_driver(&xen_acpi_memory_device_driver);
+
+ /*
+ * stub reserve space again to prevent any chance of native
+ * driver loading.
+ */
+ xen_stub_memory_device_init();
+ return;
+}
+
+module_init(xen_acpi_memory_device_init);
+module_exit(xen_acpi_memory_device_exit);
+ACPI_MODULE_NAME("xen-acpi-memhotplug");
+MODULE_AUTHOR("Liu Jinsong <jinsong.liu@intel.com>");
+MODULE_DESCRIPTION("Xen Hotplug Mem Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/xen/xen-acpi-pad.c b/drivers/xen/xen-acpi-pad.c
index da39191e727..f83b754505f 100644
--- a/drivers/xen/xen-acpi-pad.c
+++ b/drivers/xen/xen-acpi-pad.c
@@ -14,13 +14,14 @@
* more details.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/kernel.h>
#include <linux/types.h>
-#include <acpi/acpi_bus.h>
-#include <acpi/acpi_drivers.h>
-#include <asm/xen/hypercall.h>
+#include <linux/acpi.h>
#include <xen/interface/version.h>
#include <xen/xen-ops.h>
+#include <asm/xen/hypercall.h>
#define ACPI_PROCESSOR_AGGREGATOR_CLASS "acpi_pad"
#define ACPI_PROCESSOR_AGGREGATOR_DEVICE_NAME "Processor Aggregator"
@@ -76,27 +77,14 @@ static int acpi_pad_pur(acpi_handle handle)
return num;
}
-/* Notify firmware how many CPUs are idle */
-static void acpi_pad_ost(acpi_handle handle, int stat,
- uint32_t idle_nums)
-{
- union acpi_object params[3] = {
- {.type = ACPI_TYPE_INTEGER,},
- {.type = ACPI_TYPE_INTEGER,},
- {.type = ACPI_TYPE_BUFFER,},
- };
- struct acpi_object_list arg_list = {3, params};
-
- params[0].integer.value = ACPI_PROCESSOR_AGGREGATOR_NOTIFY;
- params[1].integer.value = stat;
- params[2].buffer.length = 4;
- params[2].buffer.pointer = (void *)&idle_nums;
- acpi_evaluate_object(handle, "_OST", &arg_list, NULL);
-}
-
static void acpi_pad_handle_notify(acpi_handle handle)
{
int idle_nums;
+ struct acpi_buffer param = {
+ .length = 4,
+ .pointer = (void *)&idle_nums,
+ };
+
mutex_lock(&xen_cpu_lock);
idle_nums = acpi_pad_pur(handle);
@@ -108,7 +96,8 @@ static void acpi_pad_handle_notify(acpi_handle handle)
idle_nums = xen_acpi_pad_idle_cpus(idle_nums)
?: xen_acpi_pad_idle_cpus_num();
if (idle_nums >= 0)
- acpi_pad_ost(handle, 0, idle_nums);
+ acpi_evaluate_ost(handle, ACPI_PROCESSOR_AGGREGATOR_NOTIFY,
+ 0, &param);
mutex_unlock(&xen_cpu_lock);
}
@@ -140,8 +129,7 @@ static int acpi_pad_add(struct acpi_device *device)
return 0;
}
-static int acpi_pad_remove(struct acpi_device *device,
- int type)
+static int acpi_pad_remove(struct acpi_device *device)
{
mutex_lock(&xen_cpu_lock);
xen_acpi_pad_idle_cpus(0);
diff --git a/drivers/xen/xen-acpi-processor.c b/drivers/xen/xen-acpi-processor.c
index 316df65163c..59fc190f1e9 100644
--- a/drivers/xen/xen-acpi-processor.c
+++ b/drivers/xen/xen-acpi-processor.c
@@ -17,6 +17,8 @@
*
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/cpumask.h>
#include <linux/cpufreq.h>
#include <linux/freezer.h>
@@ -25,16 +27,13 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/types.h>
-#include <acpi/acpi_bus.h>
-#include <acpi/acpi_drivers.h>
+#include <linux/acpi.h>
#include <acpi/processor.h>
-
#include <xen/xen.h>
+#include <xen/xen-ops.h>
#include <xen/interface/platform.h>
#include <asm/xen/hypercall.h>
-#define DRV_NAME "xen-acpi-processor: "
-
static int no_hypercall;
MODULE_PARM_DESC(off, "Inhibit the hypercall.");
module_param_named(off, no_hypercall, int, 0400);
@@ -51,9 +50,9 @@ static DEFINE_MUTEX(acpi_ids_mutex);
/* Which ACPI ID we have processed from 'struct acpi_processor'. */
static unsigned long *acpi_ids_done;
/* Which ACPI ID exist in the SSDT/DSDT processor definitions. */
-static unsigned long __initdata *acpi_id_present;
+static unsigned long *acpi_id_present;
/* And if there is an _CST definition (or a PBLK) for the ACPI IDs */
-static unsigned long __initdata *acpi_id_cst_present;
+static unsigned long *acpi_id_cst_present;
static int push_cxx_to_hypervisor(struct acpi_processor *_pr)
{
@@ -103,7 +102,7 @@ static int push_cxx_to_hypervisor(struct acpi_processor *_pr)
set_xen_guest_handle(dst_cx->dp, NULL);
}
if (!ok) {
- pr_debug(DRV_NAME "No _Cx for ACPI CPU %u\n", _pr->acpi_id);
+ pr_debug("No _Cx for ACPI CPU %u\n", _pr->acpi_id);
kfree(dst_cx_states);
return -EINVAL;
}
@@ -128,11 +127,11 @@ static int push_cxx_to_hypervisor(struct acpi_processor *_pr)
pr_debug(" C%d: %s %d uS\n",
cx->type, cx->desc, (u32)cx->latency);
}
- } else if (ret != -EINVAL)
+ } else if ((ret != -EINVAL) && (ret != -ENOSYS))
/* EINVAL means the ACPI ID is incorrect - meaning the ACPI
* table is referencing a non-existing CPU - which can happen
* with broken ACPI tables. */
- pr_err(DRV_NAME "(CX): Hypervisor error (%d) for ACPI CPU%u\n",
+ pr_err("(CX): Hypervisor error (%d) for ACPI CPU%u\n",
ret, _pr->acpi_id);
kfree(dst_cx_states);
@@ -238,7 +237,7 @@ static int push_pxx_to_hypervisor(struct acpi_processor *_pr)
dst_perf->flags |= XEN_PX_PSD;
if (dst_perf->flags != (XEN_PX_PSD | XEN_PX_PSS | XEN_PX_PCT | XEN_PX_PPC)) {
- pr_warn(DRV_NAME "ACPI CPU%u missing some P-state data (%x), skipping.\n",
+ pr_warn("ACPI CPU%u missing some P-state data (%x), skipping\n",
_pr->acpi_id, dst_perf->flags);
ret = -ENODEV;
goto err_free;
@@ -260,12 +259,12 @@ static int push_pxx_to_hypervisor(struct acpi_processor *_pr)
(u32) perf->states[i].power,
(u32) perf->states[i].transition_latency);
}
- } else if (ret != -EINVAL)
+ } else if ((ret != -EINVAL) && (ret != -ENOSYS))
/* EINVAL means the ACPI ID is incorrect - meaning the ACPI
* table is referencing a non-existing CPU - which can happen
* with broken ACPI tables. */
- pr_warn(DRV_NAME "(_PXX): Hypervisor error (%d) for ACPI CPU%u\n",
- ret, _pr->acpi_id);
+ pr_warn("(_PXX): Hypervisor error (%d) for ACPI CPU%u\n",
+ ret, _pr->acpi_id);
err_free:
if (!IS_ERR_OR_NULL(dst_states))
kfree(dst_states);
@@ -317,7 +316,7 @@ static unsigned int __init get_max_acpi_id(void)
max_acpi_id = max(info->acpi_id, max_acpi_id);
}
max_acpi_id *= 2; /* Slack for CPU hotplug support. */
- pr_debug(DRV_NAME "Max ACPI ID: %u\n", max_acpi_id);
+ pr_debug("Max ACPI ID: %u\n", max_acpi_id);
return max_acpi_id;
}
/*
@@ -329,7 +328,7 @@ static unsigned int __init get_max_acpi_id(void)
* for_each_[present|online]_cpu macros which are banded to the virtual
* CPU amount.
*/
-static acpi_status __init
+static acpi_status
read_acpi_id(acpi_handle handle, u32 lvl, void *context, void **rv)
{
u32 acpi_id;
@@ -364,15 +363,14 @@ read_acpi_id(acpi_handle handle, u32 lvl, void *context, void **rv)
/* There are more ACPI Processor objects than in x2APIC or MADT.
* This can happen with incorrect ACPI SSDT declerations. */
if (acpi_id > nr_acpi_bits) {
- pr_debug(DRV_NAME "We only have %u, trying to set %u\n",
+ pr_debug("We only have %u, trying to set %u\n",
nr_acpi_bits, acpi_id);
return AE_OK;
}
/* OK, There is a ACPI Processor object */
__set_bit(acpi_id, acpi_id_present);
- pr_debug(DRV_NAME "ACPI CPU%u w/ PBLK:0x%lx\n", acpi_id,
- (unsigned long)pblk);
+ pr_debug("ACPI CPU%u w/ PBLK:0x%lx\n", acpi_id, (unsigned long)pblk);
status = acpi_evaluate_object(handle, "_CST", NULL, &buffer);
if (ACPI_FAILURE(status)) {
@@ -384,12 +382,16 @@ read_acpi_id(acpi_handle handle, u32 lvl, void *context, void **rv)
return AE_OK;
}
-static int __init check_acpi_ids(struct acpi_processor *pr_backup)
+static int check_acpi_ids(struct acpi_processor *pr_backup)
{
if (!pr_backup)
return -ENODEV;
+ if (acpi_id_present && acpi_id_cst_present)
+ /* OK, done this once .. skip to uploading */
+ goto upload;
+
/* All online CPUs have been processed at this stage. Now verify
* whether in fact "online CPUs" == physical CPUs.
*/
@@ -408,6 +410,7 @@ static int __init check_acpi_ids(struct acpi_processor *pr_backup)
read_acpi_id, NULL, NULL, NULL);
acpi_get_devices("ACPI0007", read_acpi_id, NULL, NULL);
+upload:
if (!bitmap_equal(acpi_id_present, acpi_ids_done, nr_acpi_bits)) {
unsigned int i;
for_each_set_bit(i, acpi_id_present, nr_acpi_bits) {
@@ -417,10 +420,7 @@ static int __init check_acpi_ids(struct acpi_processor *pr_backup)
(void)upload_pm_data(pr_backup);
}
}
- kfree(acpi_id_present);
- acpi_id_present = NULL;
- kfree(acpi_id_cst_present);
- acpi_id_cst_present = NULL;
+
return 0;
}
static int __init check_prereq(void)
@@ -467,10 +467,48 @@ static void free_acpi_perf_data(void)
free_percpu(acpi_perf_data);
}
-static int __init xen_acpi_processor_init(void)
+static int xen_upload_processor_pm_data(void)
{
struct acpi_processor *pr_backup = NULL;
unsigned int i;
+ int rc = 0;
+
+ pr_info("Uploading Xen processor PM info\n");
+
+ for_each_possible_cpu(i) {
+ struct acpi_processor *_pr;
+ _pr = per_cpu(processors, i /* APIC ID */);
+ if (!_pr)
+ continue;
+
+ if (!pr_backup) {
+ pr_backup = kzalloc(sizeof(struct acpi_processor), GFP_KERNEL);
+ if (pr_backup)
+ memcpy(pr_backup, _pr, sizeof(struct acpi_processor));
+ }
+ (void)upload_pm_data(_pr);
+ }
+
+ rc = check_acpi_ids(pr_backup);
+ kfree(pr_backup);
+
+ return rc;
+}
+
+static int xen_acpi_processor_resume(struct notifier_block *nb,
+ unsigned long action, void *data)
+{
+ bitmap_zero(acpi_ids_done, nr_acpi_bits);
+ return xen_upload_processor_pm_data();
+}
+
+struct notifier_block xen_acpi_processor_resume_nb = {
+ .notifier_call = xen_acpi_processor_resume,
+};
+
+static int __init xen_acpi_processor_init(void)
+{
+ unsigned int i;
int rc = check_prereq();
if (rc)
@@ -483,7 +521,7 @@ static int __init xen_acpi_processor_init(void)
acpi_perf_data = alloc_percpu(struct acpi_processor_performance);
if (!acpi_perf_data) {
- pr_debug(DRV_NAME "Memory allocation error for acpi_perf_data.\n");
+ pr_debug("Memory allocation error for acpi_perf_data\n");
kfree(acpi_ids_done);
return -ENOMEM;
}
@@ -500,38 +538,26 @@ static int __init xen_acpi_processor_init(void)
(void)acpi_processor_preregister_performance(acpi_perf_data);
for_each_possible_cpu(i) {
+ struct acpi_processor *pr;
struct acpi_processor_performance *perf;
+ pr = per_cpu(processors, i);
perf = per_cpu_ptr(acpi_perf_data, i);
- rc = acpi_processor_register_performance(perf, i);
- if (rc)
- goto err_out;
- }
- rc = acpi_processor_notify_smm(THIS_MODULE);
- if (rc)
- goto err_unregister;
-
- for_each_possible_cpu(i) {
- struct acpi_processor *_pr;
- _pr = per_cpu(processors, i /* APIC ID */);
- if (!_pr)
+ if (!pr)
continue;
- if (!pr_backup) {
- pr_backup = kzalloc(sizeof(struct acpi_processor), GFP_KERNEL);
- if (pr_backup)
- memcpy(pr_backup, _pr, sizeof(struct acpi_processor));
- }
- (void)upload_pm_data(_pr);
+ pr->performance = perf;
+ rc = acpi_processor_get_performance_info(pr);
+ if (rc)
+ goto err_out;
}
- rc = check_acpi_ids(pr_backup);
-
- kfree(pr_backup);
- pr_backup = NULL;
+ rc = xen_upload_processor_pm_data();
if (rc)
goto err_unregister;
+ xen_resume_notifier_register(&xen_acpi_processor_resume_nb);
+
return 0;
err_unregister:
for_each_possible_cpu(i) {
@@ -549,7 +575,10 @@ static void __exit xen_acpi_processor_exit(void)
{
int i;
+ xen_resume_notifier_unregister(&xen_acpi_processor_resume_nb);
kfree(acpi_ids_done);
+ kfree(acpi_id_present);
+ kfree(acpi_id_cst_present);
for_each_possible_cpu(i) {
struct acpi_processor_performance *perf;
perf = per_cpu_ptr(acpi_perf_data, i);
diff --git a/drivers/xen/xen-balloon.c b/drivers/xen/xen-balloon.c
index 8f37e23f6d1..e555845d61f 100644
--- a/drivers/xen/xen-balloon.c
+++ b/drivers/xen/xen-balloon.c
@@ -30,6 +30,8 @@
* IN THE SOFTWARE.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/capability.h>
@@ -81,7 +83,7 @@ static int balloon_init_watcher(struct notifier_block *notifier,
err = register_xenbus_watch(&target_watch);
if (err)
- printk(KERN_ERR "Failed to set balloon watcher\n");
+ pr_err("Failed to set balloon watcher\n");
return NOTIFY_DONE;
}
@@ -95,7 +97,7 @@ static int __init balloon_init(void)
if (!xen_domain())
return -ENODEV;
- pr_info("xen-balloon: Initialising balloon driver.\n");
+ pr_info("Initialising balloon driver\n");
register_balloon(&balloon_dev);
diff --git a/drivers/xen/xen-pciback/conf_space_header.c b/drivers/xen/xen-pciback/conf_space_header.c
index 3daf862d739..c5ee82587e8 100644
--- a/drivers/xen/xen-pciback/conf_space_header.c
+++ b/drivers/xen/xen-pciback/conf_space_header.c
@@ -4,6 +4,8 @@
* Author: Ryan Wilson <hap9@epoch.ncsc.mil>
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/kernel.h>
#include <linux/pci.h>
#include "pciback.h"
@@ -75,10 +77,8 @@ static int command_write(struct pci_dev *dev, int offset, u16 value, void *data)
pci_name(dev));
err = pci_set_mwi(dev);
if (err) {
- printk(KERN_WARNING
- DRV_NAME ": %s: cannot enable "
- "memory-write-invalidate (%d)\n",
- pci_name(dev), err);
+ pr_warn("%s: cannot enable memory-write-invalidate (%d)\n",
+ pci_name(dev), err);
value &= ~PCI_COMMAND_INVALIDATE;
}
}
@@ -91,7 +91,7 @@ static int rom_write(struct pci_dev *dev, int offset, u32 value, void *data)
struct pci_bar_info *bar = data;
if (unlikely(!bar)) {
- printk(KERN_WARNING DRV_NAME ": driver data not found for %s\n",
+ pr_warn(DRV_NAME ": driver data not found for %s\n",
pci_name(dev));
return XEN_PCI_ERR_op_failed;
}
@@ -125,7 +125,7 @@ static int bar_write(struct pci_dev *dev, int offset, u32 value, void *data)
struct pci_bar_info *bar = data;
if (unlikely(!bar)) {
- printk(KERN_WARNING DRV_NAME ": driver data not found for %s\n",
+ pr_warn(DRV_NAME ": driver data not found for %s\n",
pci_name(dev));
return XEN_PCI_ERR_op_failed;
}
@@ -153,7 +153,7 @@ static int bar_read(struct pci_dev *dev, int offset, u32 * value, void *data)
struct pci_bar_info *bar = data;
if (unlikely(!bar)) {
- printk(KERN_WARNING DRV_NAME ": driver data not found for %s\n",
+ pr_warn(DRV_NAME ": driver data not found for %s\n",
pci_name(dev));
return XEN_PCI_ERR_op_failed;
}
@@ -375,7 +375,7 @@ int xen_pcibk_config_header_add_fields(struct pci_dev *dev)
default:
err = -EINVAL;
- printk(KERN_ERR DRV_NAME ": %s: Unsupported header type %d!\n",
+ pr_err("%s: Unsupported header type %d!\n",
pci_name(dev), dev->hdr_type);
break;
}
diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c
index 9204126f156..d57a173685f 100644
--- a/drivers/xen/xen-pciback/pci_stub.c
+++ b/drivers/xen/xen-pciback/pci_stub.c
@@ -4,6 +4,9 @@
* Ryan Wilson <hap9@epoch.ncsc.mil>
* Chris Bookholt <hap10@epoch.ncsc.mil>
*/
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/init.h>
#include <linux/rwsem.h>
@@ -17,6 +20,7 @@
#include <xen/events.h>
#include <asm/xen/pci.h>
#include <asm/xen/hypervisor.h>
+#include <xen/interface/physdev.h>
#include "pciback.h"
#include "conf_space.h"
#include "conf_space_quirks.h"
@@ -85,37 +89,52 @@ static struct pcistub_device *pcistub_device_alloc(struct pci_dev *dev)
static void pcistub_device_release(struct kref *kref)
{
struct pcistub_device *psdev;
+ struct pci_dev *dev;
struct xen_pcibk_dev_data *dev_data;
psdev = container_of(kref, struct pcistub_device, kref);
- dev_data = pci_get_drvdata(psdev->dev);
+ dev = psdev->dev;
+ dev_data = pci_get_drvdata(dev);
- dev_dbg(&psdev->dev->dev, "pcistub_device_release\n");
+ dev_dbg(&dev->dev, "pcistub_device_release\n");
- xen_unregister_device_domain_owner(psdev->dev);
+ xen_unregister_device_domain_owner(dev);
/* Call the reset function which does not take lock as this
* is called from "unbind" which takes a device_lock mutex.
*/
- __pci_reset_function_locked(psdev->dev);
- if (pci_load_and_free_saved_state(psdev->dev,
- &dev_data->pci_saved_state)) {
- dev_dbg(&psdev->dev->dev, "Could not reload PCI state\n");
- } else
- pci_restore_state(psdev->dev);
+ __pci_reset_function_locked(dev);
+ if (pci_load_and_free_saved_state(dev, &dev_data->pci_saved_state))
+ dev_dbg(&dev->dev, "Could not reload PCI state\n");
+ else
+ pci_restore_state(dev);
+
+ if (dev->msix_cap) {
+ struct physdev_pci_device ppdev = {
+ .seg = pci_domain_nr(dev->bus),
+ .bus = dev->bus->number,
+ .devfn = dev->devfn
+ };
+ int err = HYPERVISOR_physdev_op(PHYSDEVOP_release_msix,
+ &ppdev);
+
+ if (err)
+ dev_warn(&dev->dev, "MSI-X release failed (%d)\n",
+ err);
+ }
/* Disable the device */
- xen_pcibk_reset_device(psdev->dev);
+ xen_pcibk_reset_device(dev);
kfree(dev_data);
- pci_set_drvdata(psdev->dev, NULL);
+ pci_set_drvdata(dev, NULL);
/* Clean-up the device */
- xen_pcibk_config_free_dyn_fields(psdev->dev);
- xen_pcibk_config_free_dev(psdev->dev);
+ xen_pcibk_config_free_dyn_fields(dev);
+ xen_pcibk_config_free_dev(dev);
- psdev->dev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED;
- pci_dev_put(psdev->dev);
+ dev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED;
+ pci_dev_put(dev);
kfree(psdev);
}
@@ -223,6 +242,15 @@ struct pci_dev *pcistub_get_pci_dev(struct xen_pcibk_device *pdev,
return found_dev;
}
+/*
+ * Called when:
+ * - XenBus state has been reconfigure (pci unplug). See xen_pcibk_remove_device
+ * - XenBus state has been disconnected (guest shutdown). See xen_pcibk_xenbus_remove
+ * - 'echo BDF > unbind' on pciback module with no guest attached. See pcistub_remove
+ * - 'echo BDF > unbind' with a guest still using it. See pcistub_remove
+ *
+ * As such we have to be careful.
+ */
void pcistub_put_pci_dev(struct pci_dev *dev)
{
struct pcistub_device *psdev, *found_psdev = NULL;
@@ -253,16 +281,16 @@ void pcistub_put_pci_dev(struct pci_dev *dev)
* and want to inhibit the user from fiddling with 'reset'
*/
pci_reset_function(dev);
- pci_restore_state(psdev->dev);
+ pci_restore_state(dev);
/* This disables the device. */
- xen_pcibk_reset_device(found_psdev->dev);
+ xen_pcibk_reset_device(dev);
/* And cleanup up our emulated fields. */
- xen_pcibk_config_free_dyn_fields(found_psdev->dev);
- xen_pcibk_config_reset_dev(found_psdev->dev);
+ xen_pcibk_config_reset_dev(dev);
+ xen_pcibk_config_free_dyn_fields(dev);
- xen_unregister_device_domain_owner(found_psdev->dev);
+ xen_unregister_device_domain_owner(dev);
spin_lock_irqsave(&found_psdev->lock, flags);
found_psdev->pdev = NULL;
@@ -355,6 +383,19 @@ static int pcistub_init_device(struct pci_dev *dev)
if (err)
goto config_release;
+ if (dev->msix_cap) {
+ struct physdev_pci_device ppdev = {
+ .seg = pci_domain_nr(dev->bus),
+ .bus = dev->bus->number,
+ .devfn = dev->devfn
+ };
+
+ err = HYPERVISOR_physdev_op(PHYSDEVOP_prepare_msix, &ppdev);
+ if (err)
+ dev_err(&dev->dev, "MSI-X preparation failed (%d)\n",
+ err);
+ }
+
/* We need the device active to save the state. */
dev_dbg(&dev->dev, "save state of device\n");
pci_save_state(dev);
@@ -396,8 +437,6 @@ static int __init pcistub_init_devices_late(void)
unsigned long flags;
int err = 0;
- pr_debug(DRV_NAME ": pcistub_init_devices_late\n");
-
spin_lock_irqsave(&pcistub_devices_lock, flags);
while (!list_empty(&seized_devices)) {
@@ -463,6 +502,8 @@ static int pcistub_seize(struct pci_dev *dev)
return err;
}
+/* Called when 'bind'. This means we must _NOT_ call pci_reset_function or
+ * other functions that take the sysfs lock. */
static int pcistub_probe(struct pci_dev *dev, const struct pci_device_id *id)
{
int err = 0;
@@ -490,6 +531,8 @@ out:
return err;
}
+/* Called when 'unbind'. This means we must _NOT_ call pci_reset_function or
+ * other functions that take the sysfs lock. */
static void pcistub_remove(struct pci_dev *dev)
{
struct pcistub_device *psdev, *found_psdev = NULL;
@@ -515,16 +558,14 @@ static void pcistub_remove(struct pci_dev *dev)
found_psdev->pdev);
if (found_psdev->pdev) {
- printk(KERN_WARNING DRV_NAME ": ****** removing device "
- "%s while still in-use! ******\n",
+ pr_warn("****** removing device %s while still in-use! ******\n",
pci_name(found_psdev->dev));
- printk(KERN_WARNING DRV_NAME ": ****** driver domain may"
- " still access this device's i/o resources!\n");
- printk(KERN_WARNING DRV_NAME ": ****** shutdown driver "
- "domain before binding device\n");
- printk(KERN_WARNING DRV_NAME ": ****** to other drivers "
- "or domains\n");
+ pr_warn("****** driver domain may still access this device's i/o resources!\n");
+ pr_warn("****** shutdown driver domain before binding device\n");
+ pr_warn("****** to other drivers or domains\n");
+ /* N.B. This ends up calling pcistub_put_pci_dev which ends up
+ * doing the FLR. */
xen_pcibk_release_pci_dev(found_psdev->pdev,
found_psdev->dev);
}
@@ -989,7 +1030,7 @@ static int pcistub_device_id_add(int domain, int bus, int slot, int func)
pci_dev_id->bus = bus;
pci_dev_id->devfn = devfn;
- pr_debug(DRV_NAME ": wants to seize %04x:%02x:%02x.%d\n",
+ pr_debug("wants to seize %04x:%02x:%02x.%d\n",
domain, bus, slot, func);
spin_lock_irqsave(&device_ids_lock, flags);
@@ -1019,8 +1060,8 @@ static int pcistub_device_id_remove(int domain, int bus, int slot, int func)
err = 0;
- pr_debug(DRV_NAME ": removed %04x:%02x:%02x.%d from "
- "seize list\n", domain, bus, slot, func);
+ pr_debug("removed %04x:%02x:%02x.%d from seize list\n",
+ domain, bus, slot, func);
}
}
spin_unlock_irqrestore(&device_ids_lock, flags);
@@ -1167,19 +1208,23 @@ static ssize_t pcistub_irq_handler_switch(struct device_driver *drv,
struct pcistub_device *psdev;
struct xen_pcibk_dev_data *dev_data;
int domain, bus, slot, func;
- int err = -ENOENT;
+ int err;
err = str_to_slot(buf, &domain, &bus, &slot, &func);
if (err)
return err;
psdev = pcistub_device_find(domain, bus, slot, func);
- if (!psdev)
+ if (!psdev) {
+ err = -ENOENT;
goto out;
+ }
dev_data = pci_get_drvdata(psdev->dev);
- if (!dev_data)
+ if (!dev_data) {
+ err = -ENOENT;
goto out;
+ }
dev_dbg(&psdev->dev->dev, "%s fake irq handler: %d->%d\n",
dev_data->irq_name, dev_data->isr_on,
@@ -1441,7 +1486,7 @@ out:
return err;
parse_error:
- printk(KERN_ERR DRV_NAME ": Error parsing pci_devs_to_hide at \"%s\"\n",
+ pr_err("Error parsing pci_devs_to_hide at \"%s\"\n",
pci_devs_to_hide + pos);
return -EINVAL;
}
diff --git a/drivers/xen/xen-pciback/pciback_ops.c b/drivers/xen/xen-pciback/pciback_ops.c
index 97f5d264c31..c4a0666de6f 100644
--- a/drivers/xen/xen-pciback/pciback_ops.c
+++ b/drivers/xen/xen-pciback/pciback_ops.c
@@ -3,6 +3,9 @@
*
* Author: Ryan Wilson <hap9@epoch.ncsc.mil>
*/
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/wait.h>
#include <linux/bitops.h>
@@ -113,7 +116,8 @@ void xen_pcibk_reset_device(struct pci_dev *dev)
if (dev->msi_enabled)
pci_disable_msi(dev);
#endif
- pci_disable_device(dev);
+ if (pci_is_enabled(dev))
+ pci_disable_device(dev);
pci_write_config_word(dev, PCI_COMMAND, 0);
@@ -135,7 +139,6 @@ int xen_pcibk_enable_msi(struct xen_pcibk_device *pdev,
struct pci_dev *dev, struct xen_pci_op *op)
{
struct xen_pcibk_dev_data *dev_data;
- int otherend = pdev->xdev->otherend_id;
int status;
if (unlikely(verbose_request))
@@ -144,8 +147,9 @@ int xen_pcibk_enable_msi(struct xen_pcibk_device *pdev,
status = pci_enable_msi(dev);
if (status) {
- printk(KERN_ERR "error enable msi for guest %x status %x\n",
- otherend, status);
+ pr_warn_ratelimited("%s: error enabling MSI for guest %u: err %d\n",
+ pci_name(dev), pdev->xdev->otherend_id,
+ status);
op->value = 0;
return XEN_PCI_ERR_op_failed;
}
@@ -209,12 +213,11 @@ int xen_pcibk_enable_msix(struct xen_pcibk_device *pdev,
entries[i].vector = op->msix_entries[i].vector;
}
- result = pci_enable_msix(dev, entries, op->value);
-
+ result = pci_enable_msix_exact(dev, entries, op->value);
if (result == 0) {
for (i = 0; i < op->value; i++) {
op->msix_entries[i].entry = entries[i].entry;
- if (entries[i].vector)
+ if (entries[i].vector) {
op->msix_entries[i].vector =
xen_pirq_from_irq(entries[i].vector);
if (unlikely(verbose_request))
@@ -222,11 +225,12 @@ int xen_pcibk_enable_msix(struct xen_pcibk_device *pdev,
"MSI-X[%d]: %d\n",
pci_name(dev), i,
op->msix_entries[i].vector);
+ }
}
- } else {
- printk(KERN_WARNING DRV_NAME ": %s: failed to enable MSI-X: err %d!\n",
- pci_name(dev), result);
- }
+ } else
+ pr_warn_ratelimited("%s: error enabling MSI-X for guest %u: err %d!\n",
+ pci_name(dev), pdev->xdev->otherend_id,
+ result);
kfree(entries);
op->value = result;
@@ -344,9 +348,9 @@ void xen_pcibk_do_op(struct work_struct *data)
notify_remote_via_irq(pdev->evtchn_irq);
/* Mark that we're done. */
- smp_mb__before_clear_bit(); /* /after/ clearing PCIF_active */
+ smp_mb__before_atomic(); /* /after/ clearing PCIF_active */
clear_bit(_PDEVF_op_active, &pdev->flags);
- smp_mb__after_clear_bit(); /* /before/ final check for work */
+ smp_mb__after_atomic(); /* /before/ final check for work */
/* Check to see if the driver domain tried to start another request in
* between clearing _XEN_PCIF_active and clearing _PDEVF_op_active.
@@ -371,7 +375,7 @@ static irqreturn_t xen_pcibk_guest_interrupt(int irq, void *dev_id)
dev_data->handled++;
if ((dev_data->handled % 1000) == 0) {
if (xen_test_irq_shared(irq)) {
- printk(KERN_INFO "%s IRQ line is not shared "
+ pr_info("%s IRQ line is not shared "
"with other domains. Turning ISR off\n",
dev_data->irq_name);
dev_data->ack_intr = 0;
diff --git a/drivers/xen/xen-pciback/vpci.c b/drivers/xen/xen-pciback/vpci.c
index 0f478ac483c..51afff96c51 100644
--- a/drivers/xen/xen-pciback/vpci.c
+++ b/drivers/xen/xen-pciback/vpci.c
@@ -5,6 +5,8 @@
* Author: Ryan Wilson <hap9@epoch.ncsc.mil>
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/pci.h>
@@ -102,8 +104,7 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
struct pci_dev_entry, list);
if (match_slot(dev, t->dev)) {
- pr_info(DRV_NAME ": vpci: %s: "
- "assign to virtual slot %d func %d\n",
+ pr_info("vpci: %s: assign to virtual slot %d func %d\n",
pci_name(dev), slot,
PCI_FUNC(dev->devfn));
list_add_tail(&dev_entry->list,
@@ -117,9 +118,8 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
/* Assign to a new slot on the virtual PCI bus */
for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
if (list_empty(&vpci_dev->dev_list[slot])) {
- printk(KERN_INFO DRV_NAME
- ": vpci: %s: assign to virtual slot %d\n",
- pci_name(dev), slot);
+ pr_info("vpci: %s: assign to virtual slot %d\n",
+ pci_name(dev), slot);
list_add_tail(&dev_entry->list,
&vpci_dev->dev_list[slot]);
func = dev->is_virtfn ? 0 : PCI_FUNC(dev->devfn);
@@ -137,6 +137,8 @@ unlock:
/* Publish this device. */
if (!err)
err = publish_cb(pdev, 0, 0, PCI_DEVFN(slot, func), devid);
+ else
+ kfree(dev_entry);
out:
return err;
diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c
index 64b11f99eac..4a7e6e0a5f4 100644
--- a/drivers/xen/xen-pciback/xenbus.c
+++ b/drivers/xen/xen-pciback/xenbus.c
@@ -3,6 +3,9 @@
*
* Author: Ryan Wilson <hap9@epoch.ncsc.mil>
*/
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/init.h>
#include <linux/list.h>
@@ -90,6 +93,8 @@ static void free_pdev(struct xen_pcibk_device *pdev)
xen_pcibk_disconnect(pdev);
+ /* N.B. This calls pcistub_put_pci_dev which does the FLR on all
+ * of the PCIe devices. */
xen_pcibk_release_devices(pdev);
dev_set_drvdata(&pdev->xdev->dev, NULL);
@@ -283,6 +288,8 @@ static int xen_pcibk_remove_device(struct xen_pcibk_device *pdev,
dev_dbg(&dev->dev, "unregistering for %d\n", pdev->xdev->otherend_id);
xen_unregister_device_domain_owner(dev);
+ /* N.B. This ends up calling pcistub_put_pci_dev which ends up
+ * doing the FLR. */
xen_pcibk_release_pci_dev(pdev, dev);
out:
@@ -723,14 +730,13 @@ int __init xen_pcibk_xenbus_register(void)
{
xen_pcibk_wq = create_workqueue("xen_pciback_workqueue");
if (!xen_pcibk_wq) {
- printk(KERN_ERR "%s: create"
- "xen_pciback_workqueue failed\n", __func__);
+ pr_err("%s: create xen_pciback_workqueue failed\n", __func__);
return -EFAULT;
}
xen_pcibk_backend = &xen_pcibk_vpci_backend;
if (passthrough)
xen_pcibk_backend = &xen_pcibk_passthrough_backend;
- pr_info(DRV_NAME ": backend is %s\n", xen_pcibk_backend->name);
+ pr_info("backend is %s\n", xen_pcibk_backend->name);
return xenbus_register_backend(&xen_pcibk_driver);
}
diff --git a/drivers/xen/xen-selfballoon.c b/drivers/xen/xen-selfballoon.c
index 2552d3e0a70..3b2bffde534 100644
--- a/drivers/xen/xen-selfballoon.c
+++ b/drivers/xen/xen-selfballoon.c
@@ -53,20 +53,19 @@
* System configuration note: Selfballooning should not be enabled on
* systems without a sufficiently large swap device configured; for best
* results, it is recommended that total swap be increased by the size
- * of the guest memory. Also, while technically not required to be
- * configured, it is highly recommended that frontswap also be configured
- * and enabled when selfballooning is running. So, selfballooning
- * is disabled by default if frontswap is not configured and can only
- * be enabled with the "selfballooning" kernel boot option; similarly
- * selfballooning is enabled by default if frontswap is configured and
- * can be disabled with the "noselfballooning" kernel boot option. Finally,
- * when frontswap is configured, frontswap-selfshrinking can be disabled
- * with the "noselfshrink" kernel boot option.
+ * of the guest memory. Note, that selfballooning should be disabled by default
+ * if frontswap is not configured. Similarly selfballooning should be enabled
+ * by default if frontswap is configured and can be disabled with the
+ * "tmem.selfballooning=0" kernel boot option. Finally, when frontswap is
+ * configured, frontswap-selfshrinking can be disabled with the
+ * "tmem.selfshrink=0" kernel boot option.
*
* Selfballooning is disallowed in domain0 and force-disabled.
*
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/kernel.h>
#include <linux/bootmem.h>
#include <linux/swap.h>
@@ -120,9 +119,6 @@ static DECLARE_DELAYED_WORK(selfballoon_worker, selfballoon_process);
/* Enable/disable with sysfs. */
static bool frontswap_selfshrinking __read_mostly;
-/* Enable/disable with kernel boot option. */
-static bool use_frontswap_selfshrink __initdata = true;
-
/*
* The default values for the following parameters were deemed reasonable
* by experimentation, may be workload-dependent, and can all be
@@ -174,40 +170,13 @@ static void frontswap_selfshrink(void)
tgt_frontswap_pages = cur_frontswap_pages -
(cur_frontswap_pages / frontswap_hysteresis);
frontswap_shrink(tgt_frontswap_pages);
+ frontswap_inertia_counter = frontswap_inertia;
}
-static int __init xen_nofrontswap_selfshrink_setup(char *s)
-{
- use_frontswap_selfshrink = false;
- return 1;
-}
-
-__setup("noselfshrink", xen_nofrontswap_selfshrink_setup);
-
-/* Disable with kernel boot option. */
-static bool use_selfballooning __initdata = true;
-
-static int __init xen_noselfballooning_setup(char *s)
-{
- use_selfballooning = false;
- return 1;
-}
-
-__setup("noselfballooning", xen_noselfballooning_setup);
-#else /* !CONFIG_FRONTSWAP */
-/* Enable with kernel boot option. */
-static bool use_selfballooning __initdata = false;
-
-static int __init xen_selfballooning_setup(char *s)
-{
- use_selfballooning = true;
- return 1;
-}
-
-__setup("selfballooning", xen_selfballooning_setup);
#endif /* CONFIG_FRONTSWAP */
#define MB2PAGES(mb) ((mb) << (20 - PAGE_SHIFT))
+#define PAGES2MB(pages) ((pages) >> (20 - PAGE_SHIFT))
/*
* Use current balloon size, the goal (vm_committed_as), and hysteresis
@@ -298,8 +267,10 @@ static ssize_t store_selfballooning(struct device *dev,
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- err = strict_strtoul(buf, 10, &tmp);
- if (err || ((tmp != 0) && (tmp != 1)))
+ err = kstrtoul(buf, 10, &tmp);
+ if (err)
+ return err;
+ if ((tmp != 0) && (tmp != 1))
return -EINVAL;
xen_selfballooning_enabled = !!tmp;
@@ -325,8 +296,10 @@ static ssize_t store_selfballoon_interval(struct device *dev,
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- err = strict_strtoul(buf, 10, &val);
- if (err || val == 0)
+ err = kstrtoul(buf, 10, &val);
+ if (err)
+ return err;
+ if (val == 0)
return -EINVAL;
selfballoon_interval = val;
return count;
@@ -347,8 +320,10 @@ static ssize_t store_selfballoon_downhys(struct device *dev,
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- err = strict_strtoul(buf, 10, &val);
- if (err || val == 0)
+ err = kstrtoul(buf, 10, &val);
+ if (err)
+ return err;
+ if (val == 0)
return -EINVAL;
selfballoon_downhysteresis = val;
return count;
@@ -370,8 +345,10 @@ static ssize_t store_selfballoon_uphys(struct device *dev,
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- err = strict_strtoul(buf, 10, &val);
- if (err || val == 0)
+ err = kstrtoul(buf, 10, &val);
+ if (err)
+ return err;
+ if (val == 0)
return -EINVAL;
selfballoon_uphysteresis = val;
return count;
@@ -393,8 +370,10 @@ static ssize_t store_selfballoon_min_usable_mb(struct device *dev,
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- err = strict_strtoul(buf, 10, &val);
- if (err || val == 0)
+ err = kstrtoul(buf, 10, &val);
+ if (err)
+ return err;
+ if (val == 0)
return -EINVAL;
selfballoon_min_usable_mb = val;
return count;
@@ -417,8 +396,10 @@ static ssize_t store_selfballoon_reserved_mb(struct device *dev,
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- err = strict_strtoul(buf, 10, &val);
- if (err || val == 0)
+ err = kstrtoul(buf, 10, &val);
+ if (err)
+ return err;
+ if (val == 0)
return -EINVAL;
selfballoon_reserved_mb = val;
return count;
@@ -443,8 +424,10 @@ static ssize_t store_frontswap_selfshrinking(struct device *dev,
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- err = strict_strtoul(buf, 10, &tmp);
- if (err || ((tmp != 0) && (tmp != 1)))
+ err = kstrtoul(buf, 10, &tmp);
+ if (err)
+ return err;
+ if ((tmp != 0) && (tmp != 1))
return -EINVAL;
frontswap_selfshrinking = !!tmp;
if (!was_enabled && !xen_selfballooning_enabled &&
@@ -470,8 +453,10 @@ static ssize_t store_frontswap_inertia(struct device *dev,
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- err = strict_strtoul(buf, 10, &val);
- if (err || val == 0)
+ err = kstrtoul(buf, 10, &val);
+ if (err)
+ return err;
+ if (val == 0)
return -EINVAL;
frontswap_inertia = val;
frontswap_inertia_counter = val;
@@ -493,8 +478,10 @@ static ssize_t store_frontswap_hysteresis(struct device *dev,
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- err = strict_strtoul(buf, 10, &val);
- if (err || val == 0)
+ err = kstrtoul(buf, 10, &val);
+ if (err)
+ return err;
+ if (val == 0)
return -EINVAL;
frontswap_hysteresis = val;
return count;
@@ -537,41 +524,56 @@ int register_xen_selfballooning(struct device *dev)
}
EXPORT_SYMBOL(register_xen_selfballooning);
-static int __init xen_selfballoon_init(void)
+int xen_selfballoon_init(bool use_selfballooning, bool use_frontswap_selfshrink)
{
bool enable = false;
+ unsigned long reserve_pages;
if (!xen_domain())
return -ENODEV;
if (xen_initial_domain()) {
- pr_info("xen/balloon: Xen selfballooning driver "
- "disabled for domain0.\n");
+ pr_info("Xen selfballooning driver disabled for domain0\n");
return -ENODEV;
}
xen_selfballooning_enabled = tmem_enabled && use_selfballooning;
if (xen_selfballooning_enabled) {
- pr_info("xen/balloon: Initializing Xen "
- "selfballooning driver.\n");
+ pr_info("Initializing Xen selfballooning driver\n");
enable = true;
}
#ifdef CONFIG_FRONTSWAP
frontswap_selfshrinking = tmem_enabled && use_frontswap_selfshrink;
if (frontswap_selfshrinking) {
- pr_info("xen/balloon: Initializing frontswap "
- "selfshrinking driver.\n");
+ pr_info("Initializing frontswap selfshrinking driver\n");
enable = true;
}
#endif
if (!enable)
return -ENODEV;
+ /*
+ * Give selfballoon_reserved_mb a default value(10% of total ram pages)
+ * to make selfballoon not so aggressive.
+ *
+ * There are mainly two reasons:
+ * 1) The original goal_page didn't consider some pages used by kernel
+ * space, like slab pages and memory used by device drivers.
+ *
+ * 2) The balloon driver may not give back memory to guest OS fast
+ * enough when the workload suddenly aquries a lot of physical memory.
+ *
+ * In both cases, the guest OS will suffer from memory pressure and
+ * OOM killer may be triggered.
+ * By reserving extra 10% of total ram pages, we can keep the system
+ * much more reliably and response faster in some cases.
+ */
+ if (!selfballoon_reserved_mb) {
+ reserve_pages = totalram_pages / 10;
+ selfballoon_reserved_mb = PAGES2MB(reserve_pages);
+ }
schedule_delayed_work(&selfballoon_worker, selfballoon_interval * HZ);
return 0;
}
-
-subsys_initcall(xen_selfballoon_init);
-
-MODULE_LICENSE("GPL");
+EXPORT_SYMBOL(xen_selfballoon_init);
diff --git a/drivers/xen/xen-stub.c b/drivers/xen/xen-stub.c
new file mode 100644
index 00000000000..bbef194c5b0
--- /dev/null
+++ b/drivers/xen/xen-stub.c
@@ -0,0 +1,100 @@
+/*
+ * xen-stub.c - stub drivers to reserve space for Xen
+ *
+ * Copyright (C) 2012 Intel Corporation
+ * Author: Liu Jinsong <jinsong.liu@intel.com>
+ * Author: Jiang Yunhong <yunhong.jiang@intel.com>
+ *
+ * Copyright (C) 2012 Oracle Inc
+ * Author: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or (at
+ * your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for more
+ * details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/export.h>
+#include <linux/types.h>
+#include <linux/acpi.h>
+#include <xen/acpi.h>
+
+#ifdef CONFIG_ACPI
+
+/*--------------------------------------------
+ stub driver for Xen memory hotplug
+--------------------------------------------*/
+
+static const struct acpi_device_id memory_device_ids[] = {
+ {ACPI_MEMORY_DEVICE_HID, 0},
+ {"", 0},
+};
+
+static struct acpi_driver xen_stub_memory_device_driver = {
+ /* same name as native memory driver to block native loaded */
+ .name = "acpi_memhotplug",
+ .class = ACPI_MEMORY_DEVICE_CLASS,
+ .ids = memory_device_ids,
+};
+
+int xen_stub_memory_device_init(void)
+{
+ if (!xen_initial_domain())
+ return -ENODEV;
+
+ /* just reserve space for Xen, block native driver loaded */
+ return acpi_bus_register_driver(&xen_stub_memory_device_driver);
+}
+EXPORT_SYMBOL_GPL(xen_stub_memory_device_init);
+subsys_initcall(xen_stub_memory_device_init);
+
+void xen_stub_memory_device_exit(void)
+{
+ acpi_bus_unregister_driver(&xen_stub_memory_device_driver);
+}
+EXPORT_SYMBOL_GPL(xen_stub_memory_device_exit);
+
+
+/*--------------------------------------------
+ stub driver for Xen cpu hotplug
+--------------------------------------------*/
+
+static const struct acpi_device_id processor_device_ids[] = {
+ {ACPI_PROCESSOR_OBJECT_HID, 0},
+ {ACPI_PROCESSOR_DEVICE_HID, 0},
+ {"", 0},
+};
+
+static struct acpi_driver xen_stub_processor_driver = {
+ /* same name as native processor driver to block native loaded */
+ .name = "processor",
+ .class = ACPI_PROCESSOR_CLASS,
+ .ids = processor_device_ids,
+};
+
+int xen_stub_processor_init(void)
+{
+ if (!xen_initial_domain())
+ return -ENODEV;
+
+ /* just reserve space for Xen, block native driver loaded */
+ return acpi_bus_register_driver(&xen_stub_processor_driver);
+}
+EXPORT_SYMBOL_GPL(xen_stub_processor_init);
+subsys_initcall(xen_stub_processor_init);
+
+void xen_stub_processor_exit(void)
+{
+ acpi_bus_unregister_driver(&xen_stub_processor_driver);
+}
+EXPORT_SYMBOL_GPL(xen_stub_processor_exit);
+
+#endif
diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c
index bcf3ba4a6ec..439c9dca9ee 100644
--- a/drivers/xen/xenbus/xenbus_client.c
+++ b/drivers/xen/xenbus/xenbus_client.c
@@ -30,6 +30,7 @@
* IN THE SOFTWARE.
*/
+#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/spinlock.h>
@@ -44,6 +45,7 @@
#include <xen/grant_table.h>
#include <xen/xenbus.h>
#include <xen/xen.h>
+#include <xen/features.h>
#include "xenbus_probe.h"
@@ -399,33 +401,6 @@ EXPORT_SYMBOL_GPL(xenbus_alloc_evtchn);
/**
- * Bind to an existing interdomain event channel in another domain. Returns 0
- * on success and stores the local port in *port. On error, returns -errno,
- * switches the device to XenbusStateClosing, and saves the error in XenStore.
- */
-int xenbus_bind_evtchn(struct xenbus_device *dev, int remote_port, int *port)
-{
- struct evtchn_bind_interdomain bind_interdomain;
- int err;
-
- bind_interdomain.remote_dom = dev->otherend_id;
- bind_interdomain.remote_port = remote_port;
-
- err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
- &bind_interdomain);
- if (err)
- xenbus_dev_fatal(dev, err,
- "binding to event channel %d from domain %d",
- remote_port, dev->otherend_id);
- else
- *port = bind_interdomain.local_port;
-
- return err;
-}
-EXPORT_SYMBOL_GPL(xenbus_bind_evtchn);
-
-
-/**
* Free an existing event channel. Returns 0 on success or -errno on error.
*/
int xenbus_free_evtchn(struct xenbus_device *dev, int port)
@@ -533,7 +508,7 @@ static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev,
err = xenbus_map_ring(dev, gnt_ref, &node->handle, addr);
if (err)
- goto out_err;
+ goto out_err_free_ballooned_pages;
spin_lock(&xenbus_valloc_lock);
list_add(&node->next, &xenbus_valloc_pages);
@@ -542,8 +517,9 @@ static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev,
*vaddr = addr;
return 0;
- out_err:
+ out_err_free_ballooned_pages:
free_xenballooned_pages(1, &node->page);
+ out_err:
kfree(node);
return err;
}
@@ -741,7 +717,7 @@ static const struct xenbus_ring_ops ring_ops_hvm = {
void __init xenbus_ring_ops_init(void)
{
- if (xen_pv_domain())
+ if (!xen_feature(XENFEAT_auto_translated_physmap))
ring_ops = &ring_ops_pv;
else
ring_ops = &ring_ops_hvm;
diff --git a/drivers/xen/xenbus/xenbus_comms.c b/drivers/xen/xenbus/xenbus_comms.c
index c5aa55c5d37..fdb0f339d0a 100644
--- a/drivers/xen/xenbus/xenbus_comms.c
+++ b/drivers/xen/xenbus/xenbus_comms.c
@@ -30,6 +30,8 @@
* IN THE SOFTWARE.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/wait.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
@@ -205,13 +207,12 @@ int xb_init_comms(void)
struct xenstore_domain_interface *intf = xen_store_interface;
if (intf->req_prod != intf->req_cons)
- printk(KERN_ERR "XENBUS request ring is not quiescent "
- "(%08x:%08x)!\n", intf->req_cons, intf->req_prod);
+ pr_err("request ring is not quiescent (%08x:%08x)!\n",
+ intf->req_cons, intf->req_prod);
if (intf->rsp_prod != intf->rsp_cons) {
- printk(KERN_WARNING "XENBUS response ring is not quiescent "
- "(%08x:%08x): fixing up\n",
- intf->rsp_cons, intf->rsp_prod);
+ pr_warn("response ring is not quiescent (%08x:%08x): fixing up\n",
+ intf->rsp_cons, intf->rsp_prod);
/* breaks kdump */
if (!reset_devices)
intf->rsp_cons = intf->rsp_prod;
@@ -225,7 +226,7 @@ int xb_init_comms(void)
err = bind_evtchn_to_irqhandler(xen_store_evtchn, wake_waiting,
0, "xenbus", &xb_waitq);
if (err < 0) {
- printk(KERN_ERR "XENBUS request irq failed %i\n", err);
+ pr_err("request irq failed %i\n", err);
return err;
}
diff --git a/drivers/xen/xenbus/xenbus_comms.h b/drivers/xen/xenbus/xenbus_comms.h
index c8abd3b8a6c..e74f9c1fbd8 100644
--- a/drivers/xen/xenbus/xenbus_comms.h
+++ b/drivers/xen/xenbus/xenbus_comms.h
@@ -45,6 +45,7 @@ int xb_wait_for_data_to_read(void);
int xs_input_avail(void);
extern struct xenstore_domain_interface *xen_store_interface;
extern int xen_store_evtchn;
+extern enum xenstore_init xen_store_domain_type;
extern const struct file_operations xen_xenbus_fops;
diff --git a/drivers/xen/xenbus/xenbus_dev_backend.c b/drivers/xen/xenbus/xenbus_dev_backend.c
index d7300080076..b17707ee07d 100644
--- a/drivers/xen/xenbus/xenbus_dev_backend.c
+++ b/drivers/xen/xenbus/xenbus_dev_backend.c
@@ -1,3 +1,5 @@
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/mm.h>
@@ -70,22 +72,21 @@ static long xenbus_alloc(domid_t domid)
return err;
}
-static long xenbus_backend_ioctl(struct file *file, unsigned int cmd, unsigned long data)
+static long xenbus_backend_ioctl(struct file *file, unsigned int cmd,
+ unsigned long data)
{
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
switch (cmd) {
- case IOCTL_XENBUS_BACKEND_EVTCHN:
- if (xen_store_evtchn > 0)
- return xen_store_evtchn;
- return -ENODEV;
-
- case IOCTL_XENBUS_BACKEND_SETUP:
- return xenbus_alloc(data);
-
- default:
- return -ENOTTY;
+ case IOCTL_XENBUS_BACKEND_EVTCHN:
+ if (xen_store_evtchn > 0)
+ return xen_store_evtchn;
+ return -ENODEV;
+ case IOCTL_XENBUS_BACKEND_SETUP:
+ return xenbus_alloc(data);
+ default:
+ return -ENOTTY;
}
}
@@ -128,7 +129,7 @@ static int __init xenbus_backend_init(void)
err = misc_register(&xenbus_backend_dev);
if (err)
- printk(KERN_ERR "Could not register xenbus backend device\n");
+ pr_err("Could not register xenbus backend device\n");
return err;
}
diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c
index ac727028e65..85534ea6355 100644
--- a/drivers/xen/xenbus/xenbus_dev_frontend.c
+++ b/drivers/xen/xenbus/xenbus_dev_frontend.c
@@ -35,6 +35,8 @@
* Turned xenfs into a loadable module.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/uio.h>
@@ -616,7 +618,7 @@ static int __init xenbus_init(void)
err = misc_register(&xenbus_dev);
if (err)
- printk(KERN_ERR "Could not register xenbus frontend device\n");
+ pr_err("Could not register xenbus frontend device\n");
return err;
}
diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
index 038b71dbf03..3c0a74b3e9b 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -30,6 +30,8 @@
* IN THE SOFTWARE.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#define DPRINTK(fmt, args...) \
pr_debug("xenbus_probe (%s:%d) " fmt ".\n", \
__func__, __LINE__, ##args)
@@ -69,6 +71,9 @@ EXPORT_SYMBOL_GPL(xen_store_evtchn);
struct xenstore_domain_interface *xen_store_interface;
EXPORT_SYMBOL_GPL(xen_store_interface);
+enum xenstore_init xen_store_domain_type;
+EXPORT_SYMBOL_GPL(xen_store_domain_type);
+
static unsigned long xen_store_mfn;
static BLOCKING_NOTIFIER_HEAD(xenstore_chain);
@@ -277,15 +282,15 @@ void xenbus_dev_shutdown(struct device *_dev)
get_device(&dev->dev);
if (dev->state != XenbusStateConnected) {
- printk(KERN_INFO "%s: %s: %s != Connected, skipping\n", __func__,
- dev->nodename, xenbus_strstate(dev->state));
+ pr_info("%s: %s: %s != Connected, skipping\n",
+ __func__, dev->nodename, xenbus_strstate(dev->state));
goto out;
}
xenbus_switch_state(dev, XenbusStateClosing);
timeout = wait_for_completion_timeout(&dev->down, timeout);
if (!timeout)
- printk(KERN_INFO "%s: %s timeout closing device\n",
- __func__, dev->nodename);
+ pr_info("%s: %s timeout closing device\n",
+ __func__, dev->nodename);
out:
put_device(&dev->dev);
}
@@ -379,12 +384,14 @@ static ssize_t nodename_show(struct device *dev,
{
return sprintf(buf, "%s\n", to_xenbus_device(dev)->nodename);
}
+static DEVICE_ATTR_RO(nodename);
static ssize_t devtype_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
return sprintf(buf, "%s\n", to_xenbus_device(dev)->devicetype);
}
+static DEVICE_ATTR_RO(devtype);
static ssize_t modalias_show(struct device *dev,
struct device_attribute *attr, char *buf)
@@ -392,14 +399,24 @@ static ssize_t modalias_show(struct device *dev,
return sprintf(buf, "%s:%s\n", dev->bus->name,
to_xenbus_device(dev)->devicetype);
}
+static DEVICE_ATTR_RO(modalias);
+
+static struct attribute *xenbus_dev_attrs[] = {
+ &dev_attr_nodename.attr,
+ &dev_attr_devtype.attr,
+ &dev_attr_modalias.attr,
+ NULL,
+};
-struct device_attribute xenbus_dev_attrs[] = {
- __ATTR_RO(nodename),
- __ATTR_RO(devtype),
- __ATTR_RO(modalias),
- __ATTR_NULL
+static const struct attribute_group xenbus_dev_group = {
+ .attrs = xenbus_dev_attrs,
};
-EXPORT_SYMBOL_GPL(xenbus_dev_attrs);
+
+const struct attribute_group *xenbus_dev_groups[] = {
+ &xenbus_dev_group,
+ NULL,
+};
+EXPORT_SYMBOL_GPL(xenbus_dev_groups);
int xenbus_probe_node(struct xen_bus_type *bus,
const char *type,
@@ -444,7 +461,7 @@ int xenbus_probe_node(struct xen_bus_type *bus,
if (err)
goto fail;
- dev_set_name(&xendev->dev, devname);
+ dev_set_name(&xendev->dev, "%s", devname);
/* Register with generic device framework. */
err = device_register(&xendev->dev);
@@ -576,8 +593,7 @@ int xenbus_dev_suspend(struct device *dev)
if (drv->suspend)
err = drv->suspend(xdev);
if (err)
- printk(KERN_WARNING
- "xenbus: suspend %s failed: %i\n", dev_name(dev), err);
+ pr_warn("suspend %s failed: %i\n", dev_name(dev), err);
return 0;
}
EXPORT_SYMBOL_GPL(xenbus_dev_suspend);
@@ -596,9 +612,8 @@ int xenbus_dev_resume(struct device *dev)
drv = to_xenbus_driver(dev->driver);
err = talk_to_otherend(xdev);
if (err) {
- printk(KERN_WARNING
- "xenbus: resume (talk_to_otherend) %s failed: %i\n",
- dev_name(dev), err);
+ pr_warn("resume (talk_to_otherend) %s failed: %i\n",
+ dev_name(dev), err);
return err;
}
@@ -607,18 +622,15 @@ int xenbus_dev_resume(struct device *dev)
if (drv->resume) {
err = drv->resume(xdev);
if (err) {
- printk(KERN_WARNING
- "xenbus: resume %s failed: %i\n",
- dev_name(dev), err);
+ pr_warn("resume %s failed: %i\n", dev_name(dev), err);
return err;
}
}
err = watch_otherend(xdev);
if (err) {
- printk(KERN_WARNING
- "xenbus_probe: resume (watch_otherend) %s failed: "
- "%d.\n", dev_name(dev), err);
+ pr_warn("resume (watch_otherend) %s failed: %d.\n",
+ dev_name(dev), err);
return err;
}
@@ -719,17 +731,11 @@ static int __init xenstored_local_init(void)
return err;
}
-enum xenstore_init {
- UNKNOWN,
- PV,
- HVM,
- LOCAL,
-};
static int __init xenbus_init(void)
{
int err = 0;
- enum xenstore_init usage = UNKNOWN;
uint64_t v = 0;
+ xen_store_domain_type = XS_UNKNOWN;
if (!xen_domain())
return -ENODEV;
@@ -737,29 +743,29 @@ static int __init xenbus_init(void)
xenbus_ring_ops_init();
if (xen_pv_domain())
- usage = PV;
+ xen_store_domain_type = XS_PV;
if (xen_hvm_domain())
- usage = HVM;
+ xen_store_domain_type = XS_HVM;
if (xen_hvm_domain() && xen_initial_domain())
- usage = LOCAL;
+ xen_store_domain_type = XS_LOCAL;
if (xen_pv_domain() && !xen_start_info->store_evtchn)
- usage = LOCAL;
+ xen_store_domain_type = XS_LOCAL;
if (xen_pv_domain() && xen_start_info->store_evtchn)
xenstored_ready = 1;
- switch (usage) {
- case LOCAL:
+ switch (xen_store_domain_type) {
+ case XS_LOCAL:
err = xenstored_local_init();
if (err)
goto out_error;
xen_store_interface = mfn_to_virt(xen_store_mfn);
break;
- case PV:
+ case XS_PV:
xen_store_evtchn = xen_start_info->store_evtchn;
xen_store_mfn = xen_start_info->store_mfn;
xen_store_interface = mfn_to_virt(xen_store_mfn);
break;
- case HVM:
+ case XS_HVM:
err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v);
if (err)
goto out_error;
@@ -769,7 +775,7 @@ static int __init xenbus_init(void)
goto out_error;
xen_store_mfn = (unsigned long)v;
xen_store_interface =
- ioremap(xen_store_mfn << PAGE_SHIFT, PAGE_SIZE);
+ xen_remap(xen_store_mfn << PAGE_SHIFT, PAGE_SIZE);
break;
default:
pr_warn("Xenstore state unknown\n");
@@ -779,8 +785,7 @@ static int __init xenbus_init(void)
/* Initialize the interface to xenstore. */
err = xs_init();
if (err) {
- printk(KERN_WARNING
- "XENBUS: Error initializing xenstore comms: %i\n", err);
+ pr_warn("Error initializing xenstore comms: %i\n", err);
goto out_error;
}
diff --git a/drivers/xen/xenbus/xenbus_probe.h b/drivers/xen/xenbus/xenbus_probe.h
index bb4f92ed873..1085ec294a1 100644
--- a/drivers/xen/xenbus/xenbus_probe.h
+++ b/drivers/xen/xenbus/xenbus_probe.h
@@ -47,7 +47,14 @@ struct xen_bus_type {
struct bus_type bus;
};
-extern struct device_attribute xenbus_dev_attrs[];
+enum xenstore_init {
+ XS_UNKNOWN,
+ XS_PV,
+ XS_HVM,
+ XS_LOCAL,
+};
+
+extern const struct attribute_group *xenbus_dev_groups[];
extern int xenbus_match(struct device *_dev, struct device_driver *_drv);
extern int xenbus_dev_probe(struct device *_dev);
diff --git a/drivers/xen/xenbus/xenbus_probe_backend.c b/drivers/xen/xenbus/xenbus_probe_backend.c
index 257be37d909..5125dce11a6 100644
--- a/drivers/xen/xenbus/xenbus_probe_backend.c
+++ b/drivers/xen/xenbus/xenbus_probe_backend.c
@@ -31,9 +31,11 @@
* IN THE SOFTWARE.
*/
-#define DPRINTK(fmt, args...) \
- pr_debug("xenbus_probe (%s:%d) " fmt ".\n", \
- __func__, __LINE__, ##args)
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#define DPRINTK(fmt, ...) \
+ pr_debug("(%s:%d) " fmt "\n", \
+ __func__, __LINE__, ##__VA_ARGS__)
#include <linux/kernel.h>
#include <linux/err.h>
@@ -198,7 +200,7 @@ static struct xen_bus_type xenbus_backend = {
.probe = xenbus_dev_probe,
.remove = xenbus_dev_remove,
.shutdown = xenbus_dev_shutdown,
- .dev_attrs = xenbus_dev_attrs,
+ .dev_groups = xenbus_dev_groups,
},
};
diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c
index 3159a37d966..cb385c10d2b 100644
--- a/drivers/xen/xenbus/xenbus_probe_frontend.c
+++ b/drivers/xen/xenbus/xenbus_probe_frontend.c
@@ -1,6 +1,8 @@
-#define DPRINTK(fmt, args...) \
- pr_debug("xenbus_probe (%s:%d) " fmt ".\n", \
- __func__, __LINE__, ##args)
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#define DPRINTK(fmt, ...) \
+ pr_debug("(%s:%d) " fmt "\n", \
+ __func__, __LINE__, ##__VA_ARGS__)
#include <linux/kernel.h>
#include <linux/err.h>
@@ -29,18 +31,20 @@
#include "xenbus_probe.h"
+static struct workqueue_struct *xenbus_frontend_wq;
+
/* device/<type>/<id> => <type>-<id> */
static int frontend_bus_id(char bus_id[XEN_BUS_ID_SIZE], const char *nodename)
{
nodename = strchr(nodename, '/');
if (!nodename || strlen(nodename + 1) >= XEN_BUS_ID_SIZE) {
- printk(KERN_WARNING "XENBUS: bad frontend %s\n", nodename);
+ pr_warn("bad frontend %s\n", nodename);
return -EINVAL;
}
strlcpy(bus_id, nodename + 1, XEN_BUS_ID_SIZE);
if (!strchr(bus_id, '/')) {
- printk(KERN_WARNING "XENBUS: bus_id %s no slash\n", bus_id);
+ pr_warn("bus_id %s no slash\n", bus_id);
return -EINVAL;
}
*strchr(bus_id, '/') = '-';
@@ -89,9 +93,49 @@ static void backend_changed(struct xenbus_watch *watch,
xenbus_otherend_changed(watch, vec, len, 1);
}
+static void xenbus_frontend_delayed_resume(struct work_struct *w)
+{
+ struct xenbus_device *xdev = container_of(w, struct xenbus_device, work);
+
+ xenbus_dev_resume(&xdev->dev);
+}
+
+static int xenbus_frontend_dev_resume(struct device *dev)
+{
+ /*
+ * If xenstored is running in this domain, we cannot access the backend
+ * state at the moment, so we need to defer xenbus_dev_resume
+ */
+ if (xen_store_domain_type == XS_LOCAL) {
+ struct xenbus_device *xdev = to_xenbus_device(dev);
+
+ if (!xenbus_frontend_wq) {
+ pr_err("%s: no workqueue to process delayed resume\n",
+ xdev->nodename);
+ return -EFAULT;
+ }
+
+ queue_work(xenbus_frontend_wq, &xdev->work);
+
+ return 0;
+ }
+
+ return xenbus_dev_resume(dev);
+}
+
+static int xenbus_frontend_dev_probe(struct device *dev)
+{
+ if (xen_store_domain_type == XS_LOCAL) {
+ struct xenbus_device *xdev = to_xenbus_device(dev);
+ INIT_WORK(&xdev->work, xenbus_frontend_delayed_resume);
+ }
+
+ return xenbus_dev_probe(dev);
+}
+
static const struct dev_pm_ops xenbus_pm_ops = {
.suspend = xenbus_dev_suspend,
- .resume = xenbus_dev_resume,
+ .resume = xenbus_frontend_dev_resume,
.freeze = xenbus_dev_suspend,
.thaw = xenbus_dev_cancel,
.restore = xenbus_dev_resume,
@@ -107,10 +151,10 @@ static struct xen_bus_type xenbus_frontend = {
.name = "xen",
.match = xenbus_match,
.uevent = xenbus_uevent_frontend,
- .probe = xenbus_dev_probe,
+ .probe = xenbus_frontend_dev_probe,
.remove = xenbus_dev_remove,
.shutdown = xenbus_dev_shutdown,
- .dev_attrs = xenbus_dev_attrs,
+ .dev_groups = xenbus_dev_groups,
.pm = &xenbus_pm_ops,
},
@@ -201,15 +245,13 @@ static int print_device_status(struct device *dev, void *data)
if (!dev->driver) {
/* Information only: is this too noisy? */
- printk(KERN_INFO "XENBUS: Device with no driver: %s\n",
- xendev->nodename);
+ pr_info("Device with no driver: %s\n", xendev->nodename);
} else if (xendev->state < XenbusStateConnected) {
enum xenbus_state rstate = XenbusStateUnknown;
if (xendev->otherend)
rstate = xenbus_read_driver_state(xendev->otherend);
- printk(KERN_WARNING "XENBUS: Timeout connecting "
- "to device: %s (local state %d, remote state %d)\n",
- xendev->nodename, xendev->state, rstate);
+ pr_warn("Timeout connecting to device: %s (local state %d, remote state %d)\n",
+ xendev->nodename, xendev->state, rstate);
}
return 0;
@@ -223,12 +265,13 @@ static bool wait_loop(unsigned long start, unsigned int max_delay,
{
if (time_after(jiffies, start + (*seconds_waited+5)*HZ)) {
if (!*seconds_waited)
- printk(KERN_WARNING "XENBUS: Waiting for "
- "devices to initialise: ");
+ pr_warn("Waiting for devices to initialise: ");
*seconds_waited += 5;
- printk("%us...", max_delay - *seconds_waited);
- if (*seconds_waited == max_delay)
+ pr_cont("%us...", max_delay - *seconds_waited);
+ if (*seconds_waited == max_delay) {
+ pr_cont("\n");
return true;
+ }
}
schedule_timeout_interruptible(HZ/10);
@@ -309,7 +352,7 @@ static void xenbus_reset_wait_for_backend(char *be, int expected)
timeout = wait_event_interruptible_timeout(backend_state_wq,
backend_state == expected, 5 * HZ);
if (timeout <= 0)
- printk(KERN_INFO "XENBUS: backend %s timed out.\n", be);
+ pr_info("backend %s timed out\n", be);
}
/*
@@ -332,7 +375,7 @@ static void xenbus_reset_frontend(char *fe, char *be, int be_state)
be_watch.callback = xenbus_reset_backend_state_changed;
backend_state = XenbusStateUnknown;
- printk(KERN_INFO "XENBUS: triggering reconnect on %s\n", be);
+ pr_info("triggering reconnect on %s\n", be);
register_xenbus_watch(&be_watch);
/* fall through to forward backend to state XenbusStateInitialising */
@@ -351,7 +394,7 @@ static void xenbus_reset_frontend(char *fe, char *be, int be_state)
}
unregister_xenbus_watch(&be_watch);
- printk(KERN_INFO "XENBUS: reconnect done on %s\n", be);
+ pr_info("reconnect done on %s\n", be);
kfree(be_watch.node);
}
@@ -440,6 +483,12 @@ static int __init xenbus_probe_frontend_init(void)
register_xenstore_notifier(&xenstore_notifier);
+ if (xen_store_domain_type == XS_LOCAL) {
+ xenbus_frontend_wq = create_workqueue("xenbus_frontend");
+ if (!xenbus_frontend_wq)
+ pr_warn("create xenbus frontend workqueue failed, S3 resume is likely to fail\n");
+ }
+
return 0;
}
subsys_initcall(xenbus_probe_frontend_init);
@@ -447,7 +496,7 @@ subsys_initcall(xenbus_probe_frontend_init);
#ifndef MODULE
static int __init boot_wait_for_devices(void)
{
- if (xen_hvm_domain() && !xen_platform_pci_unplug)
+ if (!xen_has_pv_devices())
return -ENODEV;
ready_to_wait_for_devices = 1;
diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c
index 88e677b0de7..ba804f3d827 100644
--- a/drivers/xen/xenbus/xenbus_xs.c
+++ b/drivers/xen/xenbus/xenbus_xs.c
@@ -31,6 +31,8 @@
* IN THE SOFTWARE.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/unistd.h>
#include <linux/errno.h>
#include <linux/types.h>
@@ -48,6 +50,7 @@
#include <xen/xenbus.h>
#include <xen/xen.h>
#include "xenbus_comms.h"
+#include "xenbus_probe.h"
struct xs_stored_msg {
struct list_head list;
@@ -129,15 +132,37 @@ static int get_error(const char *errorstring)
for (i = 0; strcmp(errorstring, xsd_errors[i].errstring) != 0; i++) {
if (i == ARRAY_SIZE(xsd_errors) - 1) {
- printk(KERN_WARNING
- "XENBUS xen store gave: unknown error %s",
- errorstring);
+ pr_warn("xen store gave: unknown error %s\n",
+ errorstring);
return EINVAL;
}
}
return xsd_errors[i].errnum;
}
+static bool xenbus_ok(void)
+{
+ switch (xen_store_domain_type) {
+ case XS_LOCAL:
+ switch (system_state) {
+ case SYSTEM_POWER_OFF:
+ case SYSTEM_RESTART:
+ case SYSTEM_HALT:
+ return false;
+ default:
+ break;
+ }
+ return true;
+ case XS_PV:
+ case XS_HVM:
+ /* FIXME: Could check that the remote domain is alive,
+ * but it is normally initial domain. */
+ return true;
+ default:
+ break;
+ }
+ return false;
+}
static void *read_reply(enum xsd_sockmsg_type *type, unsigned int *len)
{
struct xs_stored_msg *msg;
@@ -147,9 +172,20 @@ static void *read_reply(enum xsd_sockmsg_type *type, unsigned int *len)
while (list_empty(&xs_state.reply_list)) {
spin_unlock(&xs_state.reply_lock);
- /* XXX FIXME: Avoid synchronous wait for response here. */
- wait_event(xs_state.reply_waitq,
- !list_empty(&xs_state.reply_list));
+ if (xenbus_ok())
+ /* XXX FIXME: Avoid synchronous wait for response here. */
+ wait_event_timeout(xs_state.reply_waitq,
+ !list_empty(&xs_state.reply_list),
+ msecs_to_jiffies(500));
+ else {
+ /*
+ * If we are in the process of being shut-down there is
+ * no point of trying to contact XenBus - it is either
+ * killed (xenstored application) or the other domain
+ * has been killed or is unreachable.
+ */
+ return ERR_PTR(-EIO);
+ }
spin_lock(&xs_state.reply_lock);
}
@@ -214,6 +250,9 @@ void *xenbus_dev_request_and_reply(struct xsd_sockmsg *msg)
mutex_unlock(&xs_state.request_mutex);
+ if (IS_ERR(ret))
+ return ret;
+
if ((msg->type == XS_TRANSACTION_END) ||
((req_msg.type == XS_TRANSACTION_START) &&
(msg->type == XS_ERROR)))
@@ -272,10 +311,8 @@ static void *xs_talkv(struct xenbus_transaction t,
}
if (msg.type != type) {
- if (printk_ratelimit())
- printk(KERN_WARNING
- "XENBUS unexpected type [%d], expected [%d]\n",
- msg.type, type);
+ pr_warn_ratelimited("unexpected type [%d], expected [%d]\n",
+ msg.type, type);
kfree(ret);
return ERR_PTR(-EINVAL);
}
@@ -655,7 +692,7 @@ static void xs_reset_watches(void)
err = xs_error(xs_single(XBT_NIL, XS_RESET_WATCHES, "", NULL));
if (err && err != -EEXIST)
- printk(KERN_WARNING "xs_reset_watches failed: %d\n", err);
+ pr_warn("xs_reset_watches failed: %d\n", err);
}
/* Register callback to watch this node. */
@@ -705,9 +742,7 @@ void unregister_xenbus_watch(struct xenbus_watch *watch)
err = xs_unwatch(watch->node, token);
if (err)
- printk(KERN_WARNING
- "XENBUS Failed to release watch %s: %i\n",
- watch->node, err);
+ pr_warn("Failed to release watch %s: %i\n", watch->node, err);
up_read(&xs_state.watch_mutex);
@@ -901,8 +936,7 @@ static int xenbus_thread(void *unused)
for (;;) {
err = process_msg();
if (err)
- printk(KERN_WARNING "XENBUS error %d while reading "
- "message\n", err);
+ pr_warn("error %d while reading message\n", err);
if (kthread_should_stop())
break;
}
diff --git a/drivers/xen/xencomm.c b/drivers/xen/xencomm.c
deleted file mode 100644
index b91f8ff50d0..00000000000
--- a/drivers/xen/xencomm.c
+++ /dev/null
@@ -1,217 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Copyright (C) IBM Corp. 2006
- *
- * Authors: Hollis Blanchard <hollisb@us.ibm.com>
- */
-
-#include <linux/mm.h>
-#include <linux/slab.h>
-#include <asm/page.h>
-#include <xen/xencomm.h>
-#include <xen/interface/xen.h>
-#include <asm/xen/xencomm.h> /* for xencomm_is_phys_contiguous() */
-
-static int xencomm_init(struct xencomm_desc *desc,
- void *buffer, unsigned long bytes)
-{
- unsigned long recorded = 0;
- int i = 0;
-
- while ((recorded < bytes) && (i < desc->nr_addrs)) {
- unsigned long vaddr = (unsigned long)buffer + recorded;
- unsigned long paddr;
- int offset;
- int chunksz;
-
- offset = vaddr % PAGE_SIZE; /* handle partial pages */
- chunksz = min(PAGE_SIZE - offset, bytes - recorded);
-
- paddr = xencomm_vtop(vaddr);
- if (paddr == ~0UL) {
- printk(KERN_DEBUG "%s: couldn't translate vaddr %lx\n",
- __func__, vaddr);
- return -EINVAL;
- }
-
- desc->address[i++] = paddr;
- recorded += chunksz;
- }
-
- if (recorded < bytes) {
- printk(KERN_DEBUG
- "%s: could only translate %ld of %ld bytes\n",
- __func__, recorded, bytes);
- return -ENOSPC;
- }
-
- /* mark remaining addresses invalid (just for safety) */
- while (i < desc->nr_addrs)
- desc->address[i++] = XENCOMM_INVALID;
-
- desc->magic = XENCOMM_MAGIC;
-
- return 0;
-}
-
-static struct xencomm_desc *xencomm_alloc(gfp_t gfp_mask,
- void *buffer, unsigned long bytes)
-{
- struct xencomm_desc *desc;
- unsigned long buffer_ulong = (unsigned long)buffer;
- unsigned long start = buffer_ulong & PAGE_MASK;
- unsigned long end = (buffer_ulong + bytes) | ~PAGE_MASK;
- unsigned long nr_addrs = (end - start + 1) >> PAGE_SHIFT;
- unsigned long size = sizeof(*desc) +
- sizeof(desc->address[0]) * nr_addrs;
-
- /*
- * slab allocator returns at least sizeof(void*) aligned pointer.
- * When sizeof(*desc) > sizeof(void*), struct xencomm_desc might
- * cross page boundary.
- */
- if (sizeof(*desc) > sizeof(void *)) {
- unsigned long order = get_order(size);
- desc = (struct xencomm_desc *)__get_free_pages(gfp_mask,
- order);
- if (desc == NULL)
- return NULL;
-
- desc->nr_addrs =
- ((PAGE_SIZE << order) - sizeof(struct xencomm_desc)) /
- sizeof(*desc->address);
- } else {
- desc = kmalloc(size, gfp_mask);
- if (desc == NULL)
- return NULL;
-
- desc->nr_addrs = nr_addrs;
- }
- return desc;
-}
-
-void xencomm_free(struct xencomm_handle *desc)
-{
- if (desc && !((ulong)desc & XENCOMM_INLINE_FLAG)) {
- struct xencomm_desc *desc__ = (struct xencomm_desc *)desc;
- if (sizeof(*desc__) > sizeof(void *)) {
- unsigned long size = sizeof(*desc__) +
- sizeof(desc__->address[0]) * desc__->nr_addrs;
- unsigned long order = get_order(size);
- free_pages((unsigned long)__va(desc), order);
- } else
- kfree(__va(desc));
- }
-}
-
-static int xencomm_create(void *buffer, unsigned long bytes,
- struct xencomm_desc **ret, gfp_t gfp_mask)
-{
- struct xencomm_desc *desc;
- int rc;
-
- pr_debug("%s: %p[%ld]\n", __func__, buffer, bytes);
-
- if (bytes == 0) {
- /* don't create a descriptor; Xen recognizes NULL. */
- BUG_ON(buffer != NULL);
- *ret = NULL;
- return 0;
- }
-
- BUG_ON(buffer == NULL); /* 'bytes' is non-zero */
-
- desc = xencomm_alloc(gfp_mask, buffer, bytes);
- if (!desc) {
- printk(KERN_DEBUG "%s failure\n", "xencomm_alloc");
- return -ENOMEM;
- }
-
- rc = xencomm_init(desc, buffer, bytes);
- if (rc) {
- printk(KERN_DEBUG "%s failure: %d\n", "xencomm_init", rc);
- xencomm_free((struct xencomm_handle *)__pa(desc));
- return rc;
- }
-
- *ret = desc;
- return 0;
-}
-
-static struct xencomm_handle *xencomm_create_inline(void *ptr)
-{
- unsigned long paddr;
-
- BUG_ON(!xencomm_is_phys_contiguous((unsigned long)ptr));
-
- paddr = (unsigned long)xencomm_pa(ptr);
- BUG_ON(paddr & XENCOMM_INLINE_FLAG);
- return (struct xencomm_handle *)(paddr | XENCOMM_INLINE_FLAG);
-}
-
-/* "mini" routine, for stack-based communications: */
-static int xencomm_create_mini(void *buffer,
- unsigned long bytes, struct xencomm_mini *xc_desc,
- struct xencomm_desc **ret)
-{
- int rc = 0;
- struct xencomm_desc *desc;
- BUG_ON(((unsigned long)xc_desc) % sizeof(*xc_desc) != 0);
-
- desc = (void *)xc_desc;
-
- desc->nr_addrs = XENCOMM_MINI_ADDRS;
-
- rc = xencomm_init(desc, buffer, bytes);
- if (!rc)
- *ret = desc;
-
- return rc;
-}
-
-struct xencomm_handle *xencomm_map(void *ptr, unsigned long bytes)
-{
- int rc;
- struct xencomm_desc *desc;
-
- if (xencomm_is_phys_contiguous((unsigned long)ptr))
- return xencomm_create_inline(ptr);
-
- rc = xencomm_create(ptr, bytes, &desc, GFP_KERNEL);
-
- if (rc || desc == NULL)
- return NULL;
-
- return xencomm_pa(desc);
-}
-
-struct xencomm_handle *__xencomm_map_no_alloc(void *ptr, unsigned long bytes,
- struct xencomm_mini *xc_desc)
-{
- int rc;
- struct xencomm_desc *desc = NULL;
-
- if (xencomm_is_phys_contiguous((unsigned long)ptr))
- return xencomm_create_inline(ptr);
-
- rc = xencomm_create_mini(ptr, bytes, xc_desc,
- &desc);
-
- if (rc)
- return NULL;
-
- return xencomm_pa(desc);
-}
diff --git a/drivers/xen/xenfs/super.c b/drivers/xen/xenfs/super.c
index 459b9ac45cf..06092e0fe8c 100644
--- a/drivers/xen/xenfs/super.c
+++ b/drivers/xen/xenfs/super.c
@@ -7,6 +7,8 @@
* Turned xenfs into a loadable module.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/module.h>
@@ -24,47 +26,6 @@
MODULE_DESCRIPTION("Xen filesystem");
MODULE_LICENSE("GPL");
-static struct inode *xenfs_make_inode(struct super_block *sb, int mode)
-{
- struct inode *ret = new_inode(sb);
-
- if (ret) {
- ret->i_mode = mode;
- ret->i_uid = GLOBAL_ROOT_UID;
- ret->i_gid = GLOBAL_ROOT_GID;
- ret->i_blocks = 0;
- ret->i_atime = ret->i_mtime = ret->i_ctime = CURRENT_TIME;
- }
- return ret;
-}
-
-static struct dentry *xenfs_create_file(struct super_block *sb,
- struct dentry *parent,
- const char *name,
- const struct file_operations *fops,
- void *data,
- int mode)
-{
- struct dentry *dentry;
- struct inode *inode;
-
- dentry = d_alloc_name(parent, name);
- if (!dentry)
- return NULL;
-
- inode = xenfs_make_inode(sb, S_IFREG | mode);
- if (!inode) {
- dput(dentry);
- return NULL;
- }
-
- inode->i_fop = fops;
- inode->i_private = data;
-
- d_add(dentry, inode);
- return dentry;
-}
-
static ssize_t capabilities_read(struct file *file, char __user *buf,
size_t size, loff_t *off)
{
@@ -84,26 +45,23 @@ static const struct file_operations capabilities_file_ops = {
static int xenfs_fill_super(struct super_block *sb, void *data, int silent)
{
static struct tree_descr xenfs_files[] = {
- [1] = {},
- { "xenbus", &xen_xenbus_fops, S_IRUSR|S_IWUSR },
+ [2] = { "xenbus", &xen_xenbus_fops, S_IRUSR|S_IWUSR },
{ "capabilities", &capabilities_file_ops, S_IRUGO },
{ "privcmd", &xen_privcmd_fops, S_IRUSR|S_IWUSR },
{""},
};
- int rc;
-
- rc = simple_fill_super(sb, XENFS_SUPER_MAGIC, xenfs_files);
- if (rc < 0)
- return rc;
- if (xen_initial_domain()) {
- xenfs_create_file(sb, sb->s_root, "xsd_kva",
- &xsd_kva_file_ops, NULL, S_IRUSR|S_IWUSR);
- xenfs_create_file(sb, sb->s_root, "xsd_port",
- &xsd_port_file_ops, NULL, S_IRUSR|S_IWUSR);
- }
+ static struct tree_descr xenfs_init_files[] = {
+ [2] = { "xenbus", &xen_xenbus_fops, S_IRUSR|S_IWUSR },
+ { "capabilities", &capabilities_file_ops, S_IRUGO },
+ { "privcmd", &xen_privcmd_fops, S_IRUSR|S_IWUSR },
+ { "xsd_kva", &xsd_kva_file_ops, S_IRUSR|S_IWUSR},
+ { "xsd_port", &xsd_port_file_ops, S_IRUSR|S_IWUSR},
+ {""},
+ };
- return rc;
+ return simple_fill_super(sb, XENFS_SUPER_MAGIC,
+ xen_initial_domain() ? xenfs_init_files : xenfs_files);
}
static struct dentry *xenfs_mount(struct file_system_type *fs_type,
@@ -119,13 +77,14 @@ static struct file_system_type xenfs_type = {
.mount = xenfs_mount,
.kill_sb = kill_litter_super,
};
+MODULE_ALIAS_FS("xenfs");
static int __init xenfs_init(void)
{
if (xen_domain())
return register_filesystem(&xenfs_type);
- printk(KERN_INFO "XENFS: not registering filesystem on non-xen platform\n");
+ pr_info("not registering filesystem on non-xen platform\n");
return 0;
}