diff options
Diffstat (limited to 'drivers/xen/xenbus')
| -rw-r--r-- | drivers/xen/xenbus/Makefile | 7 | ||||
| -rw-r--r-- | drivers/xen/xenbus/xenbus_client.c | 341 | ||||
| -rw-r--r-- | drivers/xen/xenbus/xenbus_comms.c | 44 | ||||
| -rw-r--r-- | drivers/xen/xenbus/xenbus_comms.h | 6 | ||||
| -rw-r--r-- | drivers/xen/xenbus/xenbus_dev_backend.c | 142 | ||||
| -rw-r--r-- | drivers/xen/xenbus/xenbus_dev_frontend.c | 631 | ||||
| -rw-r--r-- | drivers/xen/xenbus/xenbus_probe.c | 594 | ||||
| -rw-r--r-- | drivers/xen/xenbus/xenbus_probe.h | 52 | ||||
| -rw-r--r-- | drivers/xen/xenbus/xenbus_probe_backend.c | 274 | ||||
| -rw-r--r-- | drivers/xen/xenbus/xenbus_probe_frontend.c | 510 | ||||
| -rw-r--r-- | drivers/xen/xenbus/xenbus_xs.c | 204 |
11 files changed, 2271 insertions, 534 deletions
diff --git a/drivers/xen/xenbus/Makefile b/drivers/xen/xenbus/Makefile index 5571f5b8422..31e2e9050c7 100644 --- a/drivers/xen/xenbus/Makefile +++ b/drivers/xen/xenbus/Makefile @@ -1,7 +1,14 @@ obj-y += xenbus.o +obj-y += xenbus_dev_frontend.o xenbus-objs = xenbus-objs += xenbus_client.o xenbus-objs += xenbus_comms.o xenbus-objs += xenbus_xs.o xenbus-objs += xenbus_probe.o + +xenbus-be-objs-$(CONFIG_XEN_BACKEND) += xenbus_probe_backend.o +xenbus-objs += $(xenbus-be-objs-y) + +obj-$(CONFIG_XEN_BACKEND) += xenbus_dev_backend.o +obj-$(CONFIG_XEN_XENBUS_FRONTEND) += xenbus_probe_frontend.o diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c index 9fd2f70ab46..439c9dca9ee 100644 --- a/drivers/xen/xenbus/xenbus_client.c +++ b/drivers/xen/xenbus/xenbus_client.c @@ -30,14 +30,43 @@ * IN THE SOFTWARE. */ +#include <linux/mm.h> +#include <linux/slab.h> #include <linux/types.h> +#include <linux/spinlock.h> #include <linux/vmalloc.h> +#include <linux/export.h> #include <asm/xen/hypervisor.h> +#include <asm/xen/page.h> #include <xen/interface/xen.h> #include <xen/interface/event_channel.h> +#include <xen/balloon.h> #include <xen/events.h> #include <xen/grant_table.h> #include <xen/xenbus.h> +#include <xen/xen.h> +#include <xen/features.h> + +#include "xenbus_probe.h" + +struct xenbus_map_node { + struct list_head next; + union { + struct vm_struct *area; /* PV */ + struct page *page; /* HVM */ + }; + grant_handle_t handle; +}; + +static DEFINE_SPINLOCK(xenbus_valloc_lock); +static LIST_HEAD(xenbus_valloc_pages); + +struct xenbus_ring_ops { + int (*map)(struct xenbus_device *dev, int gnt, void **vaddr); + int (*unmap)(struct xenbus_device *dev, void *vaddr); +}; + +static const struct xenbus_ring_ops *ring_ops __read_mostly; const char *xenbus_strstate(enum xenbus_state state) { @@ -49,6 +78,8 @@ const char *xenbus_strstate(enum xenbus_state state) [ XenbusStateConnected ] = "Connected", [ XenbusStateClosing ] = "Closing", [ XenbusStateClosed ] = "Closed", + [XenbusStateReconfiguring] = "Reconfiguring", + [XenbusStateReconfigured] = "Reconfigured", }; return (state < ARRAY_SIZE(name)) ? name[state] : "INVALID"; } @@ -117,7 +148,7 @@ int xenbus_watch_pathfmt(struct xenbus_device *dev, char *path; va_start(ap, pathfmt); - path = kvasprintf(GFP_KERNEL, pathfmt, ap); + path = kvasprintf(GFP_NOIO | __GFP_HIGH, pathfmt, ap); va_end(ap); if (!path) { @@ -132,18 +163,12 @@ int xenbus_watch_pathfmt(struct xenbus_device *dev, } EXPORT_SYMBOL_GPL(xenbus_watch_pathfmt); +static void xenbus_switch_fatal(struct xenbus_device *, int, int, + const char *, ...); -/** - * xenbus_switch_state - * @dev: xenbus device - * @xbt: transaction handle - * @state: new state - * - * Advertise in the store a change of the given driver to the given new_state. - * Return 0 on success, or -errno on error. On error, the device will switch - * to XenbusStateClosing, and the error will be saved in the store. - */ -int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state state) +static int +__xenbus_switch_state(struct xenbus_device *dev, + enum xenbus_state state, int depth) { /* We check whether the state is currently set to the given value, and if not, then the state is set. We don't want to unconditionally @@ -152,35 +177,65 @@ int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state state) to it, as the device will be tearing down, and we don't want to resurrect that directory. - Note that, because of this cached value of our state, this function - will not work inside a Xenstore transaction (something it was - trying to in the past) because dev->state would not get reset if - the transaction was aborted. - + Note that, because of this cached value of our state, this + function will not take a caller's Xenstore transaction + (something it was trying to in the past) because dev->state + would not get reset if the transaction was aborted. */ + struct xenbus_transaction xbt; int current_state; - int err; + int err, abort; if (state == dev->state) return 0; - err = xenbus_scanf(XBT_NIL, dev->nodename, "state", "%d", - ¤t_state); - if (err != 1) +again: + abort = 1; + + err = xenbus_transaction_start(&xbt); + if (err) { + xenbus_switch_fatal(dev, depth, err, "starting transaction"); return 0; + } + + err = xenbus_scanf(xbt, dev->nodename, "state", "%d", ¤t_state); + if (err != 1) + goto abort; - err = xenbus_printf(XBT_NIL, dev->nodename, "state", "%d", state); + err = xenbus_printf(xbt, dev->nodename, "state", "%d", state); if (err) { - if (state != XenbusStateClosing) /* Avoid looping */ - xenbus_dev_fatal(dev, err, "writing new state"); - return err; + xenbus_switch_fatal(dev, depth, err, "writing new state"); + goto abort; } - dev->state = state; + abort = 0; +abort: + err = xenbus_transaction_end(xbt, abort); + if (err) { + if (err == -EAGAIN && !abort) + goto again; + xenbus_switch_fatal(dev, depth, err, "ending transaction"); + } else + dev->state = state; return 0; } + +/** + * xenbus_switch_state + * @dev: xenbus device + * @state: new state + * + * Advertise in the store a change of the given driver to the given new_state. + * Return 0 on success, or -errno on error. On error, the device will switch + * to XenbusStateClosing, and the error will be saved in the store. + */ +int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state state) +{ + return __xenbus_switch_state(dev, state, 0); +} + EXPORT_SYMBOL_GPL(xenbus_switch_state); int xenbus_frontend_closed(struct xenbus_device *dev) @@ -267,7 +322,7 @@ EXPORT_SYMBOL_GPL(xenbus_dev_error); * @fmt: error message format * * Equivalent to xenbus_dev_error(dev, err, fmt, args), followed by - * xenbus_switch_state(dev, NULL, XenbusStateClosing) to schedule an orderly + * xenbus_switch_state(dev, XenbusStateClosing) to schedule an orderly * closedown of this driver and its peer. */ @@ -284,6 +339,23 @@ void xenbus_dev_fatal(struct xenbus_device *dev, int err, const char *fmt, ...) EXPORT_SYMBOL_GPL(xenbus_dev_fatal); /** + * Equivalent to xenbus_dev_fatal(dev, err, fmt, args), but helps + * avoiding recursion within xenbus_switch_state. + */ +static void xenbus_switch_fatal(struct xenbus_device *dev, int depth, int err, + const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + xenbus_va_dev_error(dev, err, fmt, ap); + va_end(ap); + + if (!depth) + __xenbus_switch_state(dev, XenbusStateClosing, 1); +} + +/** * xenbus_grant_ring * @dev: xenbus device * @ring_mfn: mfn of ring to grant @@ -329,33 +401,6 @@ EXPORT_SYMBOL_GPL(xenbus_alloc_evtchn); /** - * Bind to an existing interdomain event channel in another domain. Returns 0 - * on success and stores the local port in *port. On error, returns -errno, - * switches the device to XenbusStateClosing, and saves the error in XenStore. - */ -int xenbus_bind_evtchn(struct xenbus_device *dev, int remote_port, int *port) -{ - struct evtchn_bind_interdomain bind_interdomain; - int err; - - bind_interdomain.remote_dom = dev->otherend_id; - bind_interdomain.remote_port = remote_port; - - err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, - &bind_interdomain); - if (err) - xenbus_dev_fatal(dev, err, - "binding to event channel %d from domain %d", - remote_port, dev->otherend_id); - else - *port = bind_interdomain.local_port; - - return err; -} -EXPORT_SYMBOL_GPL(xenbus_bind_evtchn); - - -/** * Free an existing event channel. Returns 0 on success or -errno on error. */ int xenbus_free_evtchn(struct xenbus_device *dev, int port) @@ -390,39 +435,94 @@ EXPORT_SYMBOL_GPL(xenbus_free_evtchn); */ int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void **vaddr) { + return ring_ops->map(dev, gnt_ref, vaddr); +} +EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc); + +static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev, + int gnt_ref, void **vaddr) +{ struct gnttab_map_grant_ref op = { - .flags = GNTMAP_host_map, + .flags = GNTMAP_host_map | GNTMAP_contains_pte, .ref = gnt_ref, .dom = dev->otherend_id, }; + struct xenbus_map_node *node; struct vm_struct *area; + pte_t *pte; *vaddr = NULL; - area = alloc_vm_area(PAGE_SIZE); - if (!area) + node = kzalloc(sizeof(*node), GFP_KERNEL); + if (!node) return -ENOMEM; - op.host_addr = (unsigned long)area->addr; + area = alloc_vm_area(PAGE_SIZE, &pte); + if (!area) { + kfree(node); + return -ENOMEM; + } - if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) - BUG(); + op.host_addr = arbitrary_virt_to_machine(pte).maddr; + + gnttab_batch_map(&op, 1); if (op.status != GNTST_okay) { free_vm_area(area); + kfree(node); xenbus_dev_fatal(dev, op.status, "mapping in shared page %d from domain %d", gnt_ref, dev->otherend_id); return op.status; } - /* Stuff the handle in an unused field */ - area->phys_addr = (unsigned long)op.handle; + node->handle = op.handle; + node->area = area; + + spin_lock(&xenbus_valloc_lock); + list_add(&node->next, &xenbus_valloc_pages); + spin_unlock(&xenbus_valloc_lock); *vaddr = area->addr; return 0; } -EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc); + +static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev, + int gnt_ref, void **vaddr) +{ + struct xenbus_map_node *node; + int err; + void *addr; + + *vaddr = NULL; + + node = kzalloc(sizeof(*node), GFP_KERNEL); + if (!node) + return -ENOMEM; + + err = alloc_xenballooned_pages(1, &node->page, false /* lowmem */); + if (err) + goto out_err; + + addr = pfn_to_kaddr(page_to_pfn(node->page)); + + err = xenbus_map_ring(dev, gnt_ref, &node->handle, addr); + if (err) + goto out_err_free_ballooned_pages; + + spin_lock(&xenbus_valloc_lock); + list_add(&node->next, &xenbus_valloc_pages); + spin_unlock(&xenbus_valloc_lock); + + *vaddr = addr; + return 0; + + out_err_free_ballooned_pages: + free_xenballooned_pages(1, &node->page); + out_err: + kfree(node); + return err; +} /** @@ -442,15 +542,12 @@ EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc); int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref, grant_handle_t *handle, void *vaddr) { - struct gnttab_map_grant_ref op = { - .host_addr = (unsigned long)vaddr, - .flags = GNTMAP_host_map, - .ref = gnt_ref, - .dom = dev->otherend_id, - }; + struct gnttab_map_grant_ref op; - if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) - BUG(); + gnttab_set_map_op(&op, (unsigned long)vaddr, GNTMAP_host_map, gnt_ref, + dev->otherend_id); + + gnttab_batch_map(&op, 1); if (op.status != GNTST_okay) { xenbus_dev_fatal(dev, op.status, @@ -478,46 +575,87 @@ EXPORT_SYMBOL_GPL(xenbus_map_ring); */ int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr) { - struct vm_struct *area; + return ring_ops->unmap(dev, vaddr); +} +EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree); + +static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr) +{ + struct xenbus_map_node *node; struct gnttab_unmap_grant_ref op = { .host_addr = (unsigned long)vaddr, }; - - /* It'd be nice if linux/vmalloc.h provided a find_vm_area(void *addr) - * method so that we don't have to muck with vmalloc internals here. - * We could force the user to hang on to their struct vm_struct from - * xenbus_map_ring_valloc, but these 6 lines considerably simplify - * this API. - */ - read_lock(&vmlist_lock); - for (area = vmlist; area != NULL; area = area->next) { - if (area->addr == vaddr) - break; + unsigned int level; + + spin_lock(&xenbus_valloc_lock); + list_for_each_entry(node, &xenbus_valloc_pages, next) { + if (node->area->addr == vaddr) { + list_del(&node->next); + goto found; + } } - read_unlock(&vmlist_lock); + node = NULL; + found: + spin_unlock(&xenbus_valloc_lock); - if (!area) { + if (!node) { xenbus_dev_error(dev, -ENOENT, "can't find mapped virtual address %p", vaddr); return GNTST_bad_virt_addr; } - op.handle = (grant_handle_t)area->phys_addr; + op.handle = node->handle; + op.host_addr = arbitrary_virt_to_machine( + lookup_address((unsigned long)vaddr, &level)).maddr; if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) BUG(); if (op.status == GNTST_okay) - free_vm_area(area); + free_vm_area(node->area); else xenbus_dev_error(dev, op.status, "unmapping page at handle %d error %d", - (int16_t)area->phys_addr, op.status); + node->handle, op.status); + kfree(node); return op.status; } -EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree); +static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr) +{ + int rv; + struct xenbus_map_node *node; + void *addr; + + spin_lock(&xenbus_valloc_lock); + list_for_each_entry(node, &xenbus_valloc_pages, next) { + addr = pfn_to_kaddr(page_to_pfn(node->page)); + if (addr == vaddr) { + list_del(&node->next); + goto found; + } + } + node = addr = NULL; + found: + spin_unlock(&xenbus_valloc_lock); + + if (!node) { + xenbus_dev_error(dev, -ENOENT, + "can't find mapped virtual address %p", vaddr); + return GNTST_bad_virt_addr; + } + + rv = xenbus_unmap_ring(dev, node->handle, addr); + + if (!rv) + free_xenballooned_pages(1, &node->page); + else + WARN(1, "Leaking %p\n", vaddr); + + kfree(node); + return rv; +} /** * xenbus_unmap_ring @@ -532,10 +670,9 @@ EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree); int xenbus_unmap_ring(struct xenbus_device *dev, grant_handle_t handle, void *vaddr) { - struct gnttab_unmap_grant_ref op = { - .host_addr = (unsigned long)vaddr, - .handle = handle, - }; + struct gnttab_unmap_grant_ref op; + + gnttab_set_unmap_op(&op, (unsigned long)vaddr, GNTMAP_host_map, handle); if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) BUG(); @@ -567,3 +704,21 @@ enum xenbus_state xenbus_read_driver_state(const char *path) return result; } EXPORT_SYMBOL_GPL(xenbus_read_driver_state); + +static const struct xenbus_ring_ops ring_ops_pv = { + .map = xenbus_map_ring_valloc_pv, + .unmap = xenbus_unmap_ring_vfree_pv, +}; + +static const struct xenbus_ring_ops ring_ops_hvm = { + .map = xenbus_map_ring_valloc_hvm, + .unmap = xenbus_unmap_ring_vfree_hvm, +}; + +void __init xenbus_ring_ops_init(void) +{ + if (!xen_feature(XENFEAT_auto_translated_physmap)) + ring_ops = &ring_ops_pv; + else + ring_ops = &ring_ops_hvm; +} diff --git a/drivers/xen/xenbus/xenbus_comms.c b/drivers/xen/xenbus/xenbus_comms.c index 6efbe3f29ca..fdb0f339d0a 100644 --- a/drivers/xen/xenbus/xenbus_comms.c +++ b/drivers/xen/xenbus/xenbus_comms.c @@ -30,6 +30,8 @@ * IN THE SOFTWARE. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/wait.h> #include <linux/interrupt.h> #include <linux/sched.h> @@ -203,31 +205,39 @@ int xb_read(void *data, unsigned len) int xb_init_comms(void) { struct xenstore_domain_interface *intf = xen_store_interface; - int err; if (intf->req_prod != intf->req_cons) - printk(KERN_ERR "XENBUS request ring is not quiescent " - "(%08x:%08x)!\n", intf->req_cons, intf->req_prod); + pr_err("request ring is not quiescent (%08x:%08x)!\n", + intf->req_cons, intf->req_prod); if (intf->rsp_prod != intf->rsp_cons) { - printk(KERN_WARNING "XENBUS response ring is not quiescent " - "(%08x:%08x): fixing up\n", - intf->rsp_cons, intf->rsp_prod); - intf->rsp_cons = intf->rsp_prod; + pr_warn("response ring is not quiescent (%08x:%08x): fixing up\n", + intf->rsp_cons, intf->rsp_prod); + /* breaks kdump */ + if (!reset_devices) + intf->rsp_cons = intf->rsp_prod; } - if (xenbus_irq) - unbind_from_irqhandler(xenbus_irq, &xb_waitq); + if (xenbus_irq) { + /* Already have an irq; assume we're resuming */ + rebind_evtchn_irq(xen_store_evtchn, xenbus_irq); + } else { + int err; + err = bind_evtchn_to_irqhandler(xen_store_evtchn, wake_waiting, + 0, "xenbus", &xb_waitq); + if (err < 0) { + pr_err("request irq failed %i\n", err); + return err; + } - err = bind_evtchn_to_irqhandler( - xen_store_evtchn, wake_waiting, - 0, "xenbus", &xb_waitq); - if (err <= 0) { - printk(KERN_ERR "XENBUS request irq failed %i\n", err); - return err; + xenbus_irq = err; } - xenbus_irq = err; - return 0; } + +void xb_deinit_comms(void) +{ + unbind_from_irqhandler(xenbus_irq, &xb_waitq); + xenbus_irq = 0; +} diff --git a/drivers/xen/xenbus/xenbus_comms.h b/drivers/xen/xenbus/xenbus_comms.h index c21db751373..e74f9c1fbd8 100644 --- a/drivers/xen/xenbus/xenbus_comms.h +++ b/drivers/xen/xenbus/xenbus_comms.h @@ -31,8 +31,11 @@ #ifndef _XENBUS_COMMS_H #define _XENBUS_COMMS_H +#include <linux/fs.h> + int xs_init(void); int xb_init_comms(void); +void xb_deinit_comms(void); /* Low level routines. */ int xb_write(const void *data, unsigned len); @@ -42,5 +45,8 @@ int xb_wait_for_data_to_read(void); int xs_input_avail(void); extern struct xenstore_domain_interface *xen_store_interface; extern int xen_store_evtchn; +extern enum xenstore_init xen_store_domain_type; + +extern const struct file_operations xen_xenbus_fops; #endif /* _XENBUS_COMMS_H */ diff --git a/drivers/xen/xenbus/xenbus_dev_backend.c b/drivers/xen/xenbus/xenbus_dev_backend.c new file mode 100644 index 00000000000..b17707ee07d --- /dev/null +++ b/drivers/xen/xenbus/xenbus_dev_backend.c @@ -0,0 +1,142 @@ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/slab.h> +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/fs.h> +#include <linux/miscdevice.h> +#include <linux/module.h> +#include <linux/capability.h> + +#include <xen/xen.h> +#include <xen/page.h> +#include <xen/xenbus.h> +#include <xen/xenbus_dev.h> +#include <xen/grant_table.h> +#include <xen/events.h> +#include <asm/xen/hypervisor.h> + +#include "xenbus_comms.h" + +MODULE_LICENSE("GPL"); + +static int xenbus_backend_open(struct inode *inode, struct file *filp) +{ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + return nonseekable_open(inode, filp); +} + +static long xenbus_alloc(domid_t domid) +{ + struct evtchn_alloc_unbound arg; + int err = -EEXIST; + + xs_suspend(); + + /* If xenstored_ready is nonzero, that means we have already talked to + * xenstore and set up watches. These watches will be restored by + * xs_resume, but that requires communication over the port established + * below that is not visible to anyone until the ioctl returns. + * + * This can be resolved by splitting the ioctl into two parts + * (postponing the resume until xenstored is active) but this is + * unnecessarily complex for the intended use where xenstored is only + * started once - so return -EEXIST if it's already running. + */ + if (xenstored_ready) + goto out_err; + + gnttab_grant_foreign_access_ref(GNTTAB_RESERVED_XENSTORE, domid, + virt_to_mfn(xen_store_interface), 0 /* writable */); + + arg.dom = DOMID_SELF; + arg.remote_dom = domid; + + err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, &arg); + if (err) + goto out_err; + + if (xen_store_evtchn > 0) + xb_deinit_comms(); + + xen_store_evtchn = arg.port; + + xs_resume(); + + return arg.port; + + out_err: + xs_suspend_cancel(); + return err; +} + +static long xenbus_backend_ioctl(struct file *file, unsigned int cmd, + unsigned long data) +{ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + switch (cmd) { + case IOCTL_XENBUS_BACKEND_EVTCHN: + if (xen_store_evtchn > 0) + return xen_store_evtchn; + return -ENODEV; + case IOCTL_XENBUS_BACKEND_SETUP: + return xenbus_alloc(data); + default: + return -ENOTTY; + } +} + +static int xenbus_backend_mmap(struct file *file, struct vm_area_struct *vma) +{ + size_t size = vma->vm_end - vma->vm_start; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if ((size > PAGE_SIZE) || (vma->vm_pgoff != 0)) + return -EINVAL; + + if (remap_pfn_range(vma, vma->vm_start, + virt_to_pfn(xen_store_interface), + size, vma->vm_page_prot)) + return -EAGAIN; + + return 0; +} + +static const struct file_operations xenbus_backend_fops = { + .open = xenbus_backend_open, + .mmap = xenbus_backend_mmap, + .unlocked_ioctl = xenbus_backend_ioctl, +}; + +static struct miscdevice xenbus_backend_dev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "xen/xenbus_backend", + .fops = &xenbus_backend_fops, +}; + +static int __init xenbus_backend_init(void) +{ + int err; + + if (!xen_initial_domain()) + return -ENODEV; + + err = misc_register(&xenbus_backend_dev); + if (err) + pr_err("Could not register xenbus backend device\n"); + return err; +} + +static void __exit xenbus_backend_exit(void) +{ + misc_deregister(&xenbus_backend_dev); +} + +module_init(xenbus_backend_init); +module_exit(xenbus_backend_exit); diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c new file mode 100644 index 00000000000..85534ea6355 --- /dev/null +++ b/drivers/xen/xenbus/xenbus_dev_frontend.c @@ -0,0 +1,631 @@ +/* + * Driver giving user-space access to the kernel's xenbus connection + * to xenstore. + * + * Copyright (c) 2005, Christian Limpach + * Copyright (c) 2005, Rusty Russell, IBM Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Changes: + * 2008-10-07 Alex Zeffertt Replaced /proc/xen/xenbus with xenfs filesystem + * and /proc/xen compatibility mount point. + * Turned xenfs into a loadable module. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/uio.h> +#include <linux/notifier.h> +#include <linux/wait.h> +#include <linux/fs.h> +#include <linux/poll.h> +#include <linux/mutex.h> +#include <linux/sched.h> +#include <linux/spinlock.h> +#include <linux/mount.h> +#include <linux/pagemap.h> +#include <linux/uaccess.h> +#include <linux/init.h> +#include <linux/namei.h> +#include <linux/string.h> +#include <linux/slab.h> +#include <linux/miscdevice.h> +#include <linux/module.h> + +#include "xenbus_comms.h" + +#include <xen/xenbus.h> +#include <xen/xen.h> +#include <asm/xen/hypervisor.h> + +MODULE_LICENSE("GPL"); + +/* + * An element of a list of outstanding transactions, for which we're + * still waiting a reply. + */ +struct xenbus_transaction_holder { + struct list_head list; + struct xenbus_transaction handle; +}; + +/* + * A buffer of data on the queue. + */ +struct read_buffer { + struct list_head list; + unsigned int cons; + unsigned int len; + char msg[]; +}; + +struct xenbus_file_priv { + /* + * msgbuffer_mutex is held while partial requests are built up + * and complete requests are acted on. It therefore protects + * the "transactions" and "watches" lists, and the partial + * request length and buffer. + * + * reply_mutex protects the reply being built up to return to + * usermode. It nests inside msgbuffer_mutex but may be held + * alone during a watch callback. + */ + struct mutex msgbuffer_mutex; + + /* In-progress transactions */ + struct list_head transactions; + + /* Active watches. */ + struct list_head watches; + + /* Partial request. */ + unsigned int len; + union { + struct xsd_sockmsg msg; + char buffer[XENSTORE_PAYLOAD_MAX]; + } u; + + /* Response queue. */ + struct mutex reply_mutex; + struct list_head read_buffers; + wait_queue_head_t read_waitq; + +}; + +/* Read out any raw xenbus messages queued up. */ +static ssize_t xenbus_file_read(struct file *filp, + char __user *ubuf, + size_t len, loff_t *ppos) +{ + struct xenbus_file_priv *u = filp->private_data; + struct read_buffer *rb; + unsigned i; + int ret; + + mutex_lock(&u->reply_mutex); +again: + while (list_empty(&u->read_buffers)) { + mutex_unlock(&u->reply_mutex); + if (filp->f_flags & O_NONBLOCK) + return -EAGAIN; + + ret = wait_event_interruptible(u->read_waitq, + !list_empty(&u->read_buffers)); + if (ret) + return ret; + mutex_lock(&u->reply_mutex); + } + + rb = list_entry(u->read_buffers.next, struct read_buffer, list); + i = 0; + while (i < len) { + unsigned sz = min((unsigned)len - i, rb->len - rb->cons); + + ret = copy_to_user(ubuf + i, &rb->msg[rb->cons], sz); + + i += sz - ret; + rb->cons += sz - ret; + + if (ret != 0) { + if (i == 0) + i = -EFAULT; + goto out; + } + + /* Clear out buffer if it has been consumed */ + if (rb->cons == rb->len) { + list_del(&rb->list); + kfree(rb); + if (list_empty(&u->read_buffers)) + break; + rb = list_entry(u->read_buffers.next, + struct read_buffer, list); + } + } + if (i == 0) + goto again; + +out: + mutex_unlock(&u->reply_mutex); + return i; +} + +/* + * Add a buffer to the queue. Caller must hold the appropriate lock + * if the queue is not local. (Commonly the caller will build up + * multiple queued buffers on a temporary local list, and then add it + * to the appropriate list under lock once all the buffers have een + * successfully allocated.) + */ +static int queue_reply(struct list_head *queue, const void *data, size_t len) +{ + struct read_buffer *rb; + + if (len == 0) + return 0; + + rb = kmalloc(sizeof(*rb) + len, GFP_KERNEL); + if (rb == NULL) + return -ENOMEM; + + rb->cons = 0; + rb->len = len; + + memcpy(rb->msg, data, len); + + list_add_tail(&rb->list, queue); + return 0; +} + +/* + * Free all the read_buffer s on a list. + * Caller must have sole reference to list. + */ +static void queue_cleanup(struct list_head *list) +{ + struct read_buffer *rb; + + while (!list_empty(list)) { + rb = list_entry(list->next, struct read_buffer, list); + list_del(list->next); + kfree(rb); + } +} + +struct watch_adapter { + struct list_head list; + struct xenbus_watch watch; + struct xenbus_file_priv *dev_data; + char *token; +}; + +static void free_watch_adapter(struct watch_adapter *watch) +{ + kfree(watch->watch.node); + kfree(watch->token); + kfree(watch); +} + +static struct watch_adapter *alloc_watch_adapter(const char *path, + const char *token) +{ + struct watch_adapter *watch; + + watch = kzalloc(sizeof(*watch), GFP_KERNEL); + if (watch == NULL) + goto out_fail; + + watch->watch.node = kstrdup(path, GFP_KERNEL); + if (watch->watch.node == NULL) + goto out_free; + + watch->token = kstrdup(token, GFP_KERNEL); + if (watch->token == NULL) + goto out_free; + + return watch; + +out_free: + free_watch_adapter(watch); + +out_fail: + return NULL; +} + +static void watch_fired(struct xenbus_watch *watch, + const char **vec, + unsigned int len) +{ + struct watch_adapter *adap; + struct xsd_sockmsg hdr; + const char *path, *token; + int path_len, tok_len, body_len, data_len = 0; + int ret; + LIST_HEAD(staging_q); + + adap = container_of(watch, struct watch_adapter, watch); + + path = vec[XS_WATCH_PATH]; + token = adap->token; + + path_len = strlen(path) + 1; + tok_len = strlen(token) + 1; + if (len > 2) + data_len = vec[len] - vec[2] + 1; + body_len = path_len + tok_len + data_len; + + hdr.type = XS_WATCH_EVENT; + hdr.len = body_len; + + mutex_lock(&adap->dev_data->reply_mutex); + + ret = queue_reply(&staging_q, &hdr, sizeof(hdr)); + if (!ret) + ret = queue_reply(&staging_q, path, path_len); + if (!ret) + ret = queue_reply(&staging_q, token, tok_len); + if (!ret && len > 2) + ret = queue_reply(&staging_q, vec[2], data_len); + + if (!ret) { + /* success: pass reply list onto watcher */ + list_splice_tail(&staging_q, &adap->dev_data->read_buffers); + wake_up(&adap->dev_data->read_waitq); + } else + queue_cleanup(&staging_q); + + mutex_unlock(&adap->dev_data->reply_mutex); +} + +static int xenbus_write_transaction(unsigned msg_type, + struct xenbus_file_priv *u) +{ + int rc; + void *reply; + struct xenbus_transaction_holder *trans = NULL; + LIST_HEAD(staging_q); + + if (msg_type == XS_TRANSACTION_START) { + trans = kmalloc(sizeof(*trans), GFP_KERNEL); + if (!trans) { + rc = -ENOMEM; + goto out; + } + } + + reply = xenbus_dev_request_and_reply(&u->u.msg); + if (IS_ERR(reply)) { + kfree(trans); + rc = PTR_ERR(reply); + goto out; + } + + if (msg_type == XS_TRANSACTION_START) { + trans->handle.id = simple_strtoul(reply, NULL, 0); + + list_add(&trans->list, &u->transactions); + } else if (msg_type == XS_TRANSACTION_END) { + list_for_each_entry(trans, &u->transactions, list) + if (trans->handle.id == u->u.msg.tx_id) + break; + BUG_ON(&trans->list == &u->transactions); + list_del(&trans->list); + + kfree(trans); + } + + mutex_lock(&u->reply_mutex); + rc = queue_reply(&staging_q, &u->u.msg, sizeof(u->u.msg)); + if (!rc) + rc = queue_reply(&staging_q, reply, u->u.msg.len); + if (!rc) { + list_splice_tail(&staging_q, &u->read_buffers); + wake_up(&u->read_waitq); + } else { + queue_cleanup(&staging_q); + } + mutex_unlock(&u->reply_mutex); + + kfree(reply); + +out: + return rc; +} + +static int xenbus_write_watch(unsigned msg_type, struct xenbus_file_priv *u) +{ + struct watch_adapter *watch, *tmp_watch; + char *path, *token; + int err, rc; + LIST_HEAD(staging_q); + + path = u->u.buffer + sizeof(u->u.msg); + token = memchr(path, 0, u->u.msg.len); + if (token == NULL) { + rc = -EILSEQ; + goto out; + } + token++; + if (memchr(token, 0, u->u.msg.len - (token - path)) == NULL) { + rc = -EILSEQ; + goto out; + } + + if (msg_type == XS_WATCH) { + watch = alloc_watch_adapter(path, token); + if (watch == NULL) { + rc = -ENOMEM; + goto out; + } + + watch->watch.callback = watch_fired; + watch->dev_data = u; + + err = register_xenbus_watch(&watch->watch); + if (err) { + free_watch_adapter(watch); + rc = err; + goto out; + } + list_add(&watch->list, &u->watches); + } else { + list_for_each_entry_safe(watch, tmp_watch, &u->watches, list) { + if (!strcmp(watch->token, token) && + !strcmp(watch->watch.node, path)) { + unregister_xenbus_watch(&watch->watch); + list_del(&watch->list); + free_watch_adapter(watch); + break; + } + } + } + + /* Success. Synthesize a reply to say all is OK. */ + { + struct { + struct xsd_sockmsg hdr; + char body[3]; + } __packed reply = { + { + .type = msg_type, + .len = sizeof(reply.body) + }, + "OK" + }; + + mutex_lock(&u->reply_mutex); + rc = queue_reply(&u->read_buffers, &reply, sizeof(reply)); + wake_up(&u->read_waitq); + mutex_unlock(&u->reply_mutex); + } + +out: + return rc; +} + +static ssize_t xenbus_file_write(struct file *filp, + const char __user *ubuf, + size_t len, loff_t *ppos) +{ + struct xenbus_file_priv *u = filp->private_data; + uint32_t msg_type; + int rc = len; + int ret; + LIST_HEAD(staging_q); + + /* + * We're expecting usermode to be writing properly formed + * xenbus messages. If they write an incomplete message we + * buffer it up. Once it is complete, we act on it. + */ + + /* + * Make sure concurrent writers can't stomp all over each + * other's messages and make a mess of our partial message + * buffer. We don't make any attemppt to stop multiple + * writers from making a mess of each other's incomplete + * messages; we're just trying to guarantee our own internal + * consistency and make sure that single writes are handled + * atomically. + */ + mutex_lock(&u->msgbuffer_mutex); + + /* Get this out of the way early to avoid confusion */ + if (len == 0) + goto out; + + /* Can't write a xenbus message larger we can buffer */ + if (len > sizeof(u->u.buffer) - u->len) { + /* On error, dump existing buffer */ + u->len = 0; + rc = -EINVAL; + goto out; + } + + ret = copy_from_user(u->u.buffer + u->len, ubuf, len); + + if (ret != 0) { + rc = -EFAULT; + goto out; + } + + /* Deal with a partial copy. */ + len -= ret; + rc = len; + + u->len += len; + + /* Return if we haven't got a full message yet */ + if (u->len < sizeof(u->u.msg)) + goto out; /* not even the header yet */ + + /* If we're expecting a message that's larger than we can + possibly send, dump what we have and return an error. */ + if ((sizeof(u->u.msg) + u->u.msg.len) > sizeof(u->u.buffer)) { + rc = -E2BIG; + u->len = 0; + goto out; + } + + if (u->len < (sizeof(u->u.msg) + u->u.msg.len)) + goto out; /* incomplete data portion */ + + /* + * OK, now we have a complete message. Do something with it. + */ + + msg_type = u->u.msg.type; + + switch (msg_type) { + case XS_WATCH: + case XS_UNWATCH: + /* (Un)Ask for some path to be watched for changes */ + ret = xenbus_write_watch(msg_type, u); + break; + + default: + /* Send out a transaction */ + ret = xenbus_write_transaction(msg_type, u); + break; + } + if (ret != 0) + rc = ret; + + /* Buffered message consumed */ + u->len = 0; + + out: + mutex_unlock(&u->msgbuffer_mutex); + return rc; +} + +static int xenbus_file_open(struct inode *inode, struct file *filp) +{ + struct xenbus_file_priv *u; + + if (xen_store_evtchn == 0) + return -ENOENT; + + nonseekable_open(inode, filp); + + u = kzalloc(sizeof(*u), GFP_KERNEL); + if (u == NULL) + return -ENOMEM; + + INIT_LIST_HEAD(&u->transactions); + INIT_LIST_HEAD(&u->watches); + INIT_LIST_HEAD(&u->read_buffers); + init_waitqueue_head(&u->read_waitq); + + mutex_init(&u->reply_mutex); + mutex_init(&u->msgbuffer_mutex); + + filp->private_data = u; + + return 0; +} + +static int xenbus_file_release(struct inode *inode, struct file *filp) +{ + struct xenbus_file_priv *u = filp->private_data; + struct xenbus_transaction_holder *trans, *tmp; + struct watch_adapter *watch, *tmp_watch; + struct read_buffer *rb, *tmp_rb; + + /* + * No need for locking here because there are no other users, + * by definition. + */ + + list_for_each_entry_safe(trans, tmp, &u->transactions, list) { + xenbus_transaction_end(trans->handle, 1); + list_del(&trans->list); + kfree(trans); + } + + list_for_each_entry_safe(watch, tmp_watch, &u->watches, list) { + unregister_xenbus_watch(&watch->watch); + list_del(&watch->list); + free_watch_adapter(watch); + } + + list_for_each_entry_safe(rb, tmp_rb, &u->read_buffers, list) { + list_del(&rb->list); + kfree(rb); + } + kfree(u); + + return 0; +} + +static unsigned int xenbus_file_poll(struct file *file, poll_table *wait) +{ + struct xenbus_file_priv *u = file->private_data; + + poll_wait(file, &u->read_waitq, wait); + if (!list_empty(&u->read_buffers)) + return POLLIN | POLLRDNORM; + return 0; +} + +const struct file_operations xen_xenbus_fops = { + .read = xenbus_file_read, + .write = xenbus_file_write, + .open = xenbus_file_open, + .release = xenbus_file_release, + .poll = xenbus_file_poll, + .llseek = no_llseek, +}; +EXPORT_SYMBOL_GPL(xen_xenbus_fops); + +static struct miscdevice xenbus_dev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "xen/xenbus", + .fops = &xen_xenbus_fops, +}; + +static int __init xenbus_init(void) +{ + int err; + + if (!xen_domain()) + return -ENODEV; + + err = misc_register(&xenbus_dev); + if (err) + pr_err("Could not register xenbus frontend device\n"); + return err; +} + +static void __exit xenbus_exit(void) +{ + misc_deregister(&xenbus_dev); +} + +module_init(xenbus_init); +module_exit(xenbus_exit); diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 4750de316ad..3c0a74b3e9b 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -30,6 +30,8 @@ * IN THE SOFTWARE. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #define DPRINTK(fmt, args...) \ pr_debug("xenbus_probe (%s:%d) " fmt ".\n", \ __func__, __LINE__, ##args) @@ -40,32 +42,41 @@ #include <linux/ctype.h> #include <linux/fcntl.h> #include <linux/mm.h> +#include <linux/proc_fs.h> #include <linux/notifier.h> #include <linux/kthread.h> #include <linux/mutex.h> #include <linux/io.h> +#include <linux/slab.h> +#include <linux/module.h> #include <asm/page.h> #include <asm/pgtable.h> #include <asm/xen/hypervisor.h> + +#include <xen/xen.h> #include <xen/xenbus.h> #include <xen/events.h> #include <xen/page.h> +#include <xen/hvm.h> + #include "xenbus_comms.h" #include "xenbus_probe.h" + int xen_store_evtchn; -struct xenstore_domain_interface *xen_store_interface; -static unsigned long xen_store_mfn; +EXPORT_SYMBOL_GPL(xen_store_evtchn); -static BLOCKING_NOTIFIER_HEAD(xenstore_chain); +struct xenstore_domain_interface *xen_store_interface; +EXPORT_SYMBOL_GPL(xen_store_interface); -static void wait_for_devices(struct xenbus_driver *xendrv); +enum xenstore_init xen_store_domain_type; +EXPORT_SYMBOL_GPL(xen_store_domain_type); -static int xenbus_probe_frontend(const char *type, const char *name); +static unsigned long xen_store_mfn; -static void xenbus_dev_shutdown(struct device *_dev); +static BLOCKING_NOTIFIER_HEAD(xenstore_chain); /* If something in array of ids matches this device, return it. */ static const struct xenbus_device_id * @@ -87,24 +98,7 @@ int xenbus_match(struct device *_dev, struct device_driver *_drv) return match_device(drv->ids, to_xenbus_device(_dev)) != NULL; } - -/* device/<type>/<id> => <type>-<id> */ -static int frontend_bus_id(char bus_id[BUS_ID_SIZE], const char *nodename) -{ - nodename = strchr(nodename, '/'); - if (!nodename || strlen(nodename + 1) >= BUS_ID_SIZE) { - printk(KERN_WARNING "XENBUS: bad frontend %s\n", nodename); - return -EINVAL; - } - - strlcpy(bus_id, nodename + 1, BUS_ID_SIZE); - if (!strchr(bus_id, '/')) { - printk(KERN_WARNING "XENBUS: bus_id %s no slash\n", bus_id); - return -EINVAL; - } - *strchr(bus_id, '/') = '-'; - return 0; -} +EXPORT_SYMBOL_GPL(xenbus_match); static void free_otherend_details(struct xenbus_device *dev) @@ -124,7 +118,30 @@ static void free_otherend_watch(struct xenbus_device *dev) } -int read_otherend_details(struct xenbus_device *xendev, +static int talk_to_otherend(struct xenbus_device *dev) +{ + struct xenbus_driver *drv = to_xenbus_driver(dev->dev.driver); + + free_otherend_watch(dev); + free_otherend_details(dev); + + return drv->read_otherend_details(dev); +} + + + +static int watch_otherend(struct xenbus_device *dev) +{ + struct xen_bus_type *bus = + container_of(dev->dev.bus, struct xen_bus_type, bus); + + return xenbus_watch_pathfmt(dev, &dev->otherend_watch, + bus->otherend_changed, + "%s/%s", dev->otherend, "state"); +} + + +int xenbus_read_otherend_details(struct xenbus_device *xendev, char *id_node, char *path_node) { int err = xenbus_gather(XBT_NIL, xendev->nodename, @@ -149,31 +166,11 @@ int read_otherend_details(struct xenbus_device *xendev, return 0; } +EXPORT_SYMBOL_GPL(xenbus_read_otherend_details); - -static int read_backend_details(struct xenbus_device *xendev) -{ - return read_otherend_details(xendev, "backend-id", "backend"); -} - - -/* Bus type for frontend drivers. */ -static struct xen_bus_type xenbus_frontend = { - .root = "device", - .levels = 2, /* device/type/<id> */ - .get_bus_id = frontend_bus_id, - .probe = xenbus_probe_frontend, - .bus = { - .name = "xen", - .match = xenbus_match, - .probe = xenbus_dev_probe, - .remove = xenbus_dev_remove, - .shutdown = xenbus_dev_shutdown, - }, -}; - -static void otherend_changed(struct xenbus_watch *watch, - const char **vec, unsigned int len) +void xenbus_otherend_changed(struct xenbus_watch *watch, + const char **vec, unsigned int len, + int ignore_on_shutdown) { struct xenbus_device *dev = container_of(watch, struct xenbus_device, otherend_watch); @@ -201,11 +198,7 @@ static void otherend_changed(struct xenbus_watch *watch, * work that can fail e.g., when the rootfs is gone. */ if (system_state > SYSTEM_RUNNING) { - struct xen_bus_type *bus = bus; - bus = container_of(dev->dev.bus, struct xen_bus_type, bus); - /* If we're frontend, drive the state machine to Closed. */ - /* This should cause the backend to release our resources. */ - if ((bus == &xenbus_frontend) && (state == XenbusStateClosing)) + if (ignore_on_shutdown && (state == XenbusStateClosing)) xenbus_frontend_closed(dev); return; } @@ -213,25 +206,7 @@ static void otherend_changed(struct xenbus_watch *watch, if (drv->otherend_changed) drv->otherend_changed(dev, state); } - - -static int talk_to_otherend(struct xenbus_device *dev) -{ - struct xenbus_driver *drv = to_xenbus_driver(dev->dev.driver); - - free_otherend_watch(dev); - free_otherend_details(dev); - - return drv->read_otherend_details(dev); -} - - -static int watch_otherend(struct xenbus_device *dev) -{ - return xenbus_watch_pathfmt(dev, &dev->otherend_watch, otherend_changed, - "%s/%s", dev->otherend, "state"); -} - +EXPORT_SYMBOL_GPL(xenbus_otherend_changed); int xenbus_dev_probe(struct device *_dev) { @@ -275,8 +250,9 @@ int xenbus_dev_probe(struct device *_dev) fail: xenbus_dev_error(dev, err, "xenbus_dev_probe on %s", dev->nodename); xenbus_switch_state(dev, XenbusStateClosed); - return -ENODEV; + return err; } +EXPORT_SYMBOL_GPL(xenbus_dev_probe); int xenbus_dev_remove(struct device *_dev) { @@ -286,16 +262,18 @@ int xenbus_dev_remove(struct device *_dev) DPRINTK("%s", dev->nodename); free_otherend_watch(dev); - free_otherend_details(dev); if (drv->remove) drv->remove(dev); + free_otherend_details(dev); + xenbus_switch_state(dev, XenbusStateClosed); return 0; } +EXPORT_SYMBOL_GPL(xenbus_dev_remove); -static void xenbus_dev_shutdown(struct device *_dev) +void xenbus_dev_shutdown(struct device *_dev) { struct xenbus_device *dev = to_xenbus_device(_dev); unsigned long timeout = 5*HZ; @@ -304,50 +282,28 @@ static void xenbus_dev_shutdown(struct device *_dev) get_device(&dev->dev); if (dev->state != XenbusStateConnected) { - printk(KERN_INFO "%s: %s: %s != Connected, skipping\n", __func__, - dev->nodename, xenbus_strstate(dev->state)); + pr_info("%s: %s: %s != Connected, skipping\n", + __func__, dev->nodename, xenbus_strstate(dev->state)); goto out; } xenbus_switch_state(dev, XenbusStateClosing); timeout = wait_for_completion_timeout(&dev->down, timeout); if (!timeout) - printk(KERN_INFO "%s: %s timeout closing device\n", - __func__, dev->nodename); + pr_info("%s: %s timeout closing device\n", + __func__, dev->nodename); out: put_device(&dev->dev); } +EXPORT_SYMBOL_GPL(xenbus_dev_shutdown); int xenbus_register_driver_common(struct xenbus_driver *drv, - struct xen_bus_type *bus, - struct module *owner, - const char *mod_name) + struct xen_bus_type *bus) { - drv->driver.name = drv->name; drv->driver.bus = &bus->bus; - drv->driver.owner = owner; - drv->driver.mod_name = mod_name; return driver_register(&drv->driver); } - -int __xenbus_register_frontend(struct xenbus_driver *drv, - struct module *owner, const char *mod_name) -{ - int ret; - - drv->read_otherend_details = read_backend_details; - - ret = xenbus_register_driver_common(drv, &xenbus_frontend, - owner, mod_name); - if (ret) - return ret; - - /* If this driver is loaded as a module wait for devices to attach. */ - wait_for_devices(drv); - - return 0; -} -EXPORT_SYMBOL_GPL(__xenbus_register_frontend); +EXPORT_SYMBOL_GPL(xenbus_register_driver_common); void xenbus_unregister_driver(struct xenbus_driver *drv) { @@ -355,8 +311,7 @@ void xenbus_unregister_driver(struct xenbus_driver *drv) } EXPORT_SYMBOL_GPL(xenbus_unregister_driver); -struct xb_find_info -{ +struct xb_find_info { struct xenbus_device *dev; const char *nodename; }; @@ -374,8 +329,8 @@ static int cmp_dev(struct device *dev, void *data) return 0; } -struct xenbus_device *xenbus_device_find(const char *nodename, - struct bus_type *bus) +static struct xenbus_device *xenbus_device_find(const char *nodename, + struct bus_type *bus) { struct xb_find_info info = { .dev = NULL, .nodename = nodename }; @@ -424,25 +379,50 @@ static void xenbus_dev_release(struct device *dev) kfree(to_xenbus_device(dev)); } -static ssize_t xendev_show_nodename(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t nodename_show(struct device *dev, + struct device_attribute *attr, char *buf) { return sprintf(buf, "%s\n", to_xenbus_device(dev)->nodename); } -DEVICE_ATTR(nodename, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_nodename, NULL); +static DEVICE_ATTR_RO(nodename); -static ssize_t xendev_show_devtype(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t devtype_show(struct device *dev, + struct device_attribute *attr, char *buf) { return sprintf(buf, "%s\n", to_xenbus_device(dev)->devicetype); } -DEVICE_ATTR(devtype, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_devtype, NULL); +static DEVICE_ATTR_RO(devtype); + +static ssize_t modalias_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%s:%s\n", dev->bus->name, + to_xenbus_device(dev)->devicetype); +} +static DEVICE_ATTR_RO(modalias); + +static struct attribute *xenbus_dev_attrs[] = { + &dev_attr_nodename.attr, + &dev_attr_devtype.attr, + &dev_attr_modalias.attr, + NULL, +}; +static const struct attribute_group xenbus_dev_group = { + .attrs = xenbus_dev_attrs, +}; + +const struct attribute_group *xenbus_dev_groups[] = { + &xenbus_dev_group, + NULL, +}; +EXPORT_SYMBOL_GPL(xenbus_dev_groups); int xenbus_probe_node(struct xen_bus_type *bus, const char *type, const char *nodename) { + char devname[XEN_BUS_ID_SIZE]; int err; struct xenbus_device *xendev; size_t stringlen; @@ -477,50 +457,23 @@ int xenbus_probe_node(struct xen_bus_type *bus, xendev->dev.bus = &bus->bus; xendev->dev.release = xenbus_dev_release; - err = bus->get_bus_id(xendev->dev.bus_id, xendev->nodename); + err = bus->get_bus_id(devname, xendev->nodename); if (err) goto fail; + dev_set_name(&xendev->dev, "%s", devname); + /* Register with generic device framework. */ err = device_register(&xendev->dev); if (err) goto fail; - err = device_create_file(&xendev->dev, &dev_attr_nodename); - if (err) - goto fail_unregister; - - err = device_create_file(&xendev->dev, &dev_attr_devtype); - if (err) - goto fail_remove_file; - return 0; -fail_remove_file: - device_remove_file(&xendev->dev, &dev_attr_nodename); -fail_unregister: - device_unregister(&xendev->dev); fail: kfree(xendev); return err; } - -/* device/<typename>/<name> */ -static int xenbus_probe_frontend(const char *type, const char *name) -{ - char *nodename; - int err; - - nodename = kasprintf(GFP_KERNEL, "%s/%s/%s", - xenbus_frontend.root, type, name); - if (!nodename) - return -ENOMEM; - - DPRINTK("%s", nodename); - - err = xenbus_probe_node(&xenbus_frontend, type, nodename); - kfree(nodename); - return err; -} +EXPORT_SYMBOL_GPL(xenbus_probe_node); static int xenbus_probe_device_type(struct xen_bus_type *bus, const char *type) { @@ -534,10 +487,11 @@ static int xenbus_probe_device_type(struct xen_bus_type *bus, const char *type) return PTR_ERR(dir); for (i = 0; i < dir_n; i++) { - err = bus->probe(type, dir[i]); + err = bus->probe(bus, type, dir[i]); if (err) break; } + kfree(dir); return err; } @@ -557,9 +511,11 @@ int xenbus_probe_devices(struct xen_bus_type *bus) if (err) break; } + kfree(dir); return err; } +EXPORT_SYMBOL_GPL(xenbus_probe_devices); static unsigned int char_count(const char *str, char c) { @@ -588,7 +544,7 @@ void xenbus_dev_changed(const char *node, struct xen_bus_type *bus) { int exists, rootlen; struct xenbus_device *dev; - char type[BUS_ID_SIZE]; + char type[XEN_BUS_ID_SIZE]; const char *p, *root; if (char_count(node, '/') < 2) @@ -602,8 +558,8 @@ void xenbus_dev_changed(const char *node, struct xen_bus_type *bus) /* backend/<type>/... or device/<type>/... */ p = strchr(node, '/') + 1; - snprintf(type, BUS_ID_SIZE, "%.*s", (int)strcspn(p, "/"), p); - type[BUS_ID_SIZE-1] = '\0'; + snprintf(type, XEN_BUS_ID_SIZE, "%.*s", (int)strcspn(p, "/"), p); + type[XEN_BUS_ID_SIZE-1] = '\0'; rootlen = strsep_len(node, '/', bus->levels); if (rootlen < 0) @@ -620,81 +576,44 @@ void xenbus_dev_changed(const char *node, struct xen_bus_type *bus) kfree(root); } +EXPORT_SYMBOL_GPL(xenbus_dev_changed); -static void frontend_changed(struct xenbus_watch *watch, - const char **vec, unsigned int len) -{ - DPRINTK(""); - - xenbus_dev_changed(vec[XS_WATCH_PATH], &xenbus_frontend); -} - -/* We watch for devices appearing and vanishing. */ -static struct xenbus_watch fe_watch = { - .node = "device", - .callback = frontend_changed, -}; - -static int suspend_dev(struct device *dev, void *data) +int xenbus_dev_suspend(struct device *dev) { int err = 0; struct xenbus_driver *drv; - struct xenbus_device *xdev; + struct xenbus_device *xdev + = container_of(dev, struct xenbus_device, dev); - DPRINTK(""); + DPRINTK("%s", xdev->nodename); if (dev->driver == NULL) return 0; drv = to_xenbus_driver(dev->driver); - xdev = container_of(dev, struct xenbus_device, dev); if (drv->suspend) err = drv->suspend(xdev); if (err) - printk(KERN_WARNING - "xenbus: suspend %s failed: %i\n", dev->bus_id, err); + pr_warn("suspend %s failed: %i\n", dev_name(dev), err); return 0; } +EXPORT_SYMBOL_GPL(xenbus_dev_suspend); -static int suspend_cancel_dev(struct device *dev, void *data) -{ - int err = 0; - struct xenbus_driver *drv; - struct xenbus_device *xdev; - - DPRINTK(""); - - if (dev->driver == NULL) - return 0; - drv = to_xenbus_driver(dev->driver); - xdev = container_of(dev, struct xenbus_device, dev); - if (drv->suspend_cancel) - err = drv->suspend_cancel(xdev); - if (err) - printk(KERN_WARNING - "xenbus: suspend_cancel %s failed: %i\n", - dev->bus_id, err); - return 0; -} - -static int resume_dev(struct device *dev, void *data) +int xenbus_dev_resume(struct device *dev) { int err; struct xenbus_driver *drv; - struct xenbus_device *xdev; + struct xenbus_device *xdev + = container_of(dev, struct xenbus_device, dev); - DPRINTK(""); + DPRINTK("%s", xdev->nodename); if (dev->driver == NULL) return 0; - drv = to_xenbus_driver(dev->driver); - xdev = container_of(dev, struct xenbus_device, dev); - err = talk_to_otherend(xdev); if (err) { - printk(KERN_WARNING - "xenbus: resume (talk_to_otherend) %s failed: %i\n", - dev->bus_id, err); + pr_warn("resume (talk_to_otherend) %s failed: %i\n", + dev_name(dev), err); return err; } @@ -703,53 +622,32 @@ static int resume_dev(struct device *dev, void *data) if (drv->resume) { err = drv->resume(xdev); if (err) { - printk(KERN_WARNING - "xenbus: resume %s failed: %i\n", - dev->bus_id, err); + pr_warn("resume %s failed: %i\n", dev_name(dev), err); return err; } } err = watch_otherend(xdev); if (err) { - printk(KERN_WARNING - "xenbus_probe: resume (watch_otherend) %s failed: " - "%d.\n", dev->bus_id, err); + pr_warn("resume (watch_otherend) %s failed: %d.\n", + dev_name(dev), err); return err; } return 0; } +EXPORT_SYMBOL_GPL(xenbus_dev_resume); -void xenbus_suspend(void) -{ - DPRINTK(""); - - bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, suspend_dev); - xenbus_backend_suspend(suspend_dev); - xs_suspend(); -} -EXPORT_SYMBOL_GPL(xenbus_suspend); - -void xenbus_resume(void) -{ - xb_init_comms(); - xs_resume(); - bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, resume_dev); - xenbus_backend_resume(resume_dev); -} -EXPORT_SYMBOL_GPL(xenbus_resume); - -void xenbus_suspend_cancel(void) +int xenbus_dev_cancel(struct device *dev) { - xs_suspend_cancel(); - bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, suspend_cancel_dev); - xenbus_backend_resume(suspend_cancel_dev); + /* Do nothing */ + DPRINTK("cancel"); + return 0; } -EXPORT_SYMBOL_GPL(xenbus_suspend_cancel); +EXPORT_SYMBOL_GPL(xenbus_dev_cancel); /* A flag to determine if xenstored is 'ready' (i.e. has started) */ -int xenstored_ready = 0; +int xenstored_ready; int register_xenstore_notifier(struct notifier_block *nb) @@ -773,164 +671,136 @@ EXPORT_SYMBOL_GPL(unregister_xenstore_notifier); void xenbus_probe(struct work_struct *unused) { - BUG_ON((xenstored_ready <= 0)); - - /* Enumerate devices in xenstore and watch for changes. */ - xenbus_probe_devices(&xenbus_frontend); - register_xenbus_watch(&fe_watch); - xenbus_backend_probe_and_watch(); + xenstored_ready = 1; /* Notify others that xenstore is up */ blocking_notifier_call_chain(&xenstore_chain, 0, NULL); } +EXPORT_SYMBOL_GPL(xenbus_probe); -static int __init xenbus_probe_init(void) +static int __init xenbus_probe_initcall(void) { - int err = 0; + if (!xen_domain()) + return -ENODEV; - DPRINTK(""); - - err = -ENODEV; - if (!is_running_on_xen()) - goto out_error; - - /* Register ourselves with the kernel bus subsystem */ - err = bus_register(&xenbus_frontend.bus); - if (err) - goto out_error; - - err = xenbus_backend_bus_register(); - if (err) - goto out_unreg_front; - - /* - * Domain0 doesn't have a store_evtchn or store_mfn yet. - */ - if (is_initial_xendomain()) { - /* dom0 not yet supported */ - } else { - xenstored_ready = 1; - xen_store_evtchn = xen_start_info->store_evtchn; - xen_store_mfn = xen_start_info->store_mfn; - } - xen_store_interface = mfn_to_virt(xen_store_mfn); - - /* Initialize the interface to xenstore. */ - err = xs_init(); - if (err) { - printk(KERN_WARNING - "XENBUS: Error initializing xenstore comms: %i\n", err); - goto out_unreg_back; - } - - if (!is_initial_xendomain()) - xenbus_probe(NULL); + if (xen_initial_domain() || xen_hvm_domain()) + return 0; + xenbus_probe(NULL); return 0; - - out_unreg_back: - xenbus_backend_bus_unregister(); - - out_unreg_front: - bus_unregister(&xenbus_frontend.bus); - - out_error: - return err; } -postcore_initcall(xenbus_probe_init); - -MODULE_LICENSE("GPL"); +device_initcall(xenbus_probe_initcall); -static int is_disconnected_device(struct device *dev, void *data) +/* Set up event channel for xenstored which is run as a local process + * (this is normally used only in dom0) + */ +static int __init xenstored_local_init(void) { - struct xenbus_device *xendev = to_xenbus_device(dev); - struct device_driver *drv = data; + int err = 0; + unsigned long page = 0; + struct evtchn_alloc_unbound alloc_unbound; - /* - * A device with no driver will never connect. We care only about - * devices which should currently be in the process of connecting. - */ - if (!dev->driver) - return 0; + /* Allocate Xenstore page */ + page = get_zeroed_page(GFP_KERNEL); + if (!page) + goto out_err; - /* Is this search limited to a particular driver? */ - if (drv && (dev->driver != drv)) - return 0; + xen_store_mfn = xen_start_info->store_mfn = + pfn_to_mfn(virt_to_phys((void *)page) >> + PAGE_SHIFT); - return (xendev->state != XenbusStateConnected); -} + /* Next allocate a local port which xenstored can bind to */ + alloc_unbound.dom = DOMID_SELF; + alloc_unbound.remote_dom = DOMID_SELF; -static int exists_disconnected_device(struct device_driver *drv) -{ - return bus_for_each_dev(&xenbus_frontend.bus, NULL, drv, - is_disconnected_device); -} - -static int print_device_status(struct device *dev, void *data) -{ - struct xenbus_device *xendev = to_xenbus_device(dev); - struct device_driver *drv = data; - - /* Is this operation limited to a particular driver? */ - if (drv && (dev->driver != drv)) - return 0; + err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, + &alloc_unbound); + if (err == -ENOSYS) + goto out_err; - if (!dev->driver) { - /* Information only: is this too noisy? */ - printk(KERN_INFO "XENBUS: Device with no driver: %s\n", - xendev->nodename); - } else if (xendev->state != XenbusStateConnected) { - printk(KERN_WARNING "XENBUS: Timeout connecting " - "to device: %s (state %d)\n", - xendev->nodename, xendev->state); - } + BUG_ON(err); + xen_store_evtchn = xen_start_info->store_evtchn = + alloc_unbound.port; return 0; -} -/* We only wait for device setup after most initcalls have run. */ -static int ready_to_wait_for_devices; + out_err: + if (page != 0) + free_page(page); + return err; +} -/* - * On a 10 second timeout, wait for all devices currently configured. We need - * to do this to guarantee that the filesystems and / or network devices - * needed for boot are available, before we can allow the boot to proceed. - * - * This needs to be on a late_initcall, to happen after the frontend device - * drivers have been initialised, but before the root fs is mounted. - * - * A possible improvement here would be to have the tools add a per-device - * flag to the store entry, indicating whether it is needed at boot time. - * This would allow people who knew what they were doing to accelerate their - * boot slightly, but of course needs tools or manual intervention to set up - * those flags correctly. - */ -static void wait_for_devices(struct xenbus_driver *xendrv) +static int __init xenbus_init(void) { - unsigned long timeout = jiffies + 10*HZ; - struct device_driver *drv = xendrv ? &xendrv->driver : NULL; + int err = 0; + uint64_t v = 0; + xen_store_domain_type = XS_UNKNOWN; + + if (!xen_domain()) + return -ENODEV; + + xenbus_ring_ops_init(); + + if (xen_pv_domain()) + xen_store_domain_type = XS_PV; + if (xen_hvm_domain()) + xen_store_domain_type = XS_HVM; + if (xen_hvm_domain() && xen_initial_domain()) + xen_store_domain_type = XS_LOCAL; + if (xen_pv_domain() && !xen_start_info->store_evtchn) + xen_store_domain_type = XS_LOCAL; + if (xen_pv_domain() && xen_start_info->store_evtchn) + xenstored_ready = 1; - if (!ready_to_wait_for_devices || !is_running_on_xen()) - return; + switch (xen_store_domain_type) { + case XS_LOCAL: + err = xenstored_local_init(); + if (err) + goto out_error; + xen_store_interface = mfn_to_virt(xen_store_mfn); + break; + case XS_PV: + xen_store_evtchn = xen_start_info->store_evtchn; + xen_store_mfn = xen_start_info->store_mfn; + xen_store_interface = mfn_to_virt(xen_store_mfn); + break; + case XS_HVM: + err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v); + if (err) + goto out_error; + xen_store_evtchn = (int)v; + err = hvm_get_parameter(HVM_PARAM_STORE_PFN, &v); + if (err) + goto out_error; + xen_store_mfn = (unsigned long)v; + xen_store_interface = + xen_remap(xen_store_mfn << PAGE_SHIFT, PAGE_SIZE); + break; + default: + pr_warn("Xenstore state unknown\n"); + break; + } - while (exists_disconnected_device(drv)) { - if (time_after(jiffies, timeout)) - break; - schedule_timeout_interruptible(HZ/10); + /* Initialize the interface to xenstore. */ + err = xs_init(); + if (err) { + pr_warn("Error initializing xenstore comms: %i\n", err); + goto out_error; } - bus_for_each_dev(&xenbus_frontend.bus, NULL, drv, - print_device_status); -} +#ifdef CONFIG_XEN_COMPAT_XENFS + /* + * Create xenfs mountpoint in /proc for compatibility with + * utilities that expect to find "xenbus" under "/proc/xen". + */ + proc_mkdir("xen", NULL); +#endif -#ifndef MODULE -static int __init boot_wait_for_devices(void) -{ - ready_to_wait_for_devices = 1; - wait_for_devices(NULL); - return 0; +out_error: + return err; } -late_initcall(boot_wait_for_devices); -#endif +postcore_initcall(xenbus_init); + +MODULE_LICENSE("GPL"); diff --git a/drivers/xen/xenbus/xenbus_probe.h b/drivers/xen/xenbus/xenbus_probe.h index e09b19415a4..1085ec294a1 100644 --- a/drivers/xen/xenbus/xenbus_probe.h +++ b/drivers/xen/xenbus/xenbus_probe.h @@ -34,36 +34,33 @@ #ifndef _XENBUS_PROBE_H #define _XENBUS_PROBE_H -#ifdef CONFIG_XEN_BACKEND -extern void xenbus_backend_suspend(int (*fn)(struct device *, void *)); -extern void xenbus_backend_resume(int (*fn)(struct device *, void *)); -extern void xenbus_backend_probe_and_watch(void); -extern int xenbus_backend_bus_register(void); -extern void xenbus_backend_bus_unregister(void); -#else -static inline void xenbus_backend_suspend(int (*fn)(struct device *, void *)) {} -static inline void xenbus_backend_resume(int (*fn)(struct device *, void *)) {} -static inline void xenbus_backend_probe_and_watch(void) {} -static inline int xenbus_backend_bus_register(void) { return 0; } -static inline void xenbus_backend_bus_unregister(void) {} -#endif +#define XEN_BUS_ID_SIZE 20 -struct xen_bus_type -{ +struct xen_bus_type { char *root; unsigned int levels; - int (*get_bus_id)(char bus_id[BUS_ID_SIZE], const char *nodename); - int (*probe)(const char *type, const char *dir); + int (*get_bus_id)(char bus_id[XEN_BUS_ID_SIZE], const char *nodename); + int (*probe)(struct xen_bus_type *bus, const char *type, + const char *dir); + void (*otherend_changed)(struct xenbus_watch *watch, const char **vec, + unsigned int len); struct bus_type bus; }; +enum xenstore_init { + XS_UNKNOWN, + XS_PV, + XS_HVM, + XS_LOCAL, +}; + +extern const struct attribute_group *xenbus_dev_groups[]; + extern int xenbus_match(struct device *_dev, struct device_driver *_drv); extern int xenbus_dev_probe(struct device *_dev); extern int xenbus_dev_remove(struct device *_dev); extern int xenbus_register_driver_common(struct xenbus_driver *drv, - struct xen_bus_type *bus, - struct module *owner, - const char *mod_name); + struct xen_bus_type *bus); extern int xenbus_probe_node(struct xen_bus_type *bus, const char *type, const char *nodename); @@ -71,4 +68,19 @@ extern int xenbus_probe_devices(struct xen_bus_type *bus); extern void xenbus_dev_changed(const char *node, struct xen_bus_type *bus); +extern void xenbus_dev_shutdown(struct device *_dev); + +extern int xenbus_dev_suspend(struct device *dev); +extern int xenbus_dev_resume(struct device *dev); +extern int xenbus_dev_cancel(struct device *dev); + +extern void xenbus_otherend_changed(struct xenbus_watch *watch, + const char **vec, unsigned int len, + int ignore_on_shutdown); + +extern int xenbus_read_otherend_details(struct xenbus_device *xendev, + char *id_node, char *path_node); + +void xenbus_ring_ops_init(void); + #endif diff --git a/drivers/xen/xenbus/xenbus_probe_backend.c b/drivers/xen/xenbus/xenbus_probe_backend.c new file mode 100644 index 00000000000..5125dce11a6 --- /dev/null +++ b/drivers/xen/xenbus/xenbus_probe_backend.c @@ -0,0 +1,274 @@ +/****************************************************************************** + * Talks to Xen Store to figure out what devices we have (backend half). + * + * Copyright (C) 2005 Rusty Russell, IBM Corporation + * Copyright (C) 2005 Mike Wray, Hewlett-Packard + * Copyright (C) 2005, 2006 XenSource Ltd + * Copyright (C) 2007 Solarflare Communications, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#define DPRINTK(fmt, ...) \ + pr_debug("(%s:%d) " fmt "\n", \ + __func__, __LINE__, ##__VA_ARGS__) + +#include <linux/kernel.h> +#include <linux/err.h> +#include <linux/string.h> +#include <linux/ctype.h> +#include <linux/fcntl.h> +#include <linux/mm.h> +#include <linux/notifier.h> +#include <linux/export.h> + +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/xen/hypervisor.h> +#include <asm/hypervisor.h> +#include <xen/xenbus.h> +#include <xen/features.h> + +#include "xenbus_comms.h" +#include "xenbus_probe.h" + +/* backend/<type>/<fe-uuid>/<id> => <type>-<fe-domid>-<id> */ +static int backend_bus_id(char bus_id[XEN_BUS_ID_SIZE], const char *nodename) +{ + int domid, err; + const char *devid, *type, *frontend; + unsigned int typelen; + + type = strchr(nodename, '/'); + if (!type) + return -EINVAL; + type++; + typelen = strcspn(type, "/"); + if (!typelen || type[typelen] != '/') + return -EINVAL; + + devid = strrchr(nodename, '/') + 1; + + err = xenbus_gather(XBT_NIL, nodename, "frontend-id", "%i", &domid, + "frontend", NULL, &frontend, + NULL); + if (err) + return err; + if (strlen(frontend) == 0) + err = -ERANGE; + if (!err && !xenbus_exists(XBT_NIL, frontend, "")) + err = -ENOENT; + kfree(frontend); + + if (err) + return err; + + if (snprintf(bus_id, XEN_BUS_ID_SIZE, "%.*s-%i-%s", + typelen, type, domid, devid) >= XEN_BUS_ID_SIZE) + return -ENOSPC; + return 0; +} + +static int xenbus_uevent_backend(struct device *dev, + struct kobj_uevent_env *env) +{ + struct xenbus_device *xdev; + struct xenbus_driver *drv; + struct xen_bus_type *bus; + + DPRINTK(""); + + if (dev == NULL) + return -ENODEV; + + xdev = to_xenbus_device(dev); + bus = container_of(xdev->dev.bus, struct xen_bus_type, bus); + + if (add_uevent_var(env, "MODALIAS=xen-backend:%s", xdev->devicetype)) + return -ENOMEM; + + /* stuff we want to pass to /sbin/hotplug */ + if (add_uevent_var(env, "XENBUS_TYPE=%s", xdev->devicetype)) + return -ENOMEM; + + if (add_uevent_var(env, "XENBUS_PATH=%s", xdev->nodename)) + return -ENOMEM; + + if (add_uevent_var(env, "XENBUS_BASE_PATH=%s", bus->root)) + return -ENOMEM; + + if (dev->driver) { + drv = to_xenbus_driver(dev->driver); + if (drv && drv->uevent) + return drv->uevent(xdev, env); + } + + return 0; +} + +/* backend/<typename>/<frontend-uuid>/<name> */ +static int xenbus_probe_backend_unit(struct xen_bus_type *bus, + const char *dir, + const char *type, + const char *name) +{ + char *nodename; + int err; + + nodename = kasprintf(GFP_KERNEL, "%s/%s", dir, name); + if (!nodename) + return -ENOMEM; + + DPRINTK("%s\n", nodename); + + err = xenbus_probe_node(bus, type, nodename); + kfree(nodename); + return err; +} + +/* backend/<typename>/<frontend-domid> */ +static int xenbus_probe_backend(struct xen_bus_type *bus, const char *type, + const char *domid) +{ + char *nodename; + int err = 0; + char **dir; + unsigned int i, dir_n = 0; + + DPRINTK(""); + + nodename = kasprintf(GFP_KERNEL, "%s/%s/%s", bus->root, type, domid); + if (!nodename) + return -ENOMEM; + + dir = xenbus_directory(XBT_NIL, nodename, "", &dir_n); + if (IS_ERR(dir)) { + kfree(nodename); + return PTR_ERR(dir); + } + + for (i = 0; i < dir_n; i++) { + err = xenbus_probe_backend_unit(bus, nodename, type, dir[i]); + if (err) + break; + } + kfree(dir); + kfree(nodename); + return err; +} + +static void frontend_changed(struct xenbus_watch *watch, + const char **vec, unsigned int len) +{ + xenbus_otherend_changed(watch, vec, len, 0); +} + +static struct xen_bus_type xenbus_backend = { + .root = "backend", + .levels = 3, /* backend/type/<frontend>/<id> */ + .get_bus_id = backend_bus_id, + .probe = xenbus_probe_backend, + .otherend_changed = frontend_changed, + .bus = { + .name = "xen-backend", + .match = xenbus_match, + .uevent = xenbus_uevent_backend, + .probe = xenbus_dev_probe, + .remove = xenbus_dev_remove, + .shutdown = xenbus_dev_shutdown, + .dev_groups = xenbus_dev_groups, + }, +}; + +static void backend_changed(struct xenbus_watch *watch, + const char **vec, unsigned int len) +{ + DPRINTK(""); + + xenbus_dev_changed(vec[XS_WATCH_PATH], &xenbus_backend); +} + +static struct xenbus_watch be_watch = { + .node = "backend", + .callback = backend_changed, +}; + +static int read_frontend_details(struct xenbus_device *xendev) +{ + return xenbus_read_otherend_details(xendev, "frontend-id", "frontend"); +} + +int xenbus_dev_is_online(struct xenbus_device *dev) +{ + int rc, val; + + rc = xenbus_scanf(XBT_NIL, dev->nodename, "online", "%d", &val); + if (rc != 1) + val = 0; /* no online node present */ + + return val; +} +EXPORT_SYMBOL_GPL(xenbus_dev_is_online); + +int xenbus_register_backend(struct xenbus_driver *drv) +{ + drv->read_otherend_details = read_frontend_details; + + return xenbus_register_driver_common(drv, &xenbus_backend); +} +EXPORT_SYMBOL_GPL(xenbus_register_backend); + +static int backend_probe_and_watch(struct notifier_block *notifier, + unsigned long event, + void *data) +{ + /* Enumerate devices in xenstore and watch for changes. */ + xenbus_probe_devices(&xenbus_backend); + register_xenbus_watch(&be_watch); + + return NOTIFY_DONE; +} + +static int __init xenbus_probe_backend_init(void) +{ + static struct notifier_block xenstore_notifier = { + .notifier_call = backend_probe_and_watch + }; + int err; + + DPRINTK(""); + + /* Register ourselves with the kernel bus subsystem */ + err = bus_register(&xenbus_backend.bus); + if (err) + return err; + + register_xenstore_notifier(&xenstore_notifier); + + return 0; +} +subsys_initcall(xenbus_probe_backend_init); diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c new file mode 100644 index 00000000000..cb385c10d2b --- /dev/null +++ b/drivers/xen/xenbus/xenbus_probe_frontend.c @@ -0,0 +1,510 @@ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#define DPRINTK(fmt, ...) \ + pr_debug("(%s:%d) " fmt "\n", \ + __func__, __LINE__, ##__VA_ARGS__) + +#include <linux/kernel.h> +#include <linux/err.h> +#include <linux/string.h> +#include <linux/ctype.h> +#include <linux/fcntl.h> +#include <linux/mm.h> +#include <linux/proc_fs.h> +#include <linux/notifier.h> +#include <linux/kthread.h> +#include <linux/mutex.h> +#include <linux/io.h> +#include <linux/module.h> + +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/xen/hypervisor.h> +#include <xen/xenbus.h> +#include <xen/events.h> +#include <xen/page.h> +#include <xen/xen.h> + +#include <xen/platform_pci.h> + +#include "xenbus_comms.h" +#include "xenbus_probe.h" + + +static struct workqueue_struct *xenbus_frontend_wq; + +/* device/<type>/<id> => <type>-<id> */ +static int frontend_bus_id(char bus_id[XEN_BUS_ID_SIZE], const char *nodename) +{ + nodename = strchr(nodename, '/'); + if (!nodename || strlen(nodename + 1) >= XEN_BUS_ID_SIZE) { + pr_warn("bad frontend %s\n", nodename); + return -EINVAL; + } + + strlcpy(bus_id, nodename + 1, XEN_BUS_ID_SIZE); + if (!strchr(bus_id, '/')) { + pr_warn("bus_id %s no slash\n", bus_id); + return -EINVAL; + } + *strchr(bus_id, '/') = '-'; + return 0; +} + +/* device/<typename>/<name> */ +static int xenbus_probe_frontend(struct xen_bus_type *bus, const char *type, + const char *name) +{ + char *nodename; + int err; + + /* ignore console/0 */ + if (!strncmp(type, "console", 7) && !strncmp(name, "0", 1)) { + DPRINTK("Ignoring buggy device entry console/0"); + return 0; + } + + nodename = kasprintf(GFP_KERNEL, "%s/%s/%s", bus->root, type, name); + if (!nodename) + return -ENOMEM; + + DPRINTK("%s", nodename); + + err = xenbus_probe_node(bus, type, nodename); + kfree(nodename); + return err; +} + +static int xenbus_uevent_frontend(struct device *_dev, + struct kobj_uevent_env *env) +{ + struct xenbus_device *dev = to_xenbus_device(_dev); + + if (add_uevent_var(env, "MODALIAS=xen:%s", dev->devicetype)) + return -ENOMEM; + + return 0; +} + + +static void backend_changed(struct xenbus_watch *watch, + const char **vec, unsigned int len) +{ + xenbus_otherend_changed(watch, vec, len, 1); +} + +static void xenbus_frontend_delayed_resume(struct work_struct *w) +{ + struct xenbus_device *xdev = container_of(w, struct xenbus_device, work); + + xenbus_dev_resume(&xdev->dev); +} + +static int xenbus_frontend_dev_resume(struct device *dev) +{ + /* + * If xenstored is running in this domain, we cannot access the backend + * state at the moment, so we need to defer xenbus_dev_resume + */ + if (xen_store_domain_type == XS_LOCAL) { + struct xenbus_device *xdev = to_xenbus_device(dev); + + if (!xenbus_frontend_wq) { + pr_err("%s: no workqueue to process delayed resume\n", + xdev->nodename); + return -EFAULT; + } + + queue_work(xenbus_frontend_wq, &xdev->work); + + return 0; + } + + return xenbus_dev_resume(dev); +} + +static int xenbus_frontend_dev_probe(struct device *dev) +{ + if (xen_store_domain_type == XS_LOCAL) { + struct xenbus_device *xdev = to_xenbus_device(dev); + INIT_WORK(&xdev->work, xenbus_frontend_delayed_resume); + } + + return xenbus_dev_probe(dev); +} + +static const struct dev_pm_ops xenbus_pm_ops = { + .suspend = xenbus_dev_suspend, + .resume = xenbus_frontend_dev_resume, + .freeze = xenbus_dev_suspend, + .thaw = xenbus_dev_cancel, + .restore = xenbus_dev_resume, +}; + +static struct xen_bus_type xenbus_frontend = { + .root = "device", + .levels = 2, /* device/type/<id> */ + .get_bus_id = frontend_bus_id, + .probe = xenbus_probe_frontend, + .otherend_changed = backend_changed, + .bus = { + .name = "xen", + .match = xenbus_match, + .uevent = xenbus_uevent_frontend, + .probe = xenbus_frontend_dev_probe, + .remove = xenbus_dev_remove, + .shutdown = xenbus_dev_shutdown, + .dev_groups = xenbus_dev_groups, + + .pm = &xenbus_pm_ops, + }, +}; + +static void frontend_changed(struct xenbus_watch *watch, + const char **vec, unsigned int len) +{ + DPRINTK(""); + + xenbus_dev_changed(vec[XS_WATCH_PATH], &xenbus_frontend); +} + + +/* We watch for devices appearing and vanishing. */ +static struct xenbus_watch fe_watch = { + .node = "device", + .callback = frontend_changed, +}; + +static int read_backend_details(struct xenbus_device *xendev) +{ + return xenbus_read_otherend_details(xendev, "backend-id", "backend"); +} + +static int is_device_connecting(struct device *dev, void *data, bool ignore_nonessential) +{ + struct xenbus_device *xendev = to_xenbus_device(dev); + struct device_driver *drv = data; + struct xenbus_driver *xendrv; + + /* + * A device with no driver will never connect. We care only about + * devices which should currently be in the process of connecting. + */ + if (!dev->driver) + return 0; + + /* Is this search limited to a particular driver? */ + if (drv && (dev->driver != drv)) + return 0; + + if (ignore_nonessential) { + /* With older QEMU, for PVonHVM guests the guest config files + * could contain: vfb = [ 'vnc=1, vnclisten=0.0.0.0'] + * which is nonsensical as there is no PV FB (there can be + * a PVKB) running as HVM guest. */ + + if ((strncmp(xendev->nodename, "device/vkbd", 11) == 0)) + return 0; + + if ((strncmp(xendev->nodename, "device/vfb", 10) == 0)) + return 0; + } + xendrv = to_xenbus_driver(dev->driver); + return (xendev->state < XenbusStateConnected || + (xendev->state == XenbusStateConnected && + xendrv->is_ready && !xendrv->is_ready(xendev))); +} +static int essential_device_connecting(struct device *dev, void *data) +{ + return is_device_connecting(dev, data, true /* ignore PV[KBB+FB] */); +} +static int non_essential_device_connecting(struct device *dev, void *data) +{ + return is_device_connecting(dev, data, false); +} + +static int exists_essential_connecting_device(struct device_driver *drv) +{ + return bus_for_each_dev(&xenbus_frontend.bus, NULL, drv, + essential_device_connecting); +} +static int exists_non_essential_connecting_device(struct device_driver *drv) +{ + return bus_for_each_dev(&xenbus_frontend.bus, NULL, drv, + non_essential_device_connecting); +} + +static int print_device_status(struct device *dev, void *data) +{ + struct xenbus_device *xendev = to_xenbus_device(dev); + struct device_driver *drv = data; + + /* Is this operation limited to a particular driver? */ + if (drv && (dev->driver != drv)) + return 0; + + if (!dev->driver) { + /* Information only: is this too noisy? */ + pr_info("Device with no driver: %s\n", xendev->nodename); + } else if (xendev->state < XenbusStateConnected) { + enum xenbus_state rstate = XenbusStateUnknown; + if (xendev->otherend) + rstate = xenbus_read_driver_state(xendev->otherend); + pr_warn("Timeout connecting to device: %s (local state %d, remote state %d)\n", + xendev->nodename, xendev->state, rstate); + } + + return 0; +} + +/* We only wait for device setup after most initcalls have run. */ +static int ready_to_wait_for_devices; + +static bool wait_loop(unsigned long start, unsigned int max_delay, + unsigned int *seconds_waited) +{ + if (time_after(jiffies, start + (*seconds_waited+5)*HZ)) { + if (!*seconds_waited) + pr_warn("Waiting for devices to initialise: "); + *seconds_waited += 5; + pr_cont("%us...", max_delay - *seconds_waited); + if (*seconds_waited == max_delay) { + pr_cont("\n"); + return true; + } + } + + schedule_timeout_interruptible(HZ/10); + + return false; +} +/* + * On a 5-minute timeout, wait for all devices currently configured. We need + * to do this to guarantee that the filesystems and / or network devices + * needed for boot are available, before we can allow the boot to proceed. + * + * This needs to be on a late_initcall, to happen after the frontend device + * drivers have been initialised, but before the root fs is mounted. + * + * A possible improvement here would be to have the tools add a per-device + * flag to the store entry, indicating whether it is needed at boot time. + * This would allow people who knew what they were doing to accelerate their + * boot slightly, but of course needs tools or manual intervention to set up + * those flags correctly. + */ +static void wait_for_devices(struct xenbus_driver *xendrv) +{ + unsigned long start = jiffies; + struct device_driver *drv = xendrv ? &xendrv->driver : NULL; + unsigned int seconds_waited = 0; + + if (!ready_to_wait_for_devices || !xen_domain()) + return; + + while (exists_non_essential_connecting_device(drv)) + if (wait_loop(start, 30, &seconds_waited)) + break; + + /* Skips PVKB and PVFB check.*/ + while (exists_essential_connecting_device(drv)) + if (wait_loop(start, 270, &seconds_waited)) + break; + + if (seconds_waited) + printk("\n"); + + bus_for_each_dev(&xenbus_frontend.bus, NULL, drv, + print_device_status); +} + +int xenbus_register_frontend(struct xenbus_driver *drv) +{ + int ret; + + drv->read_otherend_details = read_backend_details; + + ret = xenbus_register_driver_common(drv, &xenbus_frontend); + if (ret) + return ret; + + /* If this driver is loaded as a module wait for devices to attach. */ + wait_for_devices(drv); + + return 0; +} +EXPORT_SYMBOL_GPL(xenbus_register_frontend); + +static DECLARE_WAIT_QUEUE_HEAD(backend_state_wq); +static int backend_state; + +static void xenbus_reset_backend_state_changed(struct xenbus_watch *w, + const char **v, unsigned int l) +{ + xenbus_scanf(XBT_NIL, v[XS_WATCH_PATH], "", "%i", &backend_state); + printk(KERN_DEBUG "XENBUS: backend %s %s\n", + v[XS_WATCH_PATH], xenbus_strstate(backend_state)); + wake_up(&backend_state_wq); +} + +static void xenbus_reset_wait_for_backend(char *be, int expected) +{ + long timeout; + timeout = wait_event_interruptible_timeout(backend_state_wq, + backend_state == expected, 5 * HZ); + if (timeout <= 0) + pr_info("backend %s timed out\n", be); +} + +/* + * Reset frontend if it is in Connected or Closed state. + * Wait for backend to catch up. + * State Connected happens during kdump, Closed after kexec. + */ +static void xenbus_reset_frontend(char *fe, char *be, int be_state) +{ + struct xenbus_watch be_watch; + + printk(KERN_DEBUG "XENBUS: backend %s %s\n", + be, xenbus_strstate(be_state)); + + memset(&be_watch, 0, sizeof(be_watch)); + be_watch.node = kasprintf(GFP_NOIO | __GFP_HIGH, "%s/state", be); + if (!be_watch.node) + return; + + be_watch.callback = xenbus_reset_backend_state_changed; + backend_state = XenbusStateUnknown; + + pr_info("triggering reconnect on %s\n", be); + register_xenbus_watch(&be_watch); + + /* fall through to forward backend to state XenbusStateInitialising */ + switch (be_state) { + case XenbusStateConnected: + xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateClosing); + xenbus_reset_wait_for_backend(be, XenbusStateClosing); + + case XenbusStateClosing: + xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateClosed); + xenbus_reset_wait_for_backend(be, XenbusStateClosed); + + case XenbusStateClosed: + xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateInitialising); + xenbus_reset_wait_for_backend(be, XenbusStateInitWait); + } + + unregister_xenbus_watch(&be_watch); + pr_info("reconnect done on %s\n", be); + kfree(be_watch.node); +} + +static void xenbus_check_frontend(char *class, char *dev) +{ + int be_state, fe_state, err; + char *backend, *frontend; + + frontend = kasprintf(GFP_NOIO | __GFP_HIGH, "device/%s/%s", class, dev); + if (!frontend) + return; + + err = xenbus_scanf(XBT_NIL, frontend, "state", "%i", &fe_state); + if (err != 1) + goto out; + + switch (fe_state) { + case XenbusStateConnected: + case XenbusStateClosed: + printk(KERN_DEBUG "XENBUS: frontend %s %s\n", + frontend, xenbus_strstate(fe_state)); + backend = xenbus_read(XBT_NIL, frontend, "backend", NULL); + if (!backend || IS_ERR(backend)) + goto out; + err = xenbus_scanf(XBT_NIL, backend, "state", "%i", &be_state); + if (err == 1) + xenbus_reset_frontend(frontend, backend, be_state); + kfree(backend); + break; + default: + break; + } +out: + kfree(frontend); +} + +static void xenbus_reset_state(void) +{ + char **devclass, **dev; + int devclass_n, dev_n; + int i, j; + + devclass = xenbus_directory(XBT_NIL, "device", "", &devclass_n); + if (IS_ERR(devclass)) + return; + + for (i = 0; i < devclass_n; i++) { + dev = xenbus_directory(XBT_NIL, "device", devclass[i], &dev_n); + if (IS_ERR(dev)) + continue; + for (j = 0; j < dev_n; j++) + xenbus_check_frontend(devclass[i], dev[j]); + kfree(dev); + } + kfree(devclass); +} + +static int frontend_probe_and_watch(struct notifier_block *notifier, + unsigned long event, + void *data) +{ + /* reset devices in Connected or Closed state */ + if (xen_hvm_domain()) + xenbus_reset_state(); + /* Enumerate devices in xenstore and watch for changes. */ + xenbus_probe_devices(&xenbus_frontend); + register_xenbus_watch(&fe_watch); + + return NOTIFY_DONE; +} + + +static int __init xenbus_probe_frontend_init(void) +{ + static struct notifier_block xenstore_notifier = { + .notifier_call = frontend_probe_and_watch + }; + int err; + + DPRINTK(""); + + /* Register ourselves with the kernel bus subsystem */ + err = bus_register(&xenbus_frontend.bus); + if (err) + return err; + + register_xenstore_notifier(&xenstore_notifier); + + if (xen_store_domain_type == XS_LOCAL) { + xenbus_frontend_wq = create_workqueue("xenbus_frontend"); + if (!xenbus_frontend_wq) + pr_warn("create xenbus frontend workqueue failed, S3 resume is likely to fail\n"); + } + + return 0; +} +subsys_initcall(xenbus_probe_frontend_init); + +#ifndef MODULE +static int __init boot_wait_for_devices(void) +{ + if (!xen_has_pv_devices()) + return -ENODEV; + + ready_to_wait_for_devices = 1; + wait_for_devices(NULL); + return 0; +} + +late_initcall(boot_wait_for_devices); +#endif + +MODULE_LICENSE("GPL"); diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c index 227d53b12a5..ba804f3d827 100644 --- a/drivers/xen/xenbus/xenbus_xs.c +++ b/drivers/xen/xenbus/xenbus_xs.c @@ -31,6 +31,8 @@ * IN THE SOFTWARE. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/unistd.h> #include <linux/errno.h> #include <linux/types.h> @@ -44,8 +46,11 @@ #include <linux/rwsem.h> #include <linux/module.h> #include <linux/mutex.h> +#include <asm/xen/hypervisor.h> #include <xen/xenbus.h> +#include <xen/xen.h> #include "xenbus_comms.h" +#include "xenbus_probe.h" struct xs_stored_msg { struct list_head list; @@ -76,6 +81,14 @@ struct xs_handle { /* * Mutex ordering: transaction_mutex -> watch_mutex -> request_mutex. * response_mutex is never taken simultaneously with the other three. + * + * transaction_mutex must be held before incrementing + * transaction_count. The mutex is held when a suspend is in + * progress to prevent new transactions starting. + * + * When decrementing transaction_count to zero the wait queue + * should be woken up, the suspend code waits for count to + * reach zero. */ /* One request at a time. */ @@ -85,7 +98,9 @@ struct xs_handle { struct mutex response_mutex; /* Protect transactions against save/restore. */ - struct rw_semaphore transaction_mutex; + struct mutex transaction_mutex; + atomic_t transaction_count; + wait_queue_head_t transaction_wq; /* Protect watch (de)register against save/restore. */ struct rw_semaphore watch_mutex; @@ -117,15 +132,37 @@ static int get_error(const char *errorstring) for (i = 0; strcmp(errorstring, xsd_errors[i].errstring) != 0; i++) { if (i == ARRAY_SIZE(xsd_errors) - 1) { - printk(KERN_WARNING - "XENBUS xen store gave: unknown error %s", - errorstring); + pr_warn("xen store gave: unknown error %s\n", + errorstring); return EINVAL; } } return xsd_errors[i].errnum; } +static bool xenbus_ok(void) +{ + switch (xen_store_domain_type) { + case XS_LOCAL: + switch (system_state) { + case SYSTEM_POWER_OFF: + case SYSTEM_RESTART: + case SYSTEM_HALT: + return false; + default: + break; + } + return true; + case XS_PV: + case XS_HVM: + /* FIXME: Could check that the remote domain is alive, + * but it is normally initial domain. */ + return true; + default: + break; + } + return false; +} static void *read_reply(enum xsd_sockmsg_type *type, unsigned int *len) { struct xs_stored_msg *msg; @@ -135,9 +172,20 @@ static void *read_reply(enum xsd_sockmsg_type *type, unsigned int *len) while (list_empty(&xs_state.reply_list)) { spin_unlock(&xs_state.reply_lock); - /* XXX FIXME: Avoid synchronous wait for response here. */ - wait_event(xs_state.reply_waitq, - !list_empty(&xs_state.reply_list)); + if (xenbus_ok()) + /* XXX FIXME: Avoid synchronous wait for response here. */ + wait_event_timeout(xs_state.reply_waitq, + !list_empty(&xs_state.reply_list), + msecs_to_jiffies(500)); + else { + /* + * If we are in the process of being shut-down there is + * no point of trying to contact XenBus - it is either + * killed (xenstored application) or the other domain + * has been killed or is unreachable. + */ + return ERR_PTR(-EIO); + } spin_lock(&xs_state.reply_lock); } @@ -157,6 +205,31 @@ static void *read_reply(enum xsd_sockmsg_type *type, unsigned int *len) return body; } +static void transaction_start(void) +{ + mutex_lock(&xs_state.transaction_mutex); + atomic_inc(&xs_state.transaction_count); + mutex_unlock(&xs_state.transaction_mutex); +} + +static void transaction_end(void) +{ + if (atomic_dec_and_test(&xs_state.transaction_count)) + wake_up(&xs_state.transaction_wq); +} + +static void transaction_suspend(void) +{ + mutex_lock(&xs_state.transaction_mutex); + wait_event(xs_state.transaction_wq, + atomic_read(&xs_state.transaction_count) == 0); +} + +static void transaction_resume(void) +{ + mutex_unlock(&xs_state.transaction_mutex); +} + void *xenbus_dev_request_and_reply(struct xsd_sockmsg *msg) { void *ret; @@ -164,7 +237,7 @@ void *xenbus_dev_request_and_reply(struct xsd_sockmsg *msg) int err; if (req_msg.type == XS_TRANSACTION_START) - down_read(&xs_state.transaction_mutex); + transaction_start(); mutex_lock(&xs_state.request_mutex); @@ -177,13 +250,17 @@ void *xenbus_dev_request_and_reply(struct xsd_sockmsg *msg) mutex_unlock(&xs_state.request_mutex); + if (IS_ERR(ret)) + return ret; + if ((msg->type == XS_TRANSACTION_END) || ((req_msg.type == XS_TRANSACTION_START) && (msg->type == XS_ERROR))) - up_read(&xs_state.transaction_mutex); + transaction_end(); return ret; } +EXPORT_SYMBOL(xenbus_dev_request_and_reply); /* Send message to xs, get kmalloc'ed reply. ERR_PTR() on error. */ static void *xs_talkv(struct xenbus_transaction t, @@ -234,10 +311,8 @@ static void *xs_talkv(struct xenbus_transaction t, } if (msg.type != type) { - if (printk_ratelimit()) - printk(KERN_WARNING - "XENBUS unexpected type [%d], expected [%d]\n", - msg.type, type); + pr_warn_ratelimited("unexpected type [%d], expected [%d]\n", + msg.type, type); kfree(ret); return ERR_PTR(-EINVAL); } @@ -283,9 +358,9 @@ static char *join(const char *dir, const char *name) char *buffer; if (strlen(name) == 0) - buffer = kasprintf(GFP_KERNEL, "%s", dir); + buffer = kasprintf(GFP_NOIO | __GFP_HIGH, "%s", dir); else - buffer = kasprintf(GFP_KERNEL, "%s/%s", dir, name); + buffer = kasprintf(GFP_NOIO | __GFP_HIGH, "%s/%s", dir, name); return (!buffer) ? ERR_PTR(-ENOMEM) : buffer; } @@ -297,7 +372,7 @@ static char **split(char *strings, unsigned int len, unsigned int *num) *num = count_strings(strings, len); /* Transfer to one big alloc for easy freeing. */ - ret = kmalloc(*num * sizeof(char *) + len, GFP_KERNEL); + ret = kmalloc(*num * sizeof(char *) + len, GFP_NOIO | __GFP_HIGH); if (!ret) { kfree(strings); return ERR_PTR(-ENOMEM); @@ -431,11 +506,11 @@ int xenbus_transaction_start(struct xenbus_transaction *t) { char *id_str; - down_read(&xs_state.transaction_mutex); + transaction_start(); id_str = xs_single(XBT_NIL, XS_TRANSACTION_START, "", NULL); if (IS_ERR(id_str)) { - up_read(&xs_state.transaction_mutex); + transaction_end(); return PTR_ERR(id_str); } @@ -460,7 +535,7 @@ int xenbus_transaction_end(struct xenbus_transaction t, int abort) err = xs_error(xs_single(t, XS_TRANSACTION_END, abortstr, NULL)); - up_read(&xs_state.transaction_mutex); + transaction_end(); return err; } @@ -495,21 +570,18 @@ int xenbus_printf(struct xenbus_transaction t, { va_list ap; int ret; -#define PRINTF_BUFFER_SIZE 4096 - char *printf_buffer; - - printf_buffer = kmalloc(PRINTF_BUFFER_SIZE, GFP_KERNEL); - if (printf_buffer == NULL) - return -ENOMEM; + char *buf; va_start(ap, fmt); - ret = vsnprintf(printf_buffer, PRINTF_BUFFER_SIZE, fmt, ap); + buf = kvasprintf(GFP_NOIO | __GFP_HIGH, fmt, ap); va_end(ap); - BUG_ON(ret > PRINTF_BUFFER_SIZE-1); - ret = xenbus_write(t, dir, node, printf_buffer); + if (!buf) + return -ENOMEM; + + ret = xenbus_write(t, dir, node, buf); - kfree(printf_buffer); + kfree(buf); return ret; } @@ -583,6 +655,45 @@ static struct xenbus_watch *find_watch(const char *token) return NULL; } +/* + * Certain older XenBus toolstack cannot handle reading values that are + * not populated. Some Xen 3.4 installation are incapable of doing this + * so if we are running on anything older than 4 do not attempt to read + * control/platform-feature-xs_reset_watches. + */ +static bool xen_strict_xenbus_quirk(void) +{ +#ifdef CONFIG_X86 + uint32_t eax, ebx, ecx, edx, base; + + base = xen_cpuid_base(); + cpuid(base + 1, &eax, &ebx, &ecx, &edx); + + if ((eax >> 16) < 4) + return true; +#endif + return false; + +} +static void xs_reset_watches(void) +{ + int err, supported = 0; + + if (!xen_hvm_domain() || xen_initial_domain()) + return; + + if (xen_strict_xenbus_quirk()) + return; + + err = xenbus_scanf(XBT_NIL, "control", + "platform-feature-xs_reset_watches", "%d", &supported); + if (err != 1 || !supported) + return; + + err = xs_error(xs_single(XBT_NIL, XS_RESET_WATCHES, "", NULL)); + if (err && err != -EEXIST) + pr_warn("xs_reset_watches failed: %d\n", err); +} /* Register callback to watch this node. */ int register_xenbus_watch(struct xenbus_watch *watch) @@ -602,8 +713,7 @@ int register_xenbus_watch(struct xenbus_watch *watch) err = xs_watch(watch->node, token); - /* Ignore errors due to multiple registration. */ - if ((err != 0) && (err != -EEXIST)) { + if (err) { spin_lock(&watches_lock); list_del(&watch->list); spin_unlock(&watches_lock); @@ -632,9 +742,7 @@ void unregister_xenbus_watch(struct xenbus_watch *watch) err = xs_unwatch(watch->node, token); if (err) - printk(KERN_WARNING - "XENBUS Failed to release watch %s: %i\n", - watch->node, err); + pr_warn("Failed to release watch %s: %i\n", watch->node, err); up_read(&xs_state.watch_mutex); @@ -661,7 +769,7 @@ EXPORT_SYMBOL_GPL(unregister_xenbus_watch); void xs_suspend(void) { - down_write(&xs_state.transaction_mutex); + transaction_suspend(); down_write(&xs_state.watch_mutex); mutex_lock(&xs_state.request_mutex); mutex_lock(&xs_state.response_mutex); @@ -672,9 +780,11 @@ void xs_resume(void) struct xenbus_watch *watch; char token[sizeof(watch) * 2 + 1]; + xb_init_comms(); + mutex_unlock(&xs_state.response_mutex); mutex_unlock(&xs_state.request_mutex); - up_write(&xs_state.transaction_mutex); + transaction_resume(); /* No need for watches_lock: the watch_mutex is sufficient. */ list_for_each_entry(watch, &watches, list) { @@ -690,7 +800,7 @@ void xs_suspend_cancel(void) mutex_unlock(&xs_state.response_mutex); mutex_unlock(&xs_state.request_mutex); up_write(&xs_state.watch_mutex); - up_write(&xs_state.transaction_mutex); + mutex_unlock(&xs_state.transaction_mutex); } static int xenwatch_thread(void *unused) @@ -751,7 +861,7 @@ static int process_msg(void) } - msg = kmalloc(sizeof(*msg), GFP_KERNEL); + msg = kmalloc(sizeof(*msg), GFP_NOIO | __GFP_HIGH); if (msg == NULL) { err = -ENOMEM; goto out; @@ -763,7 +873,13 @@ static int process_msg(void) goto out; } - body = kmalloc(msg->hdr.len + 1, GFP_KERNEL); + if (msg->hdr.len > XENSTORE_PAYLOAD_MAX) { + kfree(msg); + err = -EINVAL; + goto out; + } + + body = kmalloc(msg->hdr.len + 1, GFP_NOIO | __GFP_HIGH); if (body == NULL) { kfree(msg); err = -ENOMEM; @@ -820,8 +936,7 @@ static int xenbus_thread(void *unused) for (;;) { err = process_msg(); if (err) - printk(KERN_WARNING "XENBUS error %d while reading " - "message\n", err); + pr_warn("error %d while reading message\n", err); if (kthread_should_stop()) break; } @@ -840,8 +955,10 @@ int xs_init(void) mutex_init(&xs_state.request_mutex); mutex_init(&xs_state.response_mutex); - init_rwsem(&xs_state.transaction_mutex); + mutex_init(&xs_state.transaction_mutex); init_rwsem(&xs_state.watch_mutex); + atomic_set(&xs_state.transaction_count, 0); + init_waitqueue_head(&xs_state.transaction_wq); /* Initialize the shared memory rings to talk to xenstored */ err = xb_init_comms(); @@ -857,5 +974,8 @@ int xs_init(void) if (IS_ERR(task)) return PTR_ERR(task); + /* shutdown watches for kexec boot */ + xs_reset_watches(); + return 0; } |
