From 2fb3683e7b164ee2b324039f7c9d90fe5b1a259b Mon Sep 17 00:00:00 2001 From: Bastian Blank Date: Sat, 10 Dec 2011 19:29:47 +0100 Subject: xen: Add xenbus device driver Access to xenbus is currently handled via xenfs. This adds a device driver for xenbus and makes xenfs use this code. Signed-off-by: Bastian Blank Acked-by: Ian Campbell Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/xenbus/Makefile | 1 + drivers/xen/xenbus/xenbus_comms.h | 4 + drivers/xen/xenbus/xenbus_dev_frontend.c | 624 +++++++++++++++++++++++++++++++ 3 files changed, 629 insertions(+) create mode 100644 drivers/xen/xenbus/xenbus_dev_frontend.c (limited to 'drivers/xen/xenbus') diff --git a/drivers/xen/xenbus/Makefile b/drivers/xen/xenbus/Makefile index 8dca685358b..a2ea363b9f3 100644 --- a/drivers/xen/xenbus/Makefile +++ b/drivers/xen/xenbus/Makefile @@ -1,4 +1,5 @@ obj-y += xenbus.o +obj-y += xenbus_dev_frontend.o xenbus-objs = xenbus-objs += xenbus_client.o diff --git a/drivers/xen/xenbus/xenbus_comms.h b/drivers/xen/xenbus/xenbus_comms.h index c21db751373..6e42800fa49 100644 --- a/drivers/xen/xenbus/xenbus_comms.h +++ b/drivers/xen/xenbus/xenbus_comms.h @@ -31,6 +31,8 @@ #ifndef _XENBUS_COMMS_H #define _XENBUS_COMMS_H +#include + int xs_init(void); int xb_init_comms(void); @@ -43,4 +45,6 @@ int xs_input_avail(void); extern struct xenstore_domain_interface *xen_store_interface; extern int xen_store_evtchn; +extern const struct file_operations xen_xenbus_fops; + #endif /* _XENBUS_COMMS_H */ diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c new file mode 100644 index 00000000000..fb30cffe033 --- /dev/null +++ b/drivers/xen/xenbus/xenbus_dev_frontend.c @@ -0,0 +1,624 @@ +/* + * Driver giving user-space access to the kernel's xenbus connection + * to xenstore. + * + * Copyright (c) 2005, Christian Limpach + * Copyright (c) 2005, Rusty Russell, IBM Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Changes: + * 2008-10-07 Alex Zeffertt Replaced /proc/xen/xenbus with xenfs filesystem + * and /proc/xen compatibility mount point. + * Turned xenfs into a loadable module. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "xenbus_comms.h" + +#include +#include + +MODULE_LICENSE("GPL"); + +/* + * An element of a list of outstanding transactions, for which we're + * still waiting a reply. + */ +struct xenbus_transaction_holder { + struct list_head list; + struct xenbus_transaction handle; +}; + +/* + * A buffer of data on the queue. + */ +struct read_buffer { + struct list_head list; + unsigned int cons; + unsigned int len; + char msg[]; +}; + +struct xenbus_file_priv { + /* + * msgbuffer_mutex is held while partial requests are built up + * and complete requests are acted on. It therefore protects + * the "transactions" and "watches" lists, and the partial + * request length and buffer. + * + * reply_mutex protects the reply being built up to return to + * usermode. It nests inside msgbuffer_mutex but may be held + * alone during a watch callback. + */ + struct mutex msgbuffer_mutex; + + /* In-progress transactions */ + struct list_head transactions; + + /* Active watches. */ + struct list_head watches; + + /* Partial request. */ + unsigned int len; + union { + struct xsd_sockmsg msg; + char buffer[PAGE_SIZE]; + } u; + + /* Response queue. */ + struct mutex reply_mutex; + struct list_head read_buffers; + wait_queue_head_t read_waitq; + +}; + +/* Read out any raw xenbus messages queued up. */ +static ssize_t xenbus_file_read(struct file *filp, + char __user *ubuf, + size_t len, loff_t *ppos) +{ + struct xenbus_file_priv *u = filp->private_data; + struct read_buffer *rb; + unsigned i; + int ret; + + mutex_lock(&u->reply_mutex); +again: + while (list_empty(&u->read_buffers)) { + mutex_unlock(&u->reply_mutex); + if (filp->f_flags & O_NONBLOCK) + return -EAGAIN; + + ret = wait_event_interruptible(u->read_waitq, + !list_empty(&u->read_buffers)); + if (ret) + return ret; + mutex_lock(&u->reply_mutex); + } + + rb = list_entry(u->read_buffers.next, struct read_buffer, list); + i = 0; + while (i < len) { + unsigned sz = min((unsigned)len - i, rb->len - rb->cons); + + ret = copy_to_user(ubuf + i, &rb->msg[rb->cons], sz); + + i += sz - ret; + rb->cons += sz - ret; + + if (ret != 0) { + if (i == 0) + i = -EFAULT; + goto out; + } + + /* Clear out buffer if it has been consumed */ + if (rb->cons == rb->len) { + list_del(&rb->list); + kfree(rb); + if (list_empty(&u->read_buffers)) + break; + rb = list_entry(u->read_buffers.next, + struct read_buffer, list); + } + } + if (i == 0) + goto again; + +out: + mutex_unlock(&u->reply_mutex); + return i; +} + +/* + * Add a buffer to the queue. Caller must hold the appropriate lock + * if the queue is not local. (Commonly the caller will build up + * multiple queued buffers on a temporary local list, and then add it + * to the appropriate list under lock once all the buffers have een + * successfully allocated.) + */ +static int queue_reply(struct list_head *queue, const void *data, size_t len) +{ + struct read_buffer *rb; + + if (len == 0) + return 0; + + rb = kmalloc(sizeof(*rb) + len, GFP_KERNEL); + if (rb == NULL) + return -ENOMEM; + + rb->cons = 0; + rb->len = len; + + memcpy(rb->msg, data, len); + + list_add_tail(&rb->list, queue); + return 0; +} + +/* + * Free all the read_buffer s on a list. + * Caller must have sole reference to list. + */ +static void queue_cleanup(struct list_head *list) +{ + struct read_buffer *rb; + + while (!list_empty(list)) { + rb = list_entry(list->next, struct read_buffer, list); + list_del(list->next); + kfree(rb); + } +} + +struct watch_adapter { + struct list_head list; + struct xenbus_watch watch; + struct xenbus_file_priv *dev_data; + char *token; +}; + +static void free_watch_adapter(struct watch_adapter *watch) +{ + kfree(watch->watch.node); + kfree(watch->token); + kfree(watch); +} + +static struct watch_adapter *alloc_watch_adapter(const char *path, + const char *token) +{ + struct watch_adapter *watch; + + watch = kzalloc(sizeof(*watch), GFP_KERNEL); + if (watch == NULL) + goto out_fail; + + watch->watch.node = kstrdup(path, GFP_KERNEL); + if (watch->watch.node == NULL) + goto out_free; + + watch->token = kstrdup(token, GFP_KERNEL); + if (watch->token == NULL) + goto out_free; + + return watch; + +out_free: + free_watch_adapter(watch); + +out_fail: + return NULL; +} + +static void watch_fired(struct xenbus_watch *watch, + const char **vec, + unsigned int len) +{ + struct watch_adapter *adap; + struct xsd_sockmsg hdr; + const char *path, *token; + int path_len, tok_len, body_len, data_len = 0; + int ret; + LIST_HEAD(staging_q); + + adap = container_of(watch, struct watch_adapter, watch); + + path = vec[XS_WATCH_PATH]; + token = adap->token; + + path_len = strlen(path) + 1; + tok_len = strlen(token) + 1; + if (len > 2) + data_len = vec[len] - vec[2] + 1; + body_len = path_len + tok_len + data_len; + + hdr.type = XS_WATCH_EVENT; + hdr.len = body_len; + + mutex_lock(&adap->dev_data->reply_mutex); + + ret = queue_reply(&staging_q, &hdr, sizeof(hdr)); + if (!ret) + ret = queue_reply(&staging_q, path, path_len); + if (!ret) + ret = queue_reply(&staging_q, token, tok_len); + if (!ret && len > 2) + ret = queue_reply(&staging_q, vec[2], data_len); + + if (!ret) { + /* success: pass reply list onto watcher */ + list_splice_tail(&staging_q, &adap->dev_data->read_buffers); + wake_up(&adap->dev_data->read_waitq); + } else + queue_cleanup(&staging_q); + + mutex_unlock(&adap->dev_data->reply_mutex); +} + +static int xenbus_write_transaction(unsigned msg_type, + struct xenbus_file_priv *u) +{ + int rc; + void *reply; + struct xenbus_transaction_holder *trans = NULL; + LIST_HEAD(staging_q); + + if (msg_type == XS_TRANSACTION_START) { + trans = kmalloc(sizeof(*trans), GFP_KERNEL); + if (!trans) { + rc = -ENOMEM; + goto out; + } + } + + reply = xenbus_dev_request_and_reply(&u->u.msg); + if (IS_ERR(reply)) { + kfree(trans); + rc = PTR_ERR(reply); + goto out; + } + + if (msg_type == XS_TRANSACTION_START) { + trans->handle.id = simple_strtoul(reply, NULL, 0); + + list_add(&trans->list, &u->transactions); + } else if (msg_type == XS_TRANSACTION_END) { + list_for_each_entry(trans, &u->transactions, list) + if (trans->handle.id == u->u.msg.tx_id) + break; + BUG_ON(&trans->list == &u->transactions); + list_del(&trans->list); + + kfree(trans); + } + + mutex_lock(&u->reply_mutex); + rc = queue_reply(&staging_q, &u->u.msg, sizeof(u->u.msg)); + if (!rc) + rc = queue_reply(&staging_q, reply, u->u.msg.len); + if (!rc) { + list_splice_tail(&staging_q, &u->read_buffers); + wake_up(&u->read_waitq); + } else { + queue_cleanup(&staging_q); + } + mutex_unlock(&u->reply_mutex); + + kfree(reply); + +out: + return rc; +} + +static int xenbus_write_watch(unsigned msg_type, struct xenbus_file_priv *u) +{ + struct watch_adapter *watch, *tmp_watch; + char *path, *token; + int err, rc; + LIST_HEAD(staging_q); + + path = u->u.buffer + sizeof(u->u.msg); + token = memchr(path, 0, u->u.msg.len); + if (token == NULL) { + rc = -EILSEQ; + goto out; + } + token++; + + if (msg_type == XS_WATCH) { + watch = alloc_watch_adapter(path, token); + if (watch == NULL) { + rc = -ENOMEM; + goto out; + } + + watch->watch.callback = watch_fired; + watch->dev_data = u; + + err = register_xenbus_watch(&watch->watch); + if (err) { + free_watch_adapter(watch); + rc = err; + goto out; + } + list_add(&watch->list, &u->watches); + } else { + list_for_each_entry_safe(watch, tmp_watch, &u->watches, list) { + if (!strcmp(watch->token, token) && + !strcmp(watch->watch.node, path)) { + unregister_xenbus_watch(&watch->watch); + list_del(&watch->list); + free_watch_adapter(watch); + break; + } + } + } + + /* Success. Synthesize a reply to say all is OK. */ + { + struct { + struct xsd_sockmsg hdr; + char body[3]; + } __packed reply = { + { + .type = msg_type, + .len = sizeof(reply.body) + }, + "OK" + }; + + mutex_lock(&u->reply_mutex); + rc = queue_reply(&u->read_buffers, &reply, sizeof(reply)); + wake_up(&u->read_waitq); + mutex_unlock(&u->reply_mutex); + } + +out: + return rc; +} + +static ssize_t xenbus_file_write(struct file *filp, + const char __user *ubuf, + size_t len, loff_t *ppos) +{ + struct xenbus_file_priv *u = filp->private_data; + uint32_t msg_type; + int rc = len; + int ret; + LIST_HEAD(staging_q); + + /* + * We're expecting usermode to be writing properly formed + * xenbus messages. If they write an incomplete message we + * buffer it up. Once it is complete, we act on it. + */ + + /* + * Make sure concurrent writers can't stomp all over each + * other's messages and make a mess of our partial message + * buffer. We don't make any attemppt to stop multiple + * writers from making a mess of each other's incomplete + * messages; we're just trying to guarantee our own internal + * consistency and make sure that single writes are handled + * atomically. + */ + mutex_lock(&u->msgbuffer_mutex); + + /* Get this out of the way early to avoid confusion */ + if (len == 0) + goto out; + + /* Can't write a xenbus message larger we can buffer */ + if ((len + u->len) > sizeof(u->u.buffer)) { + /* On error, dump existing buffer */ + u->len = 0; + rc = -EINVAL; + goto out; + } + + ret = copy_from_user(u->u.buffer + u->len, ubuf, len); + + if (ret != 0) { + rc = -EFAULT; + goto out; + } + + /* Deal with a partial copy. */ + len -= ret; + rc = len; + + u->len += len; + + /* Return if we haven't got a full message yet */ + if (u->len < sizeof(u->u.msg)) + goto out; /* not even the header yet */ + + /* If we're expecting a message that's larger than we can + possibly send, dump what we have and return an error. */ + if ((sizeof(u->u.msg) + u->u.msg.len) > sizeof(u->u.buffer)) { + rc = -E2BIG; + u->len = 0; + goto out; + } + + if (u->len < (sizeof(u->u.msg) + u->u.msg.len)) + goto out; /* incomplete data portion */ + + /* + * OK, now we have a complete message. Do something with it. + */ + + msg_type = u->u.msg.type; + + switch (msg_type) { + case XS_WATCH: + case XS_UNWATCH: + /* (Un)Ask for some path to be watched for changes */ + ret = xenbus_write_watch(msg_type, u); + break; + + default: + /* Send out a transaction */ + ret = xenbus_write_transaction(msg_type, u); + break; + } + if (ret != 0) + rc = ret; + + /* Buffered message consumed */ + u->len = 0; + + out: + mutex_unlock(&u->msgbuffer_mutex); + return rc; +} + +static int xenbus_file_open(struct inode *inode, struct file *filp) +{ + struct xenbus_file_priv *u; + + if (xen_store_evtchn == 0) + return -ENOENT; + + nonseekable_open(inode, filp); + + u = kzalloc(sizeof(*u), GFP_KERNEL); + if (u == NULL) + return -ENOMEM; + + INIT_LIST_HEAD(&u->transactions); + INIT_LIST_HEAD(&u->watches); + INIT_LIST_HEAD(&u->read_buffers); + init_waitqueue_head(&u->read_waitq); + + mutex_init(&u->reply_mutex); + mutex_init(&u->msgbuffer_mutex); + + filp->private_data = u; + + return 0; +} + +static int xenbus_file_release(struct inode *inode, struct file *filp) +{ + struct xenbus_file_priv *u = filp->private_data; + struct xenbus_transaction_holder *trans, *tmp; + struct watch_adapter *watch, *tmp_watch; + struct read_buffer *rb, *tmp_rb; + + /* + * No need for locking here because there are no other users, + * by definition. + */ + + list_for_each_entry_safe(trans, tmp, &u->transactions, list) { + xenbus_transaction_end(trans->handle, 1); + list_del(&trans->list); + kfree(trans); + } + + list_for_each_entry_safe(watch, tmp_watch, &u->watches, list) { + unregister_xenbus_watch(&watch->watch); + list_del(&watch->list); + free_watch_adapter(watch); + } + + list_for_each_entry_safe(rb, tmp_rb, &u->read_buffers, list) { + list_del(&rb->list); + kfree(rb); + } + kfree(u); + + return 0; +} + +static unsigned int xenbus_file_poll(struct file *file, poll_table *wait) +{ + struct xenbus_file_priv *u = file->private_data; + + poll_wait(file, &u->read_waitq, wait); + if (!list_empty(&u->read_buffers)) + return POLLIN | POLLRDNORM; + return 0; +} + +const struct file_operations xen_xenbus_fops = { + .read = xenbus_file_read, + .write = xenbus_file_write, + .open = xenbus_file_open, + .release = xenbus_file_release, + .poll = xenbus_file_poll, + .llseek = no_llseek, +}; +EXPORT_SYMBOL_GPL(xen_xenbus_fops); + +static struct miscdevice xenbus_dev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "xen/xenbus", + .fops = &xen_xenbus_fops, +}; + +static int __init xenbus_init(void) +{ + int err; + + if (!xen_domain()) + return -ENODEV; + + err = misc_register(&xenbus_dev); + if (err) + printk(KERN_ERR "Could not register xenbus device\n"); + return err; +} + +static void __exit xenbus_exit(void) +{ + misc_deregister(&xenbus_dev); +} + +module_init(xenbus_init); +module_exit(xenbus_exit); -- cgit v1.2.3-18-g5258 From e9f0fec3f5d406c500861da779d16a779a110055 Mon Sep 17 00:00:00 2001 From: Bastian Blank Date: Sat, 10 Dec 2011 19:29:48 +0100 Subject: xen: Add xenbus_backend device Access for xenstored to the event channel and pre-allocated ring is managed via xenfs. This adds its own character device featuring mmap for the ring and an ioctl for the event channel. Signed-off-by: Bastian Blank Acked-by: Ian Campbell Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/xenbus/Makefile | 1 + drivers/xen/xenbus/xenbus_dev_backend.c | 89 +++++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+) create mode 100644 drivers/xen/xenbus/xenbus_dev_backend.c (limited to 'drivers/xen/xenbus') diff --git a/drivers/xen/xenbus/Makefile b/drivers/xen/xenbus/Makefile index a2ea363b9f3..31e2e9050c7 100644 --- a/drivers/xen/xenbus/Makefile +++ b/drivers/xen/xenbus/Makefile @@ -10,4 +10,5 @@ xenbus-objs += xenbus_probe.o xenbus-be-objs-$(CONFIG_XEN_BACKEND) += xenbus_probe_backend.o xenbus-objs += $(xenbus-be-objs-y) +obj-$(CONFIG_XEN_BACKEND) += xenbus_dev_backend.o obj-$(CONFIG_XEN_XENBUS_FRONTEND) += xenbus_probe_frontend.o diff --git a/drivers/xen/xenbus/xenbus_dev_backend.c b/drivers/xen/xenbus/xenbus_dev_backend.c new file mode 100644 index 00000000000..a2092bd9769 --- /dev/null +++ b/drivers/xen/xenbus/xenbus_dev_backend.c @@ -0,0 +1,89 @@ +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "xenbus_comms.h" + +MODULE_LICENSE("GPL"); + +static int xenbus_backend_open(struct inode *inode, struct file *filp) +{ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + return nonseekable_open(inode, filp); +} + +static long xenbus_backend_ioctl(struct file *file, unsigned int cmd, unsigned long data) +{ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + switch (cmd) { + case IOCTL_XENBUS_BACKEND_EVTCHN: + if (xen_store_evtchn > 0) + return xen_store_evtchn; + return -ENODEV; + + default: + return -ENOTTY; + } +} + +static int xenbus_backend_mmap(struct file *file, struct vm_area_struct *vma) +{ + size_t size = vma->vm_end - vma->vm_start; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if ((size > PAGE_SIZE) || (vma->vm_pgoff != 0)) + return -EINVAL; + + if (remap_pfn_range(vma, vma->vm_start, + virt_to_pfn(xen_store_interface), + size, vma->vm_page_prot)) + return -EAGAIN; + + return 0; +} + +const struct file_operations xenbus_backend_fops = { + .open = xenbus_backend_open, + .mmap = xenbus_backend_mmap, + .unlocked_ioctl = xenbus_backend_ioctl, +}; + +static struct miscdevice xenbus_backend_dev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "xen/xenbus_backend", + .fops = &xenbus_backend_fops, +}; + +static int __init xenbus_backend_init(void) +{ + int err; + + if (!xen_initial_domain()) + return -ENODEV; + + err = misc_register(&xenbus_backend_dev); + if (err) + printk(KERN_ERR "Could not register xenbus backend device\n"); + return err; +} + +static void __exit xenbus_backend_exit(void) +{ + misc_deregister(&xenbus_backend_dev); +} + +module_init(xenbus_backend_init); +module_exit(xenbus_backend_exit); -- cgit v1.2.3-18-g5258 From fe7acdbec195339e2fbcee518229e85fb9c329b1 Mon Sep 17 00:00:00 2001 From: Bastian Blank Date: Sat, 10 Dec 2011 19:29:50 +0100 Subject: xen/xenbus-frontend: Make error message more clear Add the work frontend to the error message because we now also have a backend device. Signed-off-by: Bastian Blank Acked-by: Ian Campbell Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/xenbus/xenbus_dev_frontend.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/xen/xenbus') diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c index fb30cffe033..9f6be7d59a6 100644 --- a/drivers/xen/xenbus/xenbus_dev_frontend.c +++ b/drivers/xen/xenbus/xenbus_dev_frontend.c @@ -611,7 +611,7 @@ static int __init xenbus_init(void) err = misc_register(&xenbus_dev); if (err) - printk(KERN_ERR "Could not register xenbus device\n"); + printk(KERN_ERR "Could not register xenbus frontend device\n"); return err; } -- cgit v1.2.3-18-g5258 From b79d2ff98caee60c07a7598aba3b26acd1655a99 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Mon, 19 Dec 2011 15:08:15 -0500 Subject: xen/xenbus-frontend: Fix compile error with randconfig drivers/xen/xenbus/xenbus_dev_frontend.c: In function 'xenbus_init': drivers/xen/xenbus/xenbus_dev_frontend.c:609:2: error: implicit declaration of function 'xen_domain' Reported-by: Randy Dunlap Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/xenbus/xenbus_dev_frontend.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/xen/xenbus') diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c index 9f6be7d59a6..aec01420d97 100644 --- a/drivers/xen/xenbus/xenbus_dev_frontend.c +++ b/drivers/xen/xenbus/xenbus_dev_frontend.c @@ -58,6 +58,7 @@ #include "xenbus_comms.h" #include +#include #include MODULE_LICENSE("GPL"); -- cgit v1.2.3-18-g5258 From 2c5d37d30fbd27d424a18abc16786cb152a37017 Mon Sep 17 00:00:00 2001 From: Daniel De Graaf Date: Mon, 19 Dec 2011 14:55:14 -0500 Subject: xenbus: Support HVM backends Add HVM implementations of xenbus_(map,unmap)_ring_v(alloc,free) so that ring mappings can be done without using GNTMAP_contains_pte which is not supported on HVM. This also removes the need to use vmlist_lock on PV by tracking the allocated xenbus rings. Signed-off-by: Daniel De Graaf [v1: Fix compile error when XENBUS_FRONTEND is defined as module] Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/xenbus/xenbus_client.c | 176 ++++++++++++++++++++++++++++++++----- drivers/xen/xenbus/xenbus_probe.c | 2 + drivers/xen/xenbus/xenbus_probe.h | 2 + 3 files changed, 159 insertions(+), 21 deletions(-) (limited to 'drivers/xen/xenbus') diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c index 1906125eab4..0fa52916ad0 100644 --- a/drivers/xen/xenbus/xenbus_client.c +++ b/drivers/xen/xenbus/xenbus_client.c @@ -32,15 +32,39 @@ #include #include +#include #include #include #include #include #include #include +#include #include #include #include +#include + +#include "xenbus_probe.h" + +struct xenbus_map_node { + struct list_head next; + union { + struct vm_struct *area; /* PV */ + struct page *page; /* HVM */ + }; + grant_handle_t handle; +}; + +static DEFINE_SPINLOCK(xenbus_valloc_lock); +static LIST_HEAD(xenbus_valloc_pages); + +struct xenbus_ring_ops { + int (*map)(struct xenbus_device *dev, int gnt, void **vaddr); + int (*unmap)(struct xenbus_device *dev, void *vaddr); +}; + +static const struct xenbus_ring_ops *ring_ops __read_mostly; const char *xenbus_strstate(enum xenbus_state state) { @@ -435,20 +459,34 @@ EXPORT_SYMBOL_GPL(xenbus_free_evtchn); * XenbusStateClosing and the error message will be saved in XenStore. */ int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void **vaddr) +{ + return ring_ops->map(dev, gnt_ref, vaddr); +} +EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc); + +static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev, + int gnt_ref, void **vaddr) { struct gnttab_map_grant_ref op = { .flags = GNTMAP_host_map | GNTMAP_contains_pte, .ref = gnt_ref, .dom = dev->otherend_id, }; + struct xenbus_map_node *node; struct vm_struct *area; pte_t *pte; *vaddr = NULL; + node = kzalloc(sizeof(*node), GFP_KERNEL); + if (!node) + return -ENOMEM; + area = alloc_vm_area(PAGE_SIZE, &pte); - if (!area) + if (!area) { + kfree(node); return -ENOMEM; + } op.host_addr = arbitrary_virt_to_machine(pte).maddr; @@ -457,19 +495,59 @@ int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void **vaddr) if (op.status != GNTST_okay) { free_vm_area(area); + kfree(node); xenbus_dev_fatal(dev, op.status, "mapping in shared page %d from domain %d", gnt_ref, dev->otherend_id); return op.status; } - /* Stuff the handle in an unused field */ - area->phys_addr = (unsigned long)op.handle; + node->handle = op.handle; + node->area = area; + + spin_lock(&xenbus_valloc_lock); + list_add(&node->next, &xenbus_valloc_pages); + spin_unlock(&xenbus_valloc_lock); *vaddr = area->addr; return 0; } -EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc); + +static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev, + int gnt_ref, void **vaddr) +{ + struct xenbus_map_node *node; + int err; + void *addr; + + *vaddr = NULL; + + node = kzalloc(sizeof(*node), GFP_KERNEL); + if (!node) + return -ENOMEM; + + err = alloc_xenballooned_pages(1, &node->page, false /* lowmem */); + if (err) + goto out_err; + + addr = pfn_to_kaddr(page_to_pfn(node->page)); + + err = xenbus_map_ring(dev, gnt_ref, &node->handle, addr); + if (err) + goto out_err; + + spin_lock(&xenbus_valloc_lock); + list_add(&node->next, &xenbus_valloc_pages); + spin_unlock(&xenbus_valloc_lock); + + *vaddr = addr; + return 0; + + out_err: + free_xenballooned_pages(1, &node->page); + kfree(node); + return err; +} /** @@ -525,32 +603,36 @@ EXPORT_SYMBOL_GPL(xenbus_map_ring); */ int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr) { - struct vm_struct *area; + return ring_ops->unmap(dev, vaddr); +} +EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree); + +static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr) +{ + struct xenbus_map_node *node; struct gnttab_unmap_grant_ref op = { .host_addr = (unsigned long)vaddr, }; unsigned int level; - /* It'd be nice if linux/vmalloc.h provided a find_vm_area(void *addr) - * method so that we don't have to muck with vmalloc internals here. - * We could force the user to hang on to their struct vm_struct from - * xenbus_map_ring_valloc, but these 6 lines considerably simplify - * this API. - */ - read_lock(&vmlist_lock); - for (area = vmlist; area != NULL; area = area->next) { - if (area->addr == vaddr) - break; + spin_lock(&xenbus_valloc_lock); + list_for_each_entry(node, &xenbus_valloc_pages, next) { + if (node->area->addr == vaddr) { + list_del(&node->next); + goto found; + } } - read_unlock(&vmlist_lock); + node = NULL; + found: + spin_unlock(&xenbus_valloc_lock); - if (!area) { + if (!node) { xenbus_dev_error(dev, -ENOENT, "can't find mapped virtual address %p", vaddr); return GNTST_bad_virt_addr; } - op.handle = (grant_handle_t)area->phys_addr; + op.handle = node->handle; op.host_addr = arbitrary_virt_to_machine( lookup_address((unsigned long)vaddr, &level)).maddr; @@ -558,16 +640,50 @@ int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr) BUG(); if (op.status == GNTST_okay) - free_vm_area(area); + free_vm_area(node->area); else xenbus_dev_error(dev, op.status, "unmapping page at handle %d error %d", - (int16_t)area->phys_addr, op.status); + node->handle, op.status); + kfree(node); return op.status; } -EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree); +static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr) +{ + int rv; + struct xenbus_map_node *node; + void *addr; + + spin_lock(&xenbus_valloc_lock); + list_for_each_entry(node, &xenbus_valloc_pages, next) { + addr = pfn_to_kaddr(page_to_pfn(node->page)); + if (addr == vaddr) { + list_del(&node->next); + goto found; + } + } + node = NULL; + found: + spin_unlock(&xenbus_valloc_lock); + + if (!node) { + xenbus_dev_error(dev, -ENOENT, + "can't find mapped virtual address %p", vaddr); + return GNTST_bad_virt_addr; + } + + rv = xenbus_unmap_ring(dev, node->handle, addr); + + if (!rv) + free_xenballooned_pages(1, &node->page); + else + WARN(1, "Leaking %p\n", vaddr); + + kfree(node); + return rv; +} /** * xenbus_unmap_ring @@ -617,3 +733,21 @@ enum xenbus_state xenbus_read_driver_state(const char *path) return result; } EXPORT_SYMBOL_GPL(xenbus_read_driver_state); + +static const struct xenbus_ring_ops ring_ops_pv = { + .map = xenbus_map_ring_valloc_pv, + .unmap = xenbus_unmap_ring_vfree_pv, +}; + +static const struct xenbus_ring_ops ring_ops_hvm = { + .map = xenbus_map_ring_valloc_hvm, + .unmap = xenbus_unmap_ring_vfree_hvm, +}; + +void __init xenbus_ring_ops_init(void) +{ + if (xen_pv_domain()) + ring_ops = &ring_ops_pv; + else + ring_ops = &ring_ops_hvm; +} diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 1b178c6e893..1c05b2508ae 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -730,6 +730,8 @@ static int __init xenbus_init(void) if (!xen_domain()) return -ENODEV; + xenbus_ring_ops_init(); + if (xen_hvm_domain()) { uint64_t v = 0; err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v); diff --git a/drivers/xen/xenbus/xenbus_probe.h b/drivers/xen/xenbus/xenbus_probe.h index 9b1de4e34c6..460d784a769 100644 --- a/drivers/xen/xenbus/xenbus_probe.h +++ b/drivers/xen/xenbus/xenbus_probe.h @@ -76,4 +76,6 @@ extern void xenbus_otherend_changed(struct xenbus_watch *watch, extern int xenbus_read_otherend_details(struct xenbus_device *xendev, char *id_node, char *path_node); +void xenbus_ring_ops_init(void); + #endif -- cgit v1.2.3-18-g5258 From 2946a52ac7d57c9d02db477e3684259d86446ea7 Mon Sep 17 00:00:00 2001 From: Daniel De Graaf Date: Wed, 14 Dec 2011 15:12:10 -0500 Subject: xenbus: Use grant-table wrapper functions For xenbus_{map,unmap}_ring to work on HVM, the grant table operations must be set up using the gnttab_set_{map,unmap}_op functions instead of directly populating the fields of gnttab_map_grant_ref. These functions simply populate the structure on paravirtualized Xen; however, on HVM they must call __pa() on vaddr when populating op->host_addr because the hypervisor cannot directly interpret guest-virtual addresses. Signed-off-by: Daniel De Graaf [v1: Fixed cleanpatch error] Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/xenbus/xenbus_client.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) (limited to 'drivers/xen/xenbus') diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c index 0fa52916ad0..566d2adbd6e 100644 --- a/drivers/xen/xenbus/xenbus_client.c +++ b/drivers/xen/xenbus/xenbus_client.c @@ -567,12 +567,10 @@ static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev, int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref, grant_handle_t *handle, void *vaddr) { - struct gnttab_map_grant_ref op = { - .host_addr = (unsigned long)vaddr, - .flags = GNTMAP_host_map, - .ref = gnt_ref, - .dom = dev->otherend_id, - }; + struct gnttab_map_grant_ref op; + + gnttab_set_map_op(&op, (phys_addr_t)vaddr, GNTMAP_host_map, gnt_ref, + dev->otherend_id); if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) BUG(); @@ -698,10 +696,9 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr) int xenbus_unmap_ring(struct xenbus_device *dev, grant_handle_t handle, void *vaddr) { - struct gnttab_unmap_grant_ref op = { - .host_addr = (unsigned long)vaddr, - .handle = handle, - }; + struct gnttab_unmap_grant_ref op; + + gnttab_set_unmap_op(&op, (phys_addr_t)vaddr, GNTMAP_host_map, handle); if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) BUG(); -- cgit v1.2.3-18-g5258 From 01464a60a4d21fb649e088f7ae5136c6fb130889 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 21 Dec 2011 14:19:47 -0500 Subject: xen/xenbus: Fix compile error - missing header for xen_initial_domain() drivers/xen/xenbus/xenbus_dev_backend.c:74:2: error: implicit declaration of function 'xen_initial_domain' Reported-by: Randy Dunlap Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/xenbus/xenbus_dev_backend.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/xen/xenbus') diff --git a/drivers/xen/xenbus/xenbus_dev_backend.c b/drivers/xen/xenbus/xenbus_dev_backend.c index a2092bd9769..3d3be78c109 100644 --- a/drivers/xen/xenbus/xenbus_dev_backend.c +++ b/drivers/xen/xenbus/xenbus_dev_backend.c @@ -6,6 +6,7 @@ #include #include +#include #include #include -- cgit v1.2.3-18-g5258 From 73db144b58a32fc39733db6a7e1fe582072ad26a Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 22 Dec 2011 09:08:13 +0000 Subject: Xen: consolidate and simplify struct xenbus_driver instantiation The 'name', 'owner', and 'mod_name' members are redundant with the identically named fields in the 'driver' sub-structure. Rather than switching each instance to specify these fields explicitly, introduce a macro to simplify this. Eliminate further redundancy by allowing the drvname argument to DEFINE_XENBUS_DRIVER() to be blank (in which case the first entry from the ID table will be used for .driver.name). Also eliminate the questionable xenbus_register_{back,front}end() wrappers - their sole remaining purpose was the checking of the 'owner' field, proper setting of which shouldn't be an issue anymore when the macro gets used. v2: Restore DRV_NAME for the driver name in xen-pciback. Signed-off-by: Jan Beulich Cc: Jens Axboe Cc: Dmitry Torokhov Cc: Florian Tobias Schandinat Cc: Ian Campbell Cc: David S. Miller Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/xenbus/xenbus_probe.c | 7 +------ drivers/xen/xenbus/xenbus_probe.h | 4 +--- drivers/xen/xenbus/xenbus_probe_backend.c | 8 +++----- drivers/xen/xenbus/xenbus_probe_frontend.c | 8 +++----- 4 files changed, 8 insertions(+), 19 deletions(-) (limited to 'drivers/xen/xenbus') diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 1c05b2508ae..3864967202b 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -291,14 +291,9 @@ void xenbus_dev_shutdown(struct device *_dev) EXPORT_SYMBOL_GPL(xenbus_dev_shutdown); int xenbus_register_driver_common(struct xenbus_driver *drv, - struct xen_bus_type *bus, - struct module *owner, - const char *mod_name) + struct xen_bus_type *bus) { - drv->driver.name = drv->name; drv->driver.bus = &bus->bus; - drv->driver.owner = owner; - drv->driver.mod_name = mod_name; return driver_register(&drv->driver); } diff --git a/drivers/xen/xenbus/xenbus_probe.h b/drivers/xen/xenbus/xenbus_probe.h index 460d784a769..bb4f92ed873 100644 --- a/drivers/xen/xenbus/xenbus_probe.h +++ b/drivers/xen/xenbus/xenbus_probe.h @@ -53,9 +53,7 @@ extern int xenbus_match(struct device *_dev, struct device_driver *_drv); extern int xenbus_dev_probe(struct device *_dev); extern int xenbus_dev_remove(struct device *_dev); extern int xenbus_register_driver_common(struct xenbus_driver *drv, - struct xen_bus_type *bus, - struct module *owner, - const char *mod_name); + struct xen_bus_type *bus); extern int xenbus_probe_node(struct xen_bus_type *bus, const char *type, const char *nodename); diff --git a/drivers/xen/xenbus/xenbus_probe_backend.c b/drivers/xen/xenbus/xenbus_probe_backend.c index c3c7cd195c1..257be37d909 100644 --- a/drivers/xen/xenbus/xenbus_probe_backend.c +++ b/drivers/xen/xenbus/xenbus_probe_backend.c @@ -232,15 +232,13 @@ int xenbus_dev_is_online(struct xenbus_device *dev) } EXPORT_SYMBOL_GPL(xenbus_dev_is_online); -int __xenbus_register_backend(struct xenbus_driver *drv, - struct module *owner, const char *mod_name) +int xenbus_register_backend(struct xenbus_driver *drv) { drv->read_otherend_details = read_frontend_details; - return xenbus_register_driver_common(drv, &xenbus_backend, - owner, mod_name); + return xenbus_register_driver_common(drv, &xenbus_backend); } -EXPORT_SYMBOL_GPL(__xenbus_register_backend); +EXPORT_SYMBOL_GPL(xenbus_register_backend); static int backend_probe_and_watch(struct notifier_block *notifier, unsigned long event, diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c index 2f73195512b..9c57819df51 100644 --- a/drivers/xen/xenbus/xenbus_probe_frontend.c +++ b/drivers/xen/xenbus/xenbus_probe_frontend.c @@ -230,15 +230,13 @@ static void wait_for_devices(struct xenbus_driver *xendrv) print_device_status); } -int __xenbus_register_frontend(struct xenbus_driver *drv, - struct module *owner, const char *mod_name) +int xenbus_register_frontend(struct xenbus_driver *drv) { int ret; drv->read_otherend_details = read_backend_details; - ret = xenbus_register_driver_common(drv, &xenbus_frontend, - owner, mod_name); + ret = xenbus_register_driver_common(drv, &xenbus_frontend); if (ret) return ret; @@ -247,7 +245,7 @@ int __xenbus_register_frontend(struct xenbus_driver *drv, return 0; } -EXPORT_SYMBOL_GPL(__xenbus_register_frontend); +EXPORT_SYMBOL_GPL(xenbus_register_frontend); static DECLARE_WAIT_QUEUE_HEAD(backend_state_wq); static int backend_state; -- cgit v1.2.3-18-g5258 From 9e7860cee18241633eddb36a4c34c7b61d8cecbc Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Wed, 4 Jan 2012 09:34:49 +0000 Subject: xen/xenbus: Reject replies with payload > XENSTORE_PAYLOAD_MAX. Haogang Chen found out that: There is a potential integer overflow in process_msg() that could result in cross-domain attack. body = kmalloc(msg->hdr.len + 1, GFP_NOIO | __GFP_HIGH); When a malicious guest passes 0xffffffff in msg->hdr.len, the subsequent call to xb_read() would write to a zero-length buffer. The other end of this connection is always the xenstore backend daemon so there is no guest (malicious or otherwise) which can do this. The xenstore daemon is a trusted component in the system. However this seem like a reasonable robustness improvement so we should have it. And Ian when read the API docs found that: The payload length (len field of the header) is limited to 4096 (XENSTORE_PAYLOAD_MAX) in both directions. If a client exceeds the limit, its xenstored connection will be immediately killed by xenstored, which is usually catastrophic from the client's point of view. Clients (particularly domains, which cannot just reconnect) should avoid this. so this patch checks against that instead. This also avoids a potential integer overflow pointed out by Haogang Chen. Signed-off-by: Ian Campbell Cc: Haogang Chen CC: stable@kernel.org Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/xenbus/xenbus_xs.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/xen/xenbus') diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c index b3b8f2f3ad1..6f0121e3be6 100644 --- a/drivers/xen/xenbus/xenbus_xs.c +++ b/drivers/xen/xenbus/xenbus_xs.c @@ -810,6 +810,12 @@ static int process_msg(void) goto out; } + if (msg->hdr.len > XENSTORE_PAYLOAD_MAX) { + kfree(msg); + err = -EINVAL; + goto out; + } + body = kmalloc(msg->hdr.len + 1, GFP_NOIO | __GFP_HIGH); if (body == NULL) { kfree(msg); -- cgit v1.2.3-18-g5258 From 50bf73796e85ed6a061df6d8474f7cef7870df6a Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Wed, 4 Jan 2012 11:39:51 +0000 Subject: xenbus: maximum buffer size is XENSTORE_PAYLOAD_MAX Use this now that it is defined even though it happens to be == PAGE_SIZE. The code which takes requests from userspace already validates against the size of this buffer so no further checks are required to ensure that userspace requests comply with the protocol in this respect. Signed-off-by: Ian Campbell Cc: Haogang Chen Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/xenbus/xenbus_dev_frontend.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/xen/xenbus') diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c index aec01420d97..527dc2a3b89 100644 --- a/drivers/xen/xenbus/xenbus_dev_frontend.c +++ b/drivers/xen/xenbus/xenbus_dev_frontend.c @@ -105,7 +105,7 @@ struct xenbus_file_priv { unsigned int len; union { struct xsd_sockmsg msg; - char buffer[PAGE_SIZE]; + char buffer[XENSTORE_PAYLOAD_MAX]; } u; /* Response queue. */ -- cgit v1.2.3-18-g5258 From a800651e8893007d3a12bc281f0265f18043c4fa Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Wed, 4 Jan 2012 11:39:52 +0000 Subject: xen/xenbus: don't reimplement kvasprintf via a fixed size buffer Signed-off-by: Ian Campbell Cc: Haogang Chen Acked-by: Jan Beulich Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/xenbus/xenbus_xs.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) (limited to 'drivers/xen/xenbus') diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c index 6f0121e3be6..226d1ac55cf 100644 --- a/drivers/xen/xenbus/xenbus_xs.c +++ b/drivers/xen/xenbus/xenbus_xs.c @@ -532,21 +532,18 @@ int xenbus_printf(struct xenbus_transaction t, { va_list ap; int ret; -#define PRINTF_BUFFER_SIZE 4096 - char *printf_buffer; - - printf_buffer = kmalloc(PRINTF_BUFFER_SIZE, GFP_NOIO | __GFP_HIGH); - if (printf_buffer == NULL) - return -ENOMEM; + char *buf; va_start(ap, fmt); - ret = vsnprintf(printf_buffer, PRINTF_BUFFER_SIZE, fmt, ap); + buf = kvasprintf(GFP_NOIO | __GFP_HIGH, fmt, ap); va_end(ap); - BUG_ON(ret > PRINTF_BUFFER_SIZE-1); - ret = xenbus_write(t, dir, node, printf_buffer); + if (!buf) + return -ENOMEM; + + ret = xenbus_write(t, dir, node, buf); - kfree(printf_buffer); + kfree(buf); return ret; } -- cgit v1.2.3-18-g5258