diff options
Diffstat (limited to 'net/9p')
| -rw-r--r-- | net/9p/Kconfig | 26 | ||||
| -rw-r--r-- | net/9p/Makefile | 11 | ||||
| -rw-r--r-- | net/9p/client.c | 2461 | ||||
| -rw-r--r-- | net/9p/conv.c | 924 | ||||
| -rw-r--r-- | net/9p/error.c | 23 | ||||
| -rw-r--r-- | net/9p/fcprint.c | 358 | ||||
| -rw-r--r-- | net/9p/mod.c | 136 | ||||
| -rw-r--r-- | net/9p/protocol.c | 636 | ||||
| -rw-r--r-- | net/9p/protocol.h | 34 | ||||
| -rw-r--r-- | net/9p/trans_common.c | 69 | ||||
| -rw-r--r-- | net/9p/trans_common.h | 17 | ||||
| -rw-r--r-- | net/9p/trans_fd.c | 1712 | ||||
| -rw-r--r-- | net/9p/trans_rdma.c | 765 | ||||
| -rw-r--r-- | net/9p/trans_virtio.c | 741 | ||||
| -rw-r--r-- | net/9p/util.c | 58 |
15 files changed, 4707 insertions, 3264 deletions
diff --git a/net/9p/Kconfig b/net/9p/Kconfig index bafc50c9e6f..a75174a3372 100644 --- a/net/9p/Kconfig +++ b/net/9p/Kconfig @@ -3,8 +3,8 @@ # menuconfig NET_9P - depends on NET && EXPERIMENTAL - tristate "Plan 9 Resource Sharing Support (9P2000) (Experimental)" + depends on NET + tristate "Plan 9 Resource Sharing Support (9P2000)" help If you say Y here, you will get experimental support for Plan 9 resource sharing via the 9P2000 protocol. @@ -13,26 +13,24 @@ menuconfig NET_9P If unsure, say N. -config NET_9P_FD - depends on NET_9P - default y if NET_9P - tristate "9P File Descriptor Transports (Experimental)" - help - This builds support for file descriptor transports for 9p - which includes support for TCP/IP, named pipes, or passed - file descriptors. TCP/IP is the default transport for 9p, - so if you are going to use 9p, you'll likely want this. +if NET_9P config NET_9P_VIRTIO - depends on NET_9P && EXPERIMENTAL && VIRTIO - tristate "9P Virtio Transport (Experimental)" + depends on VIRTIO + tristate "9P Virtio Transport" help This builds support for a transports between guest partitions and a host partition. +config NET_9P_RDMA + depends on INET && INFINIBAND && INFINIBAND_ADDR_TRANS + tristate "9P RDMA Transport (Experimental)" + help + This builds support for an RDMA transport. + config NET_9P_DEBUG bool "Debug information" - depends on NET_9P help Say Y if you want the 9P subsystem to log debug information. +endif diff --git a/net/9p/Makefile b/net/9p/Makefile index 8a105110189..a0874cc1f71 100644 --- a/net/9p/Makefile +++ b/net/9p/Makefile @@ -1,17 +1,18 @@ obj-$(CONFIG_NET_9P) := 9pnet.o -obj-$(CONFIG_NET_9P_FD) += 9pnet_fd.o obj-$(CONFIG_NET_9P_VIRTIO) += 9pnet_virtio.o +obj-$(CONFIG_NET_9P_RDMA) += 9pnet_rdma.o 9pnet-objs := \ mod.o \ client.o \ - conv.o \ error.o \ - fcprint.o \ util.o \ - -9pnet_fd-objs := \ + protocol.o \ trans_fd.o \ + trans_common.o \ 9pnet_virtio-objs := \ trans_virtio.o \ + +9pnet_rdma-objs := \ + trans_rdma.o \ diff --git a/net/9p/client.c b/net/9p/client.c index 84e087e2414..0004cbaac4a 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -23,22 +23,25 @@ * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/module.h> #include <linux/errno.h> #include <linux/fs.h> #include <linux/poll.h> #include <linux/idr.h> #include <linux/mutex.h> +#include <linux/slab.h> #include <linux/sched.h> #include <linux/uaccess.h> #include <net/9p/9p.h> #include <linux/parser.h> -#include <net/9p/transport.h> #include <net/9p/client.h> +#include <net/9p/transport.h> +#include "protocol.h" -static struct p9_fid *p9_fid_create(struct p9_client *clnt); -static void p9_fid_destroy(struct p9_fid *fid); -static struct p9_stat *p9_clone_stat(struct p9_stat *st, int dotu); +#define CREATE_TRACE_POINTS +#include <trace/events/9p.h> /* * Client Option Parsing (code inspired by NFS code) @@ -49,160 +52,1018 @@ enum { Opt_msize, Opt_trans, Opt_legacy, + Opt_version, Opt_err, }; -static match_table_t tokens = { +static const match_table_t tokens = { {Opt_msize, "msize=%u"}, {Opt_legacy, "noextend"}, {Opt_trans, "trans=%s"}, + {Opt_version, "version=%s"}, {Opt_err, NULL}, }; +inline int p9_is_proto_dotl(struct p9_client *clnt) +{ + return clnt->proto_version == p9_proto_2000L; +} +EXPORT_SYMBOL(p9_is_proto_dotl); + +inline int p9_is_proto_dotu(struct p9_client *clnt) +{ + return clnt->proto_version == p9_proto_2000u; +} +EXPORT_SYMBOL(p9_is_proto_dotu); + +/* + * Some error codes are taken directly from the server replies, + * make sure they are valid. + */ +static int safe_errno(int err) +{ + if ((err > 0) || (err < -MAX_ERRNO)) { + p9_debug(P9_DEBUG_ERROR, "Invalid error code %d\n", err); + return -EPROTO; + } + return err; +} + + +/* Interpret mount option for protocol version */ +static int get_protocol_version(char *s) +{ + int version = -EINVAL; + + if (!strcmp(s, "9p2000")) { + version = p9_proto_legacy; + p9_debug(P9_DEBUG_9P, "Protocol version: Legacy\n"); + } else if (!strcmp(s, "9p2000.u")) { + version = p9_proto_2000u; + p9_debug(P9_DEBUG_9P, "Protocol version: 9P2000.u\n"); + } else if (!strcmp(s, "9p2000.L")) { + version = p9_proto_2000L; + p9_debug(P9_DEBUG_9P, "Protocol version: 9P2000.L\n"); + } else + pr_info("Unknown protocol version %s\n", s); + + return version; +} + /** - * v9fs_parse_options - parse mount options into session structure - * @options: options string passed from mount - * @v9ses: existing v9fs session information + * parse_options - parse mount options into client structure + * @opts: options string passed from mount + * @clnt: existing v9fs client information * + * Return 0 upon success, -ERRNO upon failure */ -static void parse_opts(char *options, struct p9_client *clnt) +static int parse_opts(char *opts, struct p9_client *clnt) { + char *options, *tmp_options; char *p; substring_t args[MAX_OPT_ARGS]; int option; - int ret; + char *s; + int ret = 0; - clnt->trans_mod = v9fs_default_trans(); - clnt->dotu = 1; + clnt->proto_version = p9_proto_2000L; clnt->msize = 8192; - if (!options) - return; + if (!opts) + return 0; + + tmp_options = kstrdup(opts, GFP_KERNEL); + if (!tmp_options) { + p9_debug(P9_DEBUG_ERROR, + "failed to allocate copy of option string\n"); + return -ENOMEM; + } + options = tmp_options; while ((p = strsep(&options, ",")) != NULL) { - int token; + int token, r; if (!*p) continue; token = match_token(p, tokens, args); - if (token < Opt_trans) { - ret = match_int(&args[0], &option); - if (ret < 0) { - P9_DPRINTK(P9_DEBUG_ERROR, - "integer field, but no integer?\n"); - continue; - } - } switch (token) { case Opt_msize: + r = match_int(&args[0], &option); + if (r < 0) { + p9_debug(P9_DEBUG_ERROR, + "integer field, but no integer?\n"); + ret = r; + continue; + } clnt->msize = option; break; case Opt_trans: - clnt->trans_mod = v9fs_match_trans(&args[0]); + s = match_strdup(&args[0]); + if (!s) { + ret = -ENOMEM; + p9_debug(P9_DEBUG_ERROR, + "problem allocating copy of trans arg\n"); + goto free_and_return; + } + clnt->trans_mod = v9fs_get_trans_by_name(s); + if (clnt->trans_mod == NULL) { + pr_info("Could not find request transport: %s\n", + s); + ret = -EINVAL; + kfree(s); + goto free_and_return; + } + kfree(s); break; case Opt_legacy: - clnt->dotu = 0; + clnt->proto_version = p9_proto_legacy; + break; + case Opt_version: + s = match_strdup(&args[0]); + if (!s) { + ret = -ENOMEM; + p9_debug(P9_DEBUG_ERROR, + "problem allocating copy of version arg\n"); + goto free_and_return; + } + ret = get_protocol_version(s); + if (ret == -EINVAL) { + kfree(s); + goto free_and_return; + } + kfree(s); + clnt->proto_version = ret; break; default: continue; } } + +free_and_return: + kfree(tmp_options); + return ret; } +static struct p9_fcall *p9_fcall_alloc(int alloc_msize) +{ + struct p9_fcall *fc; + fc = kmalloc(sizeof(struct p9_fcall) + alloc_msize, GFP_NOFS); + if (!fc) + return NULL; + fc->capacity = alloc_msize; + fc->sdata = (char *) fc + sizeof(struct p9_fcall); + return fc; +} /** - * p9_client_rpc - sends 9P request and waits until a response is available. - * The function can be interrupted. - * @c: client data - * @tc: request to be sent - * @rc: pointer where a pointer to the response is stored + * p9_tag_alloc - lookup/allocate a request by tag + * @c: client session to lookup tag within + * @tag: numeric id for transaction + * + * this is a simple array lookup, but will grow the + * request_slots as necessary to accommodate transaction + * ids which did not previously have a slot. + * + * this code relies on the client spinlock to manage locks, its + * possible we should switch to something else, but I'd rather + * stick with something low-overhead for the common case. + * */ -int -p9_client_rpc(struct p9_client *c, struct p9_fcall *tc, - struct p9_fcall **rc) + +static struct p9_req_t * +p9_tag_alloc(struct p9_client *c, u16 tag, unsigned int max_size) { - return c->trans->rpc(c->trans, tc, rc); + unsigned long flags; + int row, col; + struct p9_req_t *req; + int alloc_msize = min(c->msize, max_size); + + /* This looks up the original request by tag so we know which + * buffer to read the data into */ + tag++; + + if (tag >= c->max_tag) { + spin_lock_irqsave(&c->lock, flags); + /* check again since original check was outside of lock */ + while (tag >= c->max_tag) { + row = (tag / P9_ROW_MAXTAG); + c->reqs[row] = kcalloc(P9_ROW_MAXTAG, + sizeof(struct p9_req_t), GFP_ATOMIC); + + if (!c->reqs[row]) { + pr_err("Couldn't grow tag array\n"); + spin_unlock_irqrestore(&c->lock, flags); + return ERR_PTR(-ENOMEM); + } + for (col = 0; col < P9_ROW_MAXTAG; col++) { + c->reqs[row][col].status = REQ_STATUS_IDLE; + c->reqs[row][col].tc = NULL; + } + c->max_tag += P9_ROW_MAXTAG; + } + spin_unlock_irqrestore(&c->lock, flags); + } + row = tag / P9_ROW_MAXTAG; + col = tag % P9_ROW_MAXTAG; + + req = &c->reqs[row][col]; + if (!req->wq) { + req->wq = kmalloc(sizeof(wait_queue_head_t), GFP_NOFS); + if (!req->wq) + goto grow_failed; + init_waitqueue_head(req->wq); + } + + if (!req->tc) + req->tc = p9_fcall_alloc(alloc_msize); + if (!req->rc) + req->rc = p9_fcall_alloc(alloc_msize); + if (!req->tc || !req->rc) + goto grow_failed; + + p9pdu_reset(req->tc); + p9pdu_reset(req->rc); + + req->tc->tag = tag-1; + req->status = REQ_STATUS_ALLOC; + + return req; + +grow_failed: + pr_err("Couldn't grow tag array\n"); + kfree(req->tc); + kfree(req->rc); + kfree(req->wq); + req->tc = req->rc = NULL; + req->wq = NULL; + return ERR_PTR(-ENOMEM); } -struct p9_client *p9_client_create(const char *dev_name, char *options) +/** + * p9_tag_lookup - lookup a request by tag + * @c: client session to lookup tag within + * @tag: numeric id for transaction + * + */ + +struct p9_req_t *p9_tag_lookup(struct p9_client *c, u16 tag) { - int err, n; - struct p9_client *clnt; - struct p9_fcall *tc, *rc; - struct p9_str *version; + int row, col; - err = 0; - tc = NULL; - rc = NULL; - clnt = kmalloc(sizeof(struct p9_client), GFP_KERNEL); - if (!clnt) - return ERR_PTR(-ENOMEM); + /* This looks up the original request by tag so we know which + * buffer to read the data into */ + tag++; - spin_lock_init(&clnt->lock); - INIT_LIST_HEAD(&clnt->fidlist); - clnt->fidpool = p9_idpool_create(); - if (!clnt->fidpool) { - err = PTR_ERR(clnt->fidpool); - clnt->fidpool = NULL; + if(tag >= c->max_tag) + return NULL; + + row = tag / P9_ROW_MAXTAG; + col = tag % P9_ROW_MAXTAG; + + return &c->reqs[row][col]; +} +EXPORT_SYMBOL(p9_tag_lookup); + +/** + * p9_tag_init - setup tags structure and contents + * @c: v9fs client struct + * + * This initializes the tags structure for each client instance. + * + */ + +static int p9_tag_init(struct p9_client *c) +{ + int err = 0; + + c->tagpool = p9_idpool_create(); + if (IS_ERR(c->tagpool)) { + err = PTR_ERR(c->tagpool); goto error; } - - parse_opts(options, clnt); - if (clnt->trans_mod == NULL) { - err = -EPROTONOSUPPORT; - P9_DPRINTK(P9_DEBUG_ERROR, - "No transport defined or default transport\n"); + err = p9_idpool_get(c->tagpool); /* reserve tag 0 */ + if (err < 0) { + p9_idpool_destroy(c->tagpool); goto error; } + c->max_tag = 0; +error: + return err; +} - P9_DPRINTK(P9_DEBUG_9P, "clnt %p trans %p msize %d dotu %d\n", - clnt, clnt->trans_mod, clnt->msize, clnt->dotu); +/** + * p9_tag_cleanup - cleans up tags structure and reclaims resources + * @c: v9fs client struct + * + * This frees resources associated with the tags structure + * + */ +static void p9_tag_cleanup(struct p9_client *c) +{ + int row, col; + + /* check to insure all requests are idle */ + for (row = 0; row < (c->max_tag/P9_ROW_MAXTAG); row++) { + for (col = 0; col < P9_ROW_MAXTAG; col++) { + if (c->reqs[row][col].status != REQ_STATUS_IDLE) { + p9_debug(P9_DEBUG_MUX, + "Attempting to cleanup non-free tag %d,%d\n", + row, col); + /* TODO: delay execution of cleanup */ + return; + } + } + } + if (c->tagpool) { + p9_idpool_put(0, c->tagpool); /* free reserved tag 0 */ + p9_idpool_destroy(c->tagpool); + } - clnt->trans = clnt->trans_mod->create(dev_name, options, clnt->msize, - clnt->dotu); - if (IS_ERR(clnt->trans)) { - err = PTR_ERR(clnt->trans); - clnt->trans = NULL; - goto error; + /* free requests associated with tags */ + for (row = 0; row < (c->max_tag/P9_ROW_MAXTAG); row++) { + for (col = 0; col < P9_ROW_MAXTAG; col++) { + kfree(c->reqs[row][col].wq); + kfree(c->reqs[row][col].tc); + kfree(c->reqs[row][col].rc); + } + kfree(c->reqs[row]); } + c->max_tag = 0; +} - if ((clnt->msize+P9_IOHDRSZ) > clnt->trans_mod->maxsize) - clnt->msize = clnt->trans_mod->maxsize-P9_IOHDRSZ; +/** + * p9_free_req - free a request and clean-up as necessary + * c: client state + * r: request to release + * + */ - tc = p9_create_tversion(clnt->msize, clnt->dotu?"9P2000.u":"9P2000"); - if (IS_ERR(tc)) { - err = PTR_ERR(tc); - tc = NULL; - goto error; +static void p9_free_req(struct p9_client *c, struct p9_req_t *r) +{ + int tag = r->tc->tag; + p9_debug(P9_DEBUG_MUX, "clnt %p req %p tag: %d\n", c, r, tag); + + r->status = REQ_STATUS_IDLE; + if (tag != P9_NOTAG && p9_idpool_check(tag, c->tagpool)) + p9_idpool_put(tag, c->tagpool); +} + +/** + * p9_client_cb - call back from transport to client + * c: client state + * req: request received + * + */ +void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status) +{ + p9_debug(P9_DEBUG_MUX, " tag %d\n", req->tc->tag); + + /* + * This barrier is needed to make sure any change made to req before + * the other thread wakes up will indeed be seen by the waiting side. + */ + smp_wmb(); + req->status = status; + + wake_up(req->wq); + p9_debug(P9_DEBUG_MUX, "wakeup: %d\n", req->tc->tag); +} +EXPORT_SYMBOL(p9_client_cb); + +/** + * p9_parse_header - parse header arguments out of a packet + * @pdu: packet to parse + * @size: size of packet + * @type: type of request + * @tag: tag of packet + * @rewind: set if we need to rewind offset afterwards + */ + +int +p9_parse_header(struct p9_fcall *pdu, int32_t *size, int8_t *type, int16_t *tag, + int rewind) +{ + int8_t r_type; + int16_t r_tag; + int32_t r_size; + int offset = pdu->offset; + int err; + + pdu->offset = 0; + if (pdu->size == 0) + pdu->size = 7; + + err = p9pdu_readf(pdu, 0, "dbw", &r_size, &r_type, &r_tag); + if (err) + goto rewind_and_exit; + + pdu->size = r_size; + pdu->id = r_type; + pdu->tag = r_tag; + + p9_debug(P9_DEBUG_9P, "<<< size=%d type: %d tag: %d\n", + pdu->size, pdu->id, pdu->tag); + + if (type) + *type = r_type; + if (tag) + *tag = r_tag; + if (size) + *size = r_size; + + +rewind_and_exit: + if (rewind) + pdu->offset = offset; + return err; +} +EXPORT_SYMBOL(p9_parse_header); + +/** + * p9_check_errors - check 9p packet for error return and process it + * @c: current client instance + * @req: request to parse and check for error conditions + * + * returns error code if one is discovered, otherwise returns 0 + * + * this will have to be more complicated if we have multiple + * error packet types + */ + +static int p9_check_errors(struct p9_client *c, struct p9_req_t *req) +{ + int8_t type; + int err; + int ecode; + + err = p9_parse_header(req->rc, NULL, &type, NULL, 0); + /* + * dump the response from server + * This should be after check errors which poplulate pdu_fcall. + */ + trace_9p_protocol_dump(c, req->rc); + if (err) { + p9_debug(P9_DEBUG_ERROR, "couldn't parse header %d\n", err); + return err; + } + if (type != P9_RERROR && type != P9_RLERROR) + return 0; + + if (!p9_is_proto_dotl(c)) { + char *ename; + err = p9pdu_readf(req->rc, c->proto_version, "s?d", + &ename, &ecode); + if (err) + goto out_err; + + if (p9_is_proto_dotu(c)) + err = -ecode; + + if (!err || !IS_ERR_VALUE(err)) { + err = p9_errstr2errno(ename, strlen(ename)); + + p9_debug(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", + -ecode, ename); + } + kfree(ename); + } else { + err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode); + err = -ecode; + + p9_debug(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode); + } + + return err; + +out_err: + p9_debug(P9_DEBUG_ERROR, "couldn't parse error%d\n", err); + + return err; +} + +/** + * p9_check_zc_errors - check 9p packet for error return and process it + * @c: current client instance + * @req: request to parse and check for error conditions + * @in_hdrlen: Size of response protocol buffer. + * + * returns error code if one is discovered, otherwise returns 0 + * + * this will have to be more complicated if we have multiple + * error packet types + */ + +static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req, + char *uidata, int in_hdrlen, int kern_buf) +{ + int err; + int ecode; + int8_t type; + char *ename = NULL; + + err = p9_parse_header(req->rc, NULL, &type, NULL, 0); + /* + * dump the response from server + * This should be after parse_header which poplulate pdu_fcall. + */ + trace_9p_protocol_dump(c, req->rc); + if (err) { + p9_debug(P9_DEBUG_ERROR, "couldn't parse header %d\n", err); + return err; + } + + if (type != P9_RERROR && type != P9_RLERROR) + return 0; + + if (!p9_is_proto_dotl(c)) { + /* Error is reported in string format */ + int len; + /* 7 = header size for RERROR; */ + int inline_len = in_hdrlen - 7; + + len = req->rc->size - req->rc->offset; + if (len > (P9_ZC_HDR_SZ - 7)) { + err = -EFAULT; + goto out_err; + } + + ename = &req->rc->sdata[req->rc->offset]; + if (len > inline_len) { + /* We have error in external buffer */ + if (kern_buf) { + memcpy(ename + inline_len, uidata, + len - inline_len); + } else { + err = copy_from_user(ename + inline_len, + uidata, len - inline_len); + if (err) { + err = -EFAULT; + goto out_err; + } + } + } + ename = NULL; + err = p9pdu_readf(req->rc, c->proto_version, "s?d", + &ename, &ecode); + if (err) + goto out_err; + + if (p9_is_proto_dotu(c)) + err = -ecode; + + if (!err || !IS_ERR_VALUE(err)) { + err = p9_errstr2errno(ename, strlen(ename)); + + p9_debug(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", + -ecode, ename); + } + kfree(ename); + } else { + err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode); + err = -ecode; + + p9_debug(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode); + } + return err; + +out_err: + p9_debug(P9_DEBUG_ERROR, "couldn't parse error%d\n", err); + return err; +} + +static struct p9_req_t * +p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...); + +/** + * p9_client_flush - flush (cancel) a request + * @c: client state + * @oldreq: request to cancel + * + * This sents a flush for a particular request and links + * the flush request to the original request. The current + * code only supports a single flush request although the protocol + * allows for multiple flush requests to be sent for a single request. + * + */ + +static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq) +{ + struct p9_req_t *req; + int16_t oldtag; + int err; + + err = p9_parse_header(oldreq->tc, NULL, NULL, &oldtag, 1); + if (err) + return err; + + p9_debug(P9_DEBUG_9P, ">>> TFLUSH tag %d\n", oldtag); + + req = p9_client_rpc(c, P9_TFLUSH, "w", oldtag); + if (IS_ERR(req)) + return PTR_ERR(req); + + /* + * if we haven't received a response for oldreq, + * remove it from the list + */ + if (oldreq->status == REQ_STATUS_SENT) + if (c->trans_mod->cancelled) + c->trans_mod->cancelled(c, oldreq); + + p9_free_req(c, req); + return 0; +} + +static struct p9_req_t *p9_client_prepare_req(struct p9_client *c, + int8_t type, int req_size, + const char *fmt, va_list ap) +{ + int tag, err; + struct p9_req_t *req; + + p9_debug(P9_DEBUG_MUX, "client %p op %d\n", c, type); + + /* we allow for any status other than disconnected */ + if (c->status == Disconnected) + return ERR_PTR(-EIO); + + /* if status is begin_disconnected we allow only clunk request */ + if ((c->status == BeginDisconnect) && (type != P9_TCLUNK)) + return ERR_PTR(-EIO); + + tag = P9_NOTAG; + if (type != P9_TVERSION) { + tag = p9_idpool_get(c->tagpool); + if (tag < 0) + return ERR_PTR(-ENOMEM); } - err = p9_client_rpc(clnt, tc, &rc); + req = p9_tag_alloc(c, tag, req_size); + if (IS_ERR(req)) + return req; + + /* marshall the data */ + p9pdu_prepare(req->tc, tag, type); + err = p9pdu_vwritef(req->tc, c->proto_version, fmt, ap); if (err) + goto reterr; + p9pdu_finalize(c, req->tc); + trace_9p_client_req(c, type, tag); + return req; +reterr: + p9_free_req(c, req); + return ERR_PTR(err); +} + +/** + * p9_client_rpc - issue a request and wait for a response + * @c: client session + * @type: type of request + * @fmt: protocol format string (see protocol.c) + * + * Returns request structure (which client must free using p9_free_req) + */ + +static struct p9_req_t * +p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) +{ + va_list ap; + int sigpending, err; + unsigned long flags; + struct p9_req_t *req; + + va_start(ap, fmt); + req = p9_client_prepare_req(c, type, c->msize, fmt, ap); + va_end(ap); + if (IS_ERR(req)) + return req; + + if (signal_pending(current)) { + sigpending = 1; + clear_thread_flag(TIF_SIGPENDING); + } else + sigpending = 0; + + err = c->trans_mod->request(c, req); + if (err < 0) { + if (err != -ERESTARTSYS && err != -EFAULT) + c->status = Disconnected; + goto reterr; + } +again: + /* Wait for the response */ + err = wait_event_interruptible(*req->wq, + req->status >= REQ_STATUS_RCVD); + + /* + * Make sure our req is coherent with regard to updates in other + * threads - echoes to wmb() in the callback + */ + smp_rmb(); + + if ((err == -ERESTARTSYS) && (c->status == Connected) + && (type == P9_TFLUSH)) { + sigpending = 1; + clear_thread_flag(TIF_SIGPENDING); + goto again; + } + + if (req->status == REQ_STATUS_ERROR) { + p9_debug(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err); + err = req->t_err; + } + if ((err == -ERESTARTSYS) && (c->status == Connected)) { + p9_debug(P9_DEBUG_MUX, "flushing\n"); + sigpending = 1; + clear_thread_flag(TIF_SIGPENDING); + + if (c->trans_mod->cancel(c, req)) + p9_client_flush(c, req); + + /* if we received the response anyway, don't signal error */ + if (req->status == REQ_STATUS_RCVD) + err = 0; + } + if (sigpending) { + spin_lock_irqsave(¤t->sighand->siglock, flags); + recalc_sigpending(); + spin_unlock_irqrestore(¤t->sighand->siglock, flags); + } + if (err < 0) + goto reterr; + + err = p9_check_errors(c, req); + trace_9p_client_res(c, type, req->rc->tag, err); + if (!err) + return req; +reterr: + p9_free_req(c, req); + return ERR_PTR(safe_errno(err)); +} + +/** + * p9_client_zc_rpc - issue a request and wait for a response + * @c: client session + * @type: type of request + * @uidata: user bffer that should be ued for zero copy read + * @uodata: user buffer that shoud be user for zero copy write + * @inlen: read buffer size + * @olen: write buffer size + * @hdrlen: reader header size, This is the size of response protocol data + * @fmt: protocol format string (see protocol.c) + * + * Returns request structure (which client must free using p9_free_req) + */ +static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type, + char *uidata, char *uodata, + int inlen, int olen, int in_hdrlen, + int kern_buf, const char *fmt, ...) +{ + va_list ap; + int sigpending, err; + unsigned long flags; + struct p9_req_t *req; + + va_start(ap, fmt); + /* + * We allocate a inline protocol data of only 4k bytes. + * The actual content is passed in zero-copy fashion. + */ + req = p9_client_prepare_req(c, type, P9_ZC_HDR_SZ, fmt, ap); + va_end(ap); + if (IS_ERR(req)) + return req; + + if (signal_pending(current)) { + sigpending = 1; + clear_thread_flag(TIF_SIGPENDING); + } else + sigpending = 0; + + /* If we are called with KERNEL_DS force kern_buf */ + if (segment_eq(get_fs(), KERNEL_DS)) + kern_buf = 1; + + err = c->trans_mod->zc_request(c, req, uidata, uodata, + inlen, olen, in_hdrlen, kern_buf); + if (err < 0) { + if (err == -EIO) + c->status = Disconnected; + goto reterr; + } + if (req->status == REQ_STATUS_ERROR) { + p9_debug(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err); + err = req->t_err; + } + if ((err == -ERESTARTSYS) && (c->status == Connected)) { + p9_debug(P9_DEBUG_MUX, "flushing\n"); + sigpending = 1; + clear_thread_flag(TIF_SIGPENDING); + + if (c->trans_mod->cancel(c, req)) + p9_client_flush(c, req); + + /* if we received the response anyway, don't signal error */ + if (req->status == REQ_STATUS_RCVD) + err = 0; + } + if (sigpending) { + spin_lock_irqsave(¤t->sighand->siglock, flags); + recalc_sigpending(); + spin_unlock_irqrestore(¤t->sighand->siglock, flags); + } + if (err < 0) + goto reterr; + + err = p9_check_zc_errors(c, req, uidata, in_hdrlen, kern_buf); + trace_9p_client_res(c, type, req->rc->tag, err); + if (!err) + return req; +reterr: + p9_free_req(c, req); + return ERR_PTR(safe_errno(err)); +} + +static struct p9_fid *p9_fid_create(struct p9_client *clnt) +{ + int ret; + struct p9_fid *fid; + unsigned long flags; + + p9_debug(P9_DEBUG_FID, "clnt %p\n", clnt); + fid = kmalloc(sizeof(struct p9_fid), GFP_KERNEL); + if (!fid) + return ERR_PTR(-ENOMEM); + + ret = p9_idpool_get(clnt->fidpool); + if (ret < 0) { + ret = -ENOSPC; goto error; + } + fid->fid = ret; + + memset(&fid->qid, 0, sizeof(struct p9_qid)); + fid->mode = -1; + fid->uid = current_fsuid(); + fid->clnt = clnt; + fid->rdir = NULL; + spin_lock_irqsave(&clnt->lock, flags); + list_add(&fid->flist, &clnt->fidlist); + spin_unlock_irqrestore(&clnt->lock, flags); + + return fid; - version = &rc->params.rversion.version; - if (version->len == 8 && !memcmp(version->str, "9P2000.u", 8)) - clnt->dotu = 1; - else if (version->len == 6 && !memcmp(version->str, "9P2000", 6)) - clnt->dotu = 0; +error: + kfree(fid); + return ERR_PTR(ret); +} + +static void p9_fid_destroy(struct p9_fid *fid) +{ + struct p9_client *clnt; + unsigned long flags; + + p9_debug(P9_DEBUG_FID, "fid %d\n", fid->fid); + clnt = fid->clnt; + p9_idpool_put(fid->fid, clnt->fidpool); + spin_lock_irqsave(&clnt->lock, flags); + list_del(&fid->flist); + spin_unlock_irqrestore(&clnt->lock, flags); + kfree(fid->rdir); + kfree(fid); +} + +static int p9_client_version(struct p9_client *c) +{ + int err = 0; + struct p9_req_t *req; + char *version; + int msize; + + p9_debug(P9_DEBUG_9P, ">>> TVERSION msize %d protocol %d\n", + c->msize, c->proto_version); + + switch (c->proto_version) { + case p9_proto_2000L: + req = p9_client_rpc(c, P9_TVERSION, "ds", + c->msize, "9P2000.L"); + break; + case p9_proto_2000u: + req = p9_client_rpc(c, P9_TVERSION, "ds", + c->msize, "9P2000.u"); + break; + case p9_proto_legacy: + req = p9_client_rpc(c, P9_TVERSION, "ds", + c->msize, "9P2000"); + break; + default: + return -EINVAL; + break; + } + + if (IS_ERR(req)) + return PTR_ERR(req); + + err = p9pdu_readf(req->rc, c->proto_version, "ds", &msize, &version); + if (err) { + p9_debug(P9_DEBUG_9P, "version error %d\n", err); + trace_9p_protocol_dump(c, req->rc); + goto error; + } + + p9_debug(P9_DEBUG_9P, "<<< RVERSION msize %d %s\n", msize, version); + if (!strncmp(version, "9P2000.L", 8)) + c->proto_version = p9_proto_2000L; + else if (!strncmp(version, "9P2000.u", 8)) + c->proto_version = p9_proto_2000u; + else if (!strncmp(version, "9P2000", 6)) + c->proto_version = p9_proto_legacy; else { err = -EREMOTEIO; goto error; } - n = rc->params.rversion.msize; - if (n < clnt->msize) - clnt->msize = n; + if (msize < c->msize) + c->msize = msize; + +error: + kfree(version); + p9_free_req(c, req); + + return err; +} + +struct p9_client *p9_client_create(const char *dev_name, char *options) +{ + int err; + struct p9_client *clnt; + char *client_id; + + err = 0; + clnt = kmalloc(sizeof(struct p9_client), GFP_KERNEL); + if (!clnt) + return ERR_PTR(-ENOMEM); + + clnt->trans_mod = NULL; + clnt->trans = NULL; + + client_id = utsname()->nodename; + memcpy(clnt->name, client_id, strlen(client_id) + 1); + + spin_lock_init(&clnt->lock); + INIT_LIST_HEAD(&clnt->fidlist); + + err = p9_tag_init(clnt); + if (err < 0) + goto free_client; + + err = parse_opts(options, clnt); + if (err < 0) + goto destroy_tagpool; + + if (!clnt->trans_mod) + clnt->trans_mod = v9fs_get_default_trans(); + + if (clnt->trans_mod == NULL) { + err = -EPROTONOSUPPORT; + p9_debug(P9_DEBUG_ERROR, + "No transport defined or default transport\n"); + goto destroy_tagpool; + } + + clnt->fidpool = p9_idpool_create(); + if (IS_ERR(clnt->fidpool)) { + err = PTR_ERR(clnt->fidpool); + goto put_trans; + } + + p9_debug(P9_DEBUG_MUX, "clnt %p trans %p msize %d protocol %d\n", + clnt, clnt->trans_mod, clnt->msize, clnt->proto_version); + + err = clnt->trans_mod->create(clnt, dev_name, options); + if (err) + goto destroy_fidpool; + + if (clnt->msize > clnt->trans_mod->maxsize) + clnt->msize = clnt->trans_mod->maxsize; + + err = p9_client_version(clnt); + if (err) + goto close_trans; - kfree(tc); - kfree(rc); return clnt; -error: - kfree(tc); - kfree(rc); - p9_client_destroy(clnt); +close_trans: + clnt->trans_mod->close(clnt); +destroy_fidpool: + p9_idpool_destroy(clnt->fidpool); +put_trans: + v9fs_put_trans(clnt->trans_mod); +destroy_tagpool: + p9_idpool_destroy(clnt->tagpool); +free_client: + kfree(clnt); return ERR_PTR(err); } EXPORT_SYMBOL(p9_client_create); @@ -211,44 +1072,52 @@ void p9_client_destroy(struct p9_client *clnt) { struct p9_fid *fid, *fidptr; - P9_DPRINTK(P9_DEBUG_9P, "clnt %p\n", clnt); + p9_debug(P9_DEBUG_MUX, "clnt %p\n", clnt); - if (clnt->trans) { - clnt->trans->close(clnt->trans); - kfree(clnt->trans); - clnt->trans = NULL; - } + if (clnt->trans_mod) + clnt->trans_mod->close(clnt); + + v9fs_put_trans(clnt->trans_mod); - list_for_each_entry_safe(fid, fidptr, &clnt->fidlist, flist) + list_for_each_entry_safe(fid, fidptr, &clnt->fidlist, flist) { + pr_info("Found fid %d not clunked\n", fid->fid); p9_fid_destroy(fid); + } if (clnt->fidpool) p9_idpool_destroy(clnt->fidpool); + p9_tag_cleanup(clnt); + kfree(clnt); } EXPORT_SYMBOL(p9_client_destroy); void p9_client_disconnect(struct p9_client *clnt) { - P9_DPRINTK(P9_DEBUG_9P, "clnt %p\n", clnt); - clnt->trans->status = Disconnected; + p9_debug(P9_DEBUG_9P, "clnt %p\n", clnt); + clnt->status = Disconnected; } EXPORT_SYMBOL(p9_client_disconnect); +void p9_client_begin_disconnect(struct p9_client *clnt) +{ + p9_debug(P9_DEBUG_9P, "clnt %p\n", clnt); + clnt->status = BeginDisconnect; +} +EXPORT_SYMBOL(p9_client_begin_disconnect); + struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, - char *uname, u32 n_uname, char *aname) + char *uname, kuid_t n_uname, char *aname) { - int err; - struct p9_fcall *tc, *rc; + int err = 0; + struct p9_req_t *req; struct p9_fid *fid; + struct p9_qid qid; - P9_DPRINTK(P9_DEBUG_9P, "clnt %p afid %d uname %s aname %s\n", - clnt, afid?afid->fid:-1, uname, aname); - err = 0; - tc = NULL; - rc = NULL; + p9_debug(P9_DEBUG_9P, ">>> TATTACH afid %d uname %s aname %s\n", + afid ? afid->fid : -1, uname, aname); fid = p9_fid_create(clnt); if (IS_ERR(fid)) { err = PTR_ERR(fid); @@ -256,90 +1125,47 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, goto error; } - tc = p9_create_tattach(fid->fid, afid?afid->fid:P9_NOFID, uname, aname, - n_uname, clnt->dotu); - if (IS_ERR(tc)) { - err = PTR_ERR(tc); - tc = NULL; + req = p9_client_rpc(clnt, P9_TATTACH, "ddss?u", fid->fid, + afid ? afid->fid : P9_NOFID, uname, aname, n_uname); + if (IS_ERR(req)) { + err = PTR_ERR(req); goto error; } - err = p9_client_rpc(clnt, tc, &rc); - if (err) - goto error; - - memmove(&fid->qid, &rc->params.rattach.qid, sizeof(struct p9_qid)); - kfree(tc); - kfree(rc); - return fid; - -error: - kfree(tc); - kfree(rc); - if (fid) - p9_fid_destroy(fid); - return ERR_PTR(err); -} -EXPORT_SYMBOL(p9_client_attach); - -struct p9_fid *p9_client_auth(struct p9_client *clnt, char *uname, - u32 n_uname, char *aname) -{ - int err; - struct p9_fcall *tc, *rc; - struct p9_fid *fid; - - P9_DPRINTK(P9_DEBUG_9P, "clnt %p uname %s aname %s\n", clnt, uname, - aname); - err = 0; - tc = NULL; - rc = NULL; - - fid = p9_fid_create(clnt); - if (IS_ERR(fid)) { - err = PTR_ERR(fid); - fid = NULL; + err = p9pdu_readf(req->rc, clnt->proto_version, "Q", &qid); + if (err) { + trace_9p_protocol_dump(clnt, req->rc); + p9_free_req(clnt, req); goto error; } - tc = p9_create_tauth(fid->fid, uname, aname, n_uname, clnt->dotu); - if (IS_ERR(tc)) { - err = PTR_ERR(tc); - tc = NULL; - goto error; - } + p9_debug(P9_DEBUG_9P, "<<< RATTACH qid %x.%llx.%x\n", + qid.type, (unsigned long long)qid.path, qid.version); - err = p9_client_rpc(clnt, tc, &rc); - if (err) - goto error; + memmove(&fid->qid, &qid, sizeof(struct p9_qid)); - memmove(&fid->qid, &rc->params.rauth.qid, sizeof(struct p9_qid)); - kfree(tc); - kfree(rc); + p9_free_req(clnt, req); return fid; error: - kfree(tc); - kfree(rc); if (fid) p9_fid_destroy(fid); return ERR_PTR(err); } -EXPORT_SYMBOL(p9_client_auth); +EXPORT_SYMBOL(p9_client_attach); -struct p9_fid *p9_client_walk(struct p9_fid *oldfid, int nwname, char **wnames, - int clone) +struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname, + char **wnames, int clone) { int err; - struct p9_fcall *tc, *rc; struct p9_client *clnt; struct p9_fid *fid; + struct p9_qid *wqids; + struct p9_req_t *req; + uint16_t nwqids, count; - P9_DPRINTK(P9_DEBUG_9P, "fid %d nwname %d wname[0] %s\n", - oldfid->fid, nwname, wnames?wnames[0]:NULL); err = 0; - tc = NULL; - rc = NULL; + wqids = NULL; clnt = oldfid->clnt; if (clone) { fid = p9_fid_create(clnt); @@ -353,53 +1179,52 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, int nwname, char **wnames, } else fid = oldfid; - tc = p9_create_twalk(oldfid->fid, fid->fid, nwname, wnames); - if (IS_ERR(tc)) { - err = PTR_ERR(tc); - tc = NULL; + + p9_debug(P9_DEBUG_9P, ">>> TWALK fids %d,%d nwname %ud wname[0] %s\n", + oldfid->fid, fid->fid, nwname, wnames ? wnames[0] : NULL); + + req = p9_client_rpc(clnt, P9_TWALK, "ddT", oldfid->fid, fid->fid, + nwname, wnames); + if (IS_ERR(req)) { + err = PTR_ERR(req); goto error; } - err = p9_client_rpc(clnt, tc, &rc); + err = p9pdu_readf(req->rc, clnt->proto_version, "R", &nwqids, &wqids); if (err) { - if (rc && rc->id == P9_RWALK) - goto clunk_fid; - else - goto error; + trace_9p_protocol_dump(clnt, req->rc); + p9_free_req(clnt, req); + goto clunk_fid; } + p9_free_req(clnt, req); + + p9_debug(P9_DEBUG_9P, "<<< RWALK nwqid %d:\n", nwqids); - if (rc->params.rwalk.nwqid != nwname) { + if (nwqids != nwname) { err = -ENOENT; goto clunk_fid; } + for (count = 0; count < nwqids; count++) + p9_debug(P9_DEBUG_9P, "<<< [%d] %x.%llx.%x\n", + count, wqids[count].type, + (unsigned long long)wqids[count].path, + wqids[count].version); + if (nwname) - memmove(&fid->qid, - &rc->params.rwalk.wqids[rc->params.rwalk.nwqid - 1], - sizeof(struct p9_qid)); + memmove(&fid->qid, &wqids[nwqids - 1], sizeof(struct p9_qid)); else fid->qid = oldfid->qid; - kfree(tc); - kfree(rc); + kfree(wqids); return fid; clunk_fid: - kfree(tc); - kfree(rc); - rc = NULL; - tc = p9_create_tclunk(fid->fid); - if (IS_ERR(tc)) { - err = PTR_ERR(tc); - tc = NULL; - goto error; - } - - p9_client_rpc(clnt, tc, &rc); + kfree(wqids); + p9_client_clunk(fid); + fid = NULL; error: - kfree(tc); - kfree(rc); if (fid && (fid != oldfid)) p9_fid_destroy(fid); @@ -410,654 +1235,1044 @@ EXPORT_SYMBOL(p9_client_walk); int p9_client_open(struct p9_fid *fid, int mode) { int err; - struct p9_fcall *tc, *rc; struct p9_client *clnt; + struct p9_req_t *req; + struct p9_qid qid; + int iounit; - P9_DPRINTK(P9_DEBUG_9P, "fid %d mode %d\n", fid->fid, mode); - err = 0; - tc = NULL; - rc = NULL; clnt = fid->clnt; + p9_debug(P9_DEBUG_9P, ">>> %s fid %d mode %d\n", + p9_is_proto_dotl(clnt) ? "TLOPEN" : "TOPEN", fid->fid, mode); + err = 0; if (fid->mode != -1) return -EINVAL; - tc = p9_create_topen(fid->fid, mode); - if (IS_ERR(tc)) { - err = PTR_ERR(tc); - tc = NULL; - goto done; + if (p9_is_proto_dotl(clnt)) + req = p9_client_rpc(clnt, P9_TLOPEN, "dd", fid->fid, mode); + else + req = p9_client_rpc(clnt, P9_TOPEN, "db", fid->fid, mode); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; } - err = p9_client_rpc(clnt, tc, &rc); - if (err) - goto done; + err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", &qid, &iounit); + if (err) { + trace_9p_protocol_dump(clnt, req->rc); + goto free_and_error; + } + + p9_debug(P9_DEBUG_9P, "<<< %s qid %x.%llx.%x iounit %x\n", + p9_is_proto_dotl(clnt) ? "RLOPEN" : "ROPEN", qid.type, + (unsigned long long)qid.path, qid.version, iounit); fid->mode = mode; - fid->iounit = rc->params.ropen.iounit; + fid->iounit = iounit; -done: - kfree(tc); - kfree(rc); +free_and_error: + p9_free_req(clnt, req); +error: return err; } EXPORT_SYMBOL(p9_client_open); +int p9_client_create_dotl(struct p9_fid *ofid, char *name, u32 flags, u32 mode, + kgid_t gid, struct p9_qid *qid) +{ + int err = 0; + struct p9_client *clnt; + struct p9_req_t *req; + int iounit; + + p9_debug(P9_DEBUG_9P, + ">>> TLCREATE fid %d name %s flags %d mode %d gid %d\n", + ofid->fid, name, flags, mode, + from_kgid(&init_user_ns, gid)); + clnt = ofid->clnt; + + if (ofid->mode != -1) + return -EINVAL; + + req = p9_client_rpc(clnt, P9_TLCREATE, "dsddg", ofid->fid, name, flags, + mode, gid); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; + } + + err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", qid, &iounit); + if (err) { + trace_9p_protocol_dump(clnt, req->rc); + goto free_and_error; + } + + p9_debug(P9_DEBUG_9P, "<<< RLCREATE qid %x.%llx.%x iounit %x\n", + qid->type, + (unsigned long long)qid->path, + qid->version, iounit); + + ofid->mode = mode; + ofid->iounit = iounit; + +free_and_error: + p9_free_req(clnt, req); +error: + return err; +} +EXPORT_SYMBOL(p9_client_create_dotl); + int p9_client_fcreate(struct p9_fid *fid, char *name, u32 perm, int mode, char *extension) { int err; - struct p9_fcall *tc, *rc; struct p9_client *clnt; + struct p9_req_t *req; + struct p9_qid qid; + int iounit; - P9_DPRINTK(P9_DEBUG_9P, "fid %d name %s perm %d mode %d\n", fid->fid, - name, perm, mode); + p9_debug(P9_DEBUG_9P, ">>> TCREATE fid %d name %s perm %d mode %d\n", + fid->fid, name, perm, mode); err = 0; - tc = NULL; - rc = NULL; clnt = fid->clnt; if (fid->mode != -1) return -EINVAL; - tc = p9_create_tcreate(fid->fid, name, perm, mode, extension, - clnt->dotu); - if (IS_ERR(tc)) { - err = PTR_ERR(tc); - tc = NULL; - goto done; + req = p9_client_rpc(clnt, P9_TCREATE, "dsdb?s", fid->fid, name, perm, + mode, extension); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; } - err = p9_client_rpc(clnt, tc, &rc); - if (err) - goto done; + err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", &qid, &iounit); + if (err) { + trace_9p_protocol_dump(clnt, req->rc); + goto free_and_error; + } + + p9_debug(P9_DEBUG_9P, "<<< RCREATE qid %x.%llx.%x iounit %x\n", + qid.type, + (unsigned long long)qid.path, + qid.version, iounit); fid->mode = mode; - fid->iounit = rc->params.ropen.iounit; + fid->iounit = iounit; -done: - kfree(tc); - kfree(rc); +free_and_error: + p9_free_req(clnt, req); +error: return err; } EXPORT_SYMBOL(p9_client_fcreate); -int p9_client_clunk(struct p9_fid *fid) +int p9_client_symlink(struct p9_fid *dfid, char *name, char *symtgt, kgid_t gid, + struct p9_qid *qid) { - int err; - struct p9_fcall *tc, *rc; + int err = 0; struct p9_client *clnt; + struct p9_req_t *req; - P9_DPRINTK(P9_DEBUG_9P, "fid %d\n", fid->fid); - err = 0; - tc = NULL; - rc = NULL; - clnt = fid->clnt; + p9_debug(P9_DEBUG_9P, ">>> TSYMLINK dfid %d name %s symtgt %s\n", + dfid->fid, name, symtgt); + clnt = dfid->clnt; - tc = p9_create_tclunk(fid->fid); - if (IS_ERR(tc)) { - err = PTR_ERR(tc); - tc = NULL; - goto done; + req = p9_client_rpc(clnt, P9_TSYMLINK, "dssg", dfid->fid, name, symtgt, + gid); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; } - err = p9_client_rpc(clnt, tc, &rc); - if (err) - goto done; + err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid); + if (err) { + trace_9p_protocol_dump(clnt, req->rc); + goto free_and_error; + } - p9_fid_destroy(fid); + p9_debug(P9_DEBUG_9P, "<<< RSYMLINK qid %x.%llx.%x\n", + qid->type, (unsigned long long)qid->path, qid->version); -done: - kfree(tc); - kfree(rc); +free_and_error: + p9_free_req(clnt, req); +error: return err; } -EXPORT_SYMBOL(p9_client_clunk); +EXPORT_SYMBOL(p9_client_symlink); -int p9_client_remove(struct p9_fid *fid) +int p9_client_link(struct p9_fid *dfid, struct p9_fid *oldfid, char *newname) +{ + struct p9_client *clnt; + struct p9_req_t *req; + + p9_debug(P9_DEBUG_9P, ">>> TLINK dfid %d oldfid %d newname %s\n", + dfid->fid, oldfid->fid, newname); + clnt = dfid->clnt; + req = p9_client_rpc(clnt, P9_TLINK, "dds", dfid->fid, oldfid->fid, + newname); + if (IS_ERR(req)) + return PTR_ERR(req); + + p9_debug(P9_DEBUG_9P, "<<< RLINK\n"); + p9_free_req(clnt, req); + return 0; +} +EXPORT_SYMBOL(p9_client_link); + +int p9_client_fsync(struct p9_fid *fid, int datasync) { int err; - struct p9_fcall *tc, *rc; struct p9_client *clnt; + struct p9_req_t *req; - P9_DPRINTK(P9_DEBUG_9P, "fid %d\n", fid->fid); + p9_debug(P9_DEBUG_9P, ">>> TFSYNC fid %d datasync:%d\n", + fid->fid, datasync); err = 0; - tc = NULL; - rc = NULL; clnt = fid->clnt; - tc = p9_create_tremove(fid->fid); - if (IS_ERR(tc)) { - err = PTR_ERR(tc); - tc = NULL; - goto done; + req = p9_client_rpc(clnt, P9_TFSYNC, "dd", fid->fid, datasync); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; } - err = p9_client_rpc(clnt, tc, &rc); - if (err) - goto done; + p9_debug(P9_DEBUG_9P, "<<< RFSYNC fid %d\n", fid->fid); - p9_fid_destroy(fid); + p9_free_req(clnt, req); -done: - kfree(tc); - kfree(rc); +error: return err; } -EXPORT_SYMBOL(p9_client_remove); +EXPORT_SYMBOL(p9_client_fsync); -int p9_client_read(struct p9_fid *fid, char *data, u64 offset, u32 count) +int p9_client_clunk(struct p9_fid *fid) { - int err, n, rsize, total; - struct p9_fcall *tc, *rc; + int err; struct p9_client *clnt; + struct p9_req_t *req; + int retries = 0; + + if (!fid) { + pr_warn("%s (%d): Trying to clunk with NULL fid\n", + __func__, task_pid_nr(current)); + dump_stack(); + return 0; + } - P9_DPRINTK(P9_DEBUG_9P, "fid %d offset %llu %d\n", fid->fid, - (long long unsigned) offset, count); +again: + p9_debug(P9_DEBUG_9P, ">>> TCLUNK fid %d (try %d)\n", fid->fid, + retries); err = 0; - tc = NULL; - rc = NULL; clnt = fid->clnt; - total = 0; - - rsize = fid->iounit; - if (!rsize || rsize > clnt->msize-P9_IOHDRSZ) - rsize = clnt->msize - P9_IOHDRSZ; - do { - if (count < rsize) - rsize = count; - - tc = p9_create_tread(fid->fid, offset, rsize); - if (IS_ERR(tc)) { - err = PTR_ERR(tc); - tc = NULL; - goto error; - } - - err = p9_client_rpc(clnt, tc, &rc); - if (err) - goto error; - - n = rc->params.rread.count; - if (n > count) - n = count; - - memmove(data, rc->params.rread.data, n); - count -= n; - data += n; - offset += n; - total += n; - kfree(tc); - tc = NULL; - kfree(rc); - rc = NULL; - } while (count > 0 && n == rsize); + req = p9_client_rpc(clnt, P9_TCLUNK, "d", fid->fid); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; + } - return total; + p9_debug(P9_DEBUG_9P, "<<< RCLUNK fid %d\n", fid->fid); + p9_free_req(clnt, req); error: - kfree(tc); - kfree(rc); + /* + * Fid is not valid even after a failed clunk + * If interrupted, retry once then give up and + * leak fid until umount. + */ + if (err == -ERESTARTSYS) { + if (retries++ == 0) + goto again; + } else + p9_fid_destroy(fid); return err; } -EXPORT_SYMBOL(p9_client_read); +EXPORT_SYMBOL(p9_client_clunk); -int p9_client_write(struct p9_fid *fid, char *data, u64 offset, u32 count) +int p9_client_remove(struct p9_fid *fid) { - int err, n, rsize, total; - struct p9_fcall *tc, *rc; + int err; struct p9_client *clnt; + struct p9_req_t *req; - P9_DPRINTK(P9_DEBUG_9P, "fid %d offset %llu count %d\n", fid->fid, - (long long unsigned) offset, count); + p9_debug(P9_DEBUG_9P, ">>> TREMOVE fid %d\n", fid->fid); err = 0; - tc = NULL; - rc = NULL; clnt = fid->clnt; - total = 0; - rsize = fid->iounit; - if (!rsize || rsize > clnt->msize-P9_IOHDRSZ) - rsize = clnt->msize - P9_IOHDRSZ; + req = p9_client_rpc(clnt, P9_TREMOVE, "d", fid->fid); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; + } - do { - if (count < rsize) - rsize = count; + p9_debug(P9_DEBUG_9P, "<<< RREMOVE fid %d\n", fid->fid); - tc = p9_create_twrite(fid->fid, offset, rsize, data); - if (IS_ERR(tc)) { - err = PTR_ERR(tc); - tc = NULL; - goto error; - } + p9_free_req(clnt, req); +error: + if (err == -ERESTARTSYS) + p9_client_clunk(fid); + else + p9_fid_destroy(fid); + return err; +} +EXPORT_SYMBOL(p9_client_remove); - err = p9_client_rpc(clnt, tc, &rc); - if (err) - goto error; +int p9_client_unlinkat(struct p9_fid *dfid, const char *name, int flags) +{ + int err = 0; + struct p9_req_t *req; + struct p9_client *clnt; - n = rc->params.rread.count; - count -= n; - data += n; - offset += n; - total += n; - kfree(tc); - tc = NULL; - kfree(rc); - rc = NULL; - } while (count > 0); + p9_debug(P9_DEBUG_9P, ">>> TUNLINKAT fid %d %s %d\n", + dfid->fid, name, flags); - return total; + clnt = dfid->clnt; + req = p9_client_rpc(clnt, P9_TUNLINKAT, "dsd", dfid->fid, name, flags); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; + } + p9_debug(P9_DEBUG_9P, "<<< RUNLINKAT fid %d %s\n", dfid->fid, name); + p9_free_req(clnt, req); error: - kfree(tc); - kfree(rc); return err; } -EXPORT_SYMBOL(p9_client_write); +EXPORT_SYMBOL(p9_client_unlinkat); int -p9_client_uread(struct p9_fid *fid, char __user *data, u64 offset, u32 count) +p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset, + u32 count) { - int err, n, rsize, total; - struct p9_fcall *tc, *rc; + char *dataptr; + int kernel_buf = 0; + struct p9_req_t *req; struct p9_client *clnt; + int err, rsize, non_zc = 0; + - P9_DPRINTK(P9_DEBUG_9P, "fid %d offset %llu count %d\n", fid->fid, - (long long unsigned) offset, count); + p9_debug(P9_DEBUG_9P, ">>> TREAD fid %d offset %llu %d\n", + fid->fid, (unsigned long long) offset, count); err = 0; - tc = NULL; - rc = NULL; clnt = fid->clnt; - total = 0; rsize = fid->iounit; if (!rsize || rsize > clnt->msize-P9_IOHDRSZ) rsize = clnt->msize - P9_IOHDRSZ; - do { - if (count < rsize) - rsize = count; - - tc = p9_create_tread(fid->fid, offset, rsize); - if (IS_ERR(tc)) { - err = PTR_ERR(tc); - tc = NULL; - goto error; - } + if (count < rsize) + rsize = count; + + /* Don't bother zerocopy for small IO (< 1024) */ + if (clnt->trans_mod->zc_request && rsize > 1024) { + char *indata; + if (data) { + kernel_buf = 1; + indata = data; + } else + indata = (__force char *)udata; + /* + * response header len is 11 + * PDU Header(7) + IO Size (4) + */ + req = p9_client_zc_rpc(clnt, P9_TREAD, indata, NULL, rsize, 0, + 11, kernel_buf, "dqd", fid->fid, + offset, rsize); + } else { + non_zc = 1; + req = p9_client_rpc(clnt, P9_TREAD, "dqd", fid->fid, offset, + rsize); + } + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; + } - err = p9_client_rpc(clnt, tc, &rc); - if (err) - goto error; + err = p9pdu_readf(req->rc, clnt->proto_version, "D", &count, &dataptr); + if (err) { + trace_9p_protocol_dump(clnt, req->rc); + goto free_and_error; + } - n = rc->params.rread.count; - if (n > count) - n = count; + p9_debug(P9_DEBUG_9P, "<<< RREAD count %d\n", count); - err = copy_to_user(data, rc->params.rread.data, n); - if (err) { - err = -EFAULT; - goto error; + if (non_zc) { + if (data) { + memmove(data, dataptr, count); + } else { + err = copy_to_user(udata, dataptr, count); + if (err) { + err = -EFAULT; + goto free_and_error; + } } + } + p9_free_req(clnt, req); + return count; - count -= n; - data += n; - offset += n; - total += n; - kfree(tc); - tc = NULL; - kfree(rc); - rc = NULL; - } while (count > 0 && n == rsize); - - return total; - +free_and_error: + p9_free_req(clnt, req); error: - kfree(tc); - kfree(rc); return err; } -EXPORT_SYMBOL(p9_client_uread); +EXPORT_SYMBOL(p9_client_read); int -p9_client_uwrite(struct p9_fid *fid, const char __user *data, u64 offset, - u32 count) +p9_client_write(struct p9_fid *fid, char *data, const char __user *udata, + u64 offset, u32 count) { - int err, n, rsize, total; - struct p9_fcall *tc, *rc; + int err, rsize; + int kernel_buf = 0; struct p9_client *clnt; + struct p9_req_t *req; - P9_DPRINTK(P9_DEBUG_9P, "fid %d offset %llu count %d\n", fid->fid, - (long long unsigned) offset, count); + p9_debug(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu count %d\n", + fid->fid, (unsigned long long) offset, count); err = 0; - tc = NULL; - rc = NULL; clnt = fid->clnt; - total = 0; rsize = fid->iounit; if (!rsize || rsize > clnt->msize-P9_IOHDRSZ) rsize = clnt->msize - P9_IOHDRSZ; - do { - if (count < rsize) - rsize = count; - - tc = p9_create_twrite_u(fid->fid, offset, rsize, data); - if (IS_ERR(tc)) { - err = PTR_ERR(tc); - tc = NULL; - goto error; - } + if (count < rsize) + rsize = count; + + /* Don't bother zerocopy for small IO (< 1024) */ + if (clnt->trans_mod->zc_request && rsize > 1024) { + char *odata; + if (data) { + kernel_buf = 1; + odata = data; + } else + odata = (char *)udata; + req = p9_client_zc_rpc(clnt, P9_TWRITE, NULL, odata, 0, rsize, + P9_ZC_HDR_SZ, kernel_buf, "dqd", + fid->fid, offset, rsize); + } else { + if (data) + req = p9_client_rpc(clnt, P9_TWRITE, "dqD", fid->fid, + offset, rsize, data); + else + req = p9_client_rpc(clnt, P9_TWRITE, "dqU", fid->fid, + offset, rsize, udata); + } + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; + } - err = p9_client_rpc(clnt, tc, &rc); - if (err) - goto error; + err = p9pdu_readf(req->rc, clnt->proto_version, "d", &count); + if (err) { + trace_9p_protocol_dump(clnt, req->rc); + goto free_and_error; + } - n = rc->params.rread.count; - count -= n; - data += n; - offset += n; - total += n; - kfree(tc); - tc = NULL; - kfree(rc); - rc = NULL; - } while (count > 0); + p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", count); - return total; + p9_free_req(clnt, req); + return count; +free_and_error: + p9_free_req(clnt, req); error: - kfree(tc); - kfree(rc); return err; } -EXPORT_SYMBOL(p9_client_uwrite); +EXPORT_SYMBOL(p9_client_write); -int p9_client_readn(struct p9_fid *fid, char *data, u64 offset, u32 count) +struct p9_wstat *p9_client_stat(struct p9_fid *fid) { - int n, total; - - P9_DPRINTK(P9_DEBUG_9P, "fid %d offset %llu count %d\n", fid->fid, - (long long unsigned) offset, count); - n = 0; - total = 0; - while (count) { - n = p9_client_read(fid, data, offset, count); - if (n <= 0) - break; + int err; + struct p9_client *clnt; + struct p9_wstat *ret = kmalloc(sizeof(struct p9_wstat), GFP_KERNEL); + struct p9_req_t *req; + u16 ignored; - data += n; - offset += n; - count -= n; - total += n; + p9_debug(P9_DEBUG_9P, ">>> TSTAT fid %d\n", fid->fid); + + if (!ret) + return ERR_PTR(-ENOMEM); + + err = 0; + clnt = fid->clnt; + + req = p9_client_rpc(clnt, P9_TSTAT, "d", fid->fid); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; } - if (n < 0) - total = n; + err = p9pdu_readf(req->rc, clnt->proto_version, "wS", &ignored, ret); + if (err) { + trace_9p_protocol_dump(clnt, req->rc); + p9_free_req(clnt, req); + goto error; + } - return total; + p9_debug(P9_DEBUG_9P, + "<<< RSTAT sz=%x type=%x dev=%x qid=%x.%llx.%x\n" + "<<< mode=%8.8x atime=%8.8x mtime=%8.8x length=%llx\n" + "<<< name=%s uid=%s gid=%s muid=%s extension=(%s)\n" + "<<< uid=%d gid=%d n_muid=%d\n", + ret->size, ret->type, ret->dev, ret->qid.type, + (unsigned long long)ret->qid.path, ret->qid.version, ret->mode, + ret->atime, ret->mtime, (unsigned long long)ret->length, + ret->name, ret->uid, ret->gid, ret->muid, ret->extension, + from_kuid(&init_user_ns, ret->n_uid), + from_kgid(&init_user_ns, ret->n_gid), + from_kuid(&init_user_ns, ret->n_muid)); + + p9_free_req(clnt, req); + return ret; + +error: + kfree(ret); + return ERR_PTR(err); } -EXPORT_SYMBOL(p9_client_readn); +EXPORT_SYMBOL(p9_client_stat); -struct p9_stat *p9_client_stat(struct p9_fid *fid) +struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid, + u64 request_mask) { int err; - struct p9_fcall *tc, *rc; struct p9_client *clnt; - struct p9_stat *ret; + struct p9_stat_dotl *ret = kmalloc(sizeof(struct p9_stat_dotl), + GFP_KERNEL); + struct p9_req_t *req; + + p9_debug(P9_DEBUG_9P, ">>> TGETATTR fid %d, request_mask %lld\n", + fid->fid, request_mask); + + if (!ret) + return ERR_PTR(-ENOMEM); - P9_DPRINTK(P9_DEBUG_9P, "fid %d\n", fid->fid); err = 0; - tc = NULL; - rc = NULL; - ret = NULL; clnt = fid->clnt; - tc = p9_create_tstat(fid->fid); - if (IS_ERR(tc)) { - err = PTR_ERR(tc); - tc = NULL; + req = p9_client_rpc(clnt, P9_TGETATTR, "dq", fid->fid, request_mask); + if (IS_ERR(req)) { + err = PTR_ERR(req); goto error; } - err = p9_client_rpc(clnt, tc, &rc); - if (err) - goto error; - - ret = p9_clone_stat(&rc->params.rstat.stat, clnt->dotu); - if (IS_ERR(ret)) { - err = PTR_ERR(ret); - ret = NULL; + err = p9pdu_readf(req->rc, clnt->proto_version, "A", ret); + if (err) { + trace_9p_protocol_dump(clnt, req->rc); + p9_free_req(clnt, req); goto error; } - kfree(tc); - kfree(rc); + p9_debug(P9_DEBUG_9P, + "<<< RGETATTR st_result_mask=%lld\n" + "<<< qid=%x.%llx.%x\n" + "<<< st_mode=%8.8x st_nlink=%llu\n" + "<<< st_uid=%d st_gid=%d\n" + "<<< st_rdev=%llx st_size=%llx st_blksize=%llu st_blocks=%llu\n" + "<<< st_atime_sec=%lld st_atime_nsec=%lld\n" + "<<< st_mtime_sec=%lld st_mtime_nsec=%lld\n" + "<<< st_ctime_sec=%lld st_ctime_nsec=%lld\n" + "<<< st_btime_sec=%lld st_btime_nsec=%lld\n" + "<<< st_gen=%lld st_data_version=%lld", + ret->st_result_mask, ret->qid.type, ret->qid.path, + ret->qid.version, ret->st_mode, ret->st_nlink, + from_kuid(&init_user_ns, ret->st_uid), + from_kgid(&init_user_ns, ret->st_gid), + ret->st_rdev, ret->st_size, ret->st_blksize, + ret->st_blocks, ret->st_atime_sec, ret->st_atime_nsec, + ret->st_mtime_sec, ret->st_mtime_nsec, ret->st_ctime_sec, + ret->st_ctime_nsec, ret->st_btime_sec, ret->st_btime_nsec, + ret->st_gen, ret->st_data_version); + + p9_free_req(clnt, req); return ret; error: - kfree(tc); - kfree(rc); kfree(ret); return ERR_PTR(err); } -EXPORT_SYMBOL(p9_client_stat); +EXPORT_SYMBOL(p9_client_getattr_dotl); + +static int p9_client_statsize(struct p9_wstat *wst, int proto_version) +{ + int ret; + + /* NOTE: size shouldn't include its own length */ + /* size[2] type[2] dev[4] qid[13] */ + /* mode[4] atime[4] mtime[4] length[8]*/ + /* name[s] uid[s] gid[s] muid[s] */ + ret = 2+4+13+4+4+4+8+2+2+2+2; + + if (wst->name) + ret += strlen(wst->name); + if (wst->uid) + ret += strlen(wst->uid); + if (wst->gid) + ret += strlen(wst->gid); + if (wst->muid) + ret += strlen(wst->muid); + + if ((proto_version == p9_proto_2000u) || + (proto_version == p9_proto_2000L)) { + ret += 2+4+4+4; /* extension[s] n_uid[4] n_gid[4] n_muid[4] */ + if (wst->extension) + ret += strlen(wst->extension); + } + + return ret; +} int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst) { int err; - struct p9_fcall *tc, *rc; + struct p9_req_t *req; struct p9_client *clnt; - P9_DPRINTK(P9_DEBUG_9P, "fid %d\n", fid->fid); err = 0; - tc = NULL; - rc = NULL; clnt = fid->clnt; - - tc = p9_create_twstat(fid->fid, wst, clnt->dotu); - if (IS_ERR(tc)) { - err = PTR_ERR(tc); - tc = NULL; - goto done; + wst->size = p9_client_statsize(wst, clnt->proto_version); + p9_debug(P9_DEBUG_9P, ">>> TWSTAT fid %d\n", fid->fid); + p9_debug(P9_DEBUG_9P, + " sz=%x type=%x dev=%x qid=%x.%llx.%x\n" + " mode=%8.8x atime=%8.8x mtime=%8.8x length=%llx\n" + " name=%s uid=%s gid=%s muid=%s extension=(%s)\n" + " uid=%d gid=%d n_muid=%d\n", + wst->size, wst->type, wst->dev, wst->qid.type, + (unsigned long long)wst->qid.path, wst->qid.version, wst->mode, + wst->atime, wst->mtime, (unsigned long long)wst->length, + wst->name, wst->uid, wst->gid, wst->muid, wst->extension, + from_kuid(&init_user_ns, wst->n_uid), + from_kgid(&init_user_ns, wst->n_gid), + from_kuid(&init_user_ns, wst->n_muid)); + + req = p9_client_rpc(clnt, P9_TWSTAT, "dwS", fid->fid, wst->size+2, wst); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; } - err = p9_client_rpc(clnt, tc, &rc); + p9_debug(P9_DEBUG_9P, "<<< RWSTAT fid %d\n", fid->fid); -done: - kfree(tc); - kfree(rc); + p9_free_req(clnt, req); +error: return err; } EXPORT_SYMBOL(p9_client_wstat); -struct p9_stat *p9_client_dirread(struct p9_fid *fid, u64 offset) +int p9_client_setattr(struct p9_fid *fid, struct p9_iattr_dotl *p9attr) { - int err, n, m; - struct p9_fcall *tc, *rc; + int err; + struct p9_req_t *req; struct p9_client *clnt; - struct p9_stat st, *ret; - P9_DPRINTK(P9_DEBUG_9P, "fid %d offset %llu\n", fid->fid, - (long long unsigned) offset); err = 0; - tc = NULL; - rc = NULL; - ret = NULL; clnt = fid->clnt; + p9_debug(P9_DEBUG_9P, ">>> TSETATTR fid %d\n", fid->fid); + p9_debug(P9_DEBUG_9P, + " valid=%x mode=%x uid=%d gid=%d size=%lld\n" + " atime_sec=%lld atime_nsec=%lld\n" + " mtime_sec=%lld mtime_nsec=%lld\n", + p9attr->valid, p9attr->mode, + from_kuid(&init_user_ns, p9attr->uid), + from_kgid(&init_user_ns, p9attr->gid), + p9attr->size, p9attr->atime_sec, p9attr->atime_nsec, + p9attr->mtime_sec, p9attr->mtime_nsec); + + req = p9_client_rpc(clnt, P9_TSETATTR, "dI", fid->fid, p9attr); + + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; + } + p9_debug(P9_DEBUG_9P, "<<< RSETATTR fid %d\n", fid->fid); + p9_free_req(clnt, req); +error: + return err; +} +EXPORT_SYMBOL(p9_client_setattr); - /* if the offset is below or above the current response, free it */ - if (offset < fid->rdir_fpos || (fid->rdir_fcall && - offset >= fid->rdir_fpos+fid->rdir_fcall->params.rread.count)) { - fid->rdir_pos = 0; - if (fid->rdir_fcall) - fid->rdir_fpos += fid->rdir_fcall->params.rread.count; - - kfree(fid->rdir_fcall); - fid->rdir_fcall = NULL; - if (offset < fid->rdir_fpos) - fid->rdir_fpos = 0; - } - - if (!fid->rdir_fcall) { - n = fid->iounit; - if (!n || n > clnt->msize-P9_IOHDRSZ) - n = clnt->msize - P9_IOHDRSZ; - - while (1) { - if (fid->rdir_fcall) { - fid->rdir_fpos += - fid->rdir_fcall->params.rread.count; - kfree(fid->rdir_fcall); - fid->rdir_fcall = NULL; - } - - tc = p9_create_tread(fid->fid, fid->rdir_fpos, n); - if (IS_ERR(tc)) { - err = PTR_ERR(tc); - tc = NULL; - goto error; - } +int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb) +{ + int err; + struct p9_req_t *req; + struct p9_client *clnt; - err = p9_client_rpc(clnt, tc, &rc); - if (err) - goto error; + err = 0; + clnt = fid->clnt; - n = rc->params.rread.count; - if (n == 0) - goto done; + p9_debug(P9_DEBUG_9P, ">>> TSTATFS fid %d\n", fid->fid); - fid->rdir_fcall = rc; - rc = NULL; - if (offset >= fid->rdir_fpos && - offset < fid->rdir_fpos+n) - break; - } + req = p9_client_rpc(clnt, P9_TSTATFS, "d", fid->fid); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; + } - fid->rdir_pos = 0; + err = p9pdu_readf(req->rc, clnt->proto_version, "ddqqqqqqd", &sb->type, + &sb->bsize, &sb->blocks, &sb->bfree, &sb->bavail, + &sb->files, &sb->ffree, &sb->fsid, &sb->namelen); + if (err) { + trace_9p_protocol_dump(clnt, req->rc); + p9_free_req(clnt, req); + goto error; } - m = offset - fid->rdir_fpos; - if (m < 0) - goto done; + p9_debug(P9_DEBUG_9P, "<<< RSTATFS fid %d type 0x%lx bsize %ld " + "blocks %llu bfree %llu bavail %llu files %llu ffree %llu " + "fsid %llu namelen %ld\n", + fid->fid, (long unsigned int)sb->type, (long int)sb->bsize, + sb->blocks, sb->bfree, sb->bavail, sb->files, sb->ffree, + sb->fsid, (long int)sb->namelen); + + p9_free_req(clnt, req); +error: + return err; +} +EXPORT_SYMBOL(p9_client_statfs); + +int p9_client_rename(struct p9_fid *fid, + struct p9_fid *newdirfid, const char *name) +{ + int err; + struct p9_req_t *req; + struct p9_client *clnt; + + err = 0; + clnt = fid->clnt; - n = p9_deserialize_stat(fid->rdir_fcall->params.rread.data + m, - fid->rdir_fcall->params.rread.count - m, &st, clnt->dotu); + p9_debug(P9_DEBUG_9P, ">>> TRENAME fid %d newdirfid %d name %s\n", + fid->fid, newdirfid->fid, name); - if (!n) { - err = -EIO; + req = p9_client_rpc(clnt, P9_TRENAME, "dds", fid->fid, + newdirfid->fid, name); + if (IS_ERR(req)) { + err = PTR_ERR(req); goto error; } - fid->rdir_pos += n; - st.size = n; - ret = p9_clone_stat(&st, clnt->dotu); - if (IS_ERR(ret)) { - err = PTR_ERR(ret); - ret = NULL; + p9_debug(P9_DEBUG_9P, "<<< RRENAME fid %d\n", fid->fid); + + p9_free_req(clnt, req); +error: + return err; +} +EXPORT_SYMBOL(p9_client_rename); + +int p9_client_renameat(struct p9_fid *olddirfid, const char *old_name, + struct p9_fid *newdirfid, const char *new_name) +{ + int err; + struct p9_req_t *req; + struct p9_client *clnt; + + err = 0; + clnt = olddirfid->clnt; + + p9_debug(P9_DEBUG_9P, ">>> TRENAMEAT olddirfid %d old name %s" + " newdirfid %d new name %s\n", olddirfid->fid, old_name, + newdirfid->fid, new_name); + + req = p9_client_rpc(clnt, P9_TRENAMEAT, "dsds", olddirfid->fid, + old_name, newdirfid->fid, new_name); + if (IS_ERR(req)) { + err = PTR_ERR(req); goto error; } -done: - kfree(tc); - kfree(rc); - return ret; + p9_debug(P9_DEBUG_9P, "<<< RRENAMEAT newdirfid %d new name %s\n", + newdirfid->fid, new_name); + p9_free_req(clnt, req); error: - kfree(tc); - kfree(rc); - kfree(ret); + return err; +} +EXPORT_SYMBOL(p9_client_renameat); + +/* + * An xattrwalk without @attr_name gives the fid for the lisxattr namespace + */ +struct p9_fid *p9_client_xattrwalk(struct p9_fid *file_fid, + const char *attr_name, u64 *attr_size) +{ + int err; + struct p9_req_t *req; + struct p9_client *clnt; + struct p9_fid *attr_fid; + + err = 0; + clnt = file_fid->clnt; + attr_fid = p9_fid_create(clnt); + if (IS_ERR(attr_fid)) { + err = PTR_ERR(attr_fid); + attr_fid = NULL; + goto error; + } + p9_debug(P9_DEBUG_9P, + ">>> TXATTRWALK file_fid %d, attr_fid %d name %s\n", + file_fid->fid, attr_fid->fid, attr_name); + + req = p9_client_rpc(clnt, P9_TXATTRWALK, "dds", + file_fid->fid, attr_fid->fid, attr_name); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; + } + err = p9pdu_readf(req->rc, clnt->proto_version, "q", attr_size); + if (err) { + trace_9p_protocol_dump(clnt, req->rc); + p9_free_req(clnt, req); + goto clunk_fid; + } + p9_free_req(clnt, req); + p9_debug(P9_DEBUG_9P, "<<< RXATTRWALK fid %d size %llu\n", + attr_fid->fid, *attr_size); + return attr_fid; +clunk_fid: + p9_client_clunk(attr_fid); + attr_fid = NULL; +error: + if (attr_fid && (attr_fid != file_fid)) + p9_fid_destroy(attr_fid); + return ERR_PTR(err); } -EXPORT_SYMBOL(p9_client_dirread); +EXPORT_SYMBOL_GPL(p9_client_xattrwalk); -static struct p9_stat *p9_clone_stat(struct p9_stat *st, int dotu) +int p9_client_xattrcreate(struct p9_fid *fid, const char *name, + u64 attr_size, int flags) { - int n; - char *p; - struct p9_stat *ret; + int err; + struct p9_req_t *req; + struct p9_client *clnt; - n = sizeof(struct p9_stat) + st->name.len + st->uid.len + st->gid.len + - st->muid.len; + p9_debug(P9_DEBUG_9P, + ">>> TXATTRCREATE fid %d name %s size %lld flag %d\n", + fid->fid, name, (long long)attr_size, flags); + err = 0; + clnt = fid->clnt; + req = p9_client_rpc(clnt, P9_TXATTRCREATE, "dsqd", + fid->fid, name, attr_size, flags); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; + } + p9_debug(P9_DEBUG_9P, "<<< RXATTRCREATE fid %d\n", fid->fid); + p9_free_req(clnt, req); +error: + return err; +} +EXPORT_SYMBOL_GPL(p9_client_xattrcreate); - if (dotu) - n += st->extension.len; +int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) +{ + int err, rsize, non_zc = 0; + struct p9_client *clnt; + struct p9_req_t *req; + char *dataptr; - ret = kmalloc(n, GFP_KERNEL); - if (!ret) - return ERR_PTR(-ENOMEM); + p9_debug(P9_DEBUG_9P, ">>> TREADDIR fid %d offset %llu count %d\n", + fid->fid, (unsigned long long) offset, count); - memmove(ret, st, sizeof(struct p9_stat)); - p = ((char *) ret) + sizeof(struct p9_stat); - memmove(p, st->name.str, st->name.len); - ret->name.str = p; - p += st->name.len; - memmove(p, st->uid.str, st->uid.len); - ret->uid.str = p; - p += st->uid.len; - memmove(p, st->gid.str, st->gid.len); - ret->gid.str = p; - p += st->gid.len; - memmove(p, st->muid.str, st->muid.len); - ret->muid.str = p; - p += st->muid.len; - - if (dotu) { - memmove(p, st->extension.str, st->extension.len); - ret->extension.str = p; - p += st->extension.len; + err = 0; + clnt = fid->clnt; + + rsize = fid->iounit; + if (!rsize || rsize > clnt->msize-P9_READDIRHDRSZ) + rsize = clnt->msize - P9_READDIRHDRSZ; + + if (count < rsize) + rsize = count; + + /* Don't bother zerocopy for small IO (< 1024) */ + if (clnt->trans_mod->zc_request && rsize > 1024) { + /* + * response header len is 11 + * PDU Header(7) + IO Size (4) + */ + req = p9_client_zc_rpc(clnt, P9_TREADDIR, data, NULL, rsize, 0, + 11, 1, "dqd", fid->fid, offset, rsize); + } else { + non_zc = 1; + req = p9_client_rpc(clnt, P9_TREADDIR, "dqd", fid->fid, + offset, rsize); + } + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; } - return ret; + err = p9pdu_readf(req->rc, clnt->proto_version, "D", &count, &dataptr); + if (err) { + trace_9p_protocol_dump(clnt, req->rc); + goto free_and_error; + } + + p9_debug(P9_DEBUG_9P, "<<< RREADDIR count %d\n", count); + + if (non_zc) + memmove(data, dataptr, count); + + p9_free_req(clnt, req); + return count; + +free_and_error: + p9_free_req(clnt, req); +error: + return err; } +EXPORT_SYMBOL(p9_client_readdir); -static struct p9_fid *p9_fid_create(struct p9_client *clnt) +int p9_client_mknod_dotl(struct p9_fid *fid, char *name, int mode, + dev_t rdev, kgid_t gid, struct p9_qid *qid) { int err; - struct p9_fid *fid; + struct p9_client *clnt; + struct p9_req_t *req; - P9_DPRINTK(P9_DEBUG_9P, "clnt %p\n", clnt); - fid = kmalloc(sizeof(struct p9_fid), GFP_KERNEL); - if (!fid) - return ERR_PTR(-ENOMEM); + err = 0; + clnt = fid->clnt; + p9_debug(P9_DEBUG_9P, ">>> TMKNOD fid %d name %s mode %d major %d " + "minor %d\n", fid->fid, name, mode, MAJOR(rdev), MINOR(rdev)); + req = p9_client_rpc(clnt, P9_TMKNOD, "dsdddg", fid->fid, name, mode, + MAJOR(rdev), MINOR(rdev), gid); + if (IS_ERR(req)) + return PTR_ERR(req); + + err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid); + if (err) { + trace_9p_protocol_dump(clnt, req->rc); + goto error; + } + p9_debug(P9_DEBUG_9P, "<<< RMKNOD qid %x.%llx.%x\n", qid->type, + (unsigned long long)qid->path, qid->version); - fid->fid = p9_idpool_get(clnt->fidpool); - if (fid->fid < 0) { - err = -ENOSPC; +error: + p9_free_req(clnt, req); + return err; + +} +EXPORT_SYMBOL(p9_client_mknod_dotl); + +int p9_client_mkdir_dotl(struct p9_fid *fid, char *name, int mode, + kgid_t gid, struct p9_qid *qid) +{ + int err; + struct p9_client *clnt; + struct p9_req_t *req; + + err = 0; + clnt = fid->clnt; + p9_debug(P9_DEBUG_9P, ">>> TMKDIR fid %d name %s mode %d gid %d\n", + fid->fid, name, mode, from_kgid(&init_user_ns, gid)); + req = p9_client_rpc(clnt, P9_TMKDIR, "dsdg", fid->fid, name, mode, + gid); + if (IS_ERR(req)) + return PTR_ERR(req); + + err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid); + if (err) { + trace_9p_protocol_dump(clnt, req->rc); goto error; } + p9_debug(P9_DEBUG_9P, "<<< RMKDIR qid %x.%llx.%x\n", qid->type, + (unsigned long long)qid->path, qid->version); - memset(&fid->qid, 0, sizeof(struct p9_qid)); - fid->mode = -1; - fid->rdir_fpos = 0; - fid->rdir_pos = 0; - fid->rdir_fcall = NULL; - fid->uid = current->fsuid; - fid->clnt = clnt; - fid->aux = NULL; +error: + p9_free_req(clnt, req); + return err; - spin_lock(&clnt->lock); - list_add(&fid->flist, &clnt->fidlist); - spin_unlock(&clnt->lock); +} +EXPORT_SYMBOL(p9_client_mkdir_dotl); - return fid; +int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status) +{ + int err; + struct p9_client *clnt; + struct p9_req_t *req; + + err = 0; + clnt = fid->clnt; + p9_debug(P9_DEBUG_9P, ">>> TLOCK fid %d type %i flags %d " + "start %lld length %lld proc_id %d client_id %s\n", + fid->fid, flock->type, flock->flags, flock->start, + flock->length, flock->proc_id, flock->client_id); + + req = p9_client_rpc(clnt, P9_TLOCK, "dbdqqds", fid->fid, flock->type, + flock->flags, flock->start, flock->length, + flock->proc_id, flock->client_id); + if (IS_ERR(req)) + return PTR_ERR(req); + + err = p9pdu_readf(req->rc, clnt->proto_version, "b", status); + if (err) { + trace_9p_protocol_dump(clnt, req->rc); + goto error; + } + p9_debug(P9_DEBUG_9P, "<<< RLOCK status %i\n", *status); error: - kfree(fid); - return ERR_PTR(err); + p9_free_req(clnt, req); + return err; + } +EXPORT_SYMBOL(p9_client_lock_dotl); -static void p9_fid_destroy(struct p9_fid *fid) +int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *glock) { + int err; struct p9_client *clnt; + struct p9_req_t *req; - P9_DPRINTK(P9_DEBUG_9P, "fid %d\n", fid->fid); + err = 0; clnt = fid->clnt; - p9_idpool_put(fid->fid, clnt->fidpool); - spin_lock(&clnt->lock); - list_del(&fid->flist); - spin_unlock(&clnt->lock); - kfree(fid->rdir_fcall); - kfree(fid); + p9_debug(P9_DEBUG_9P, ">>> TGETLOCK fid %d, type %i start %lld " + "length %lld proc_id %d client_id %s\n", fid->fid, glock->type, + glock->start, glock->length, glock->proc_id, glock->client_id); + + req = p9_client_rpc(clnt, P9_TGETLOCK, "dbqqds", fid->fid, glock->type, + glock->start, glock->length, glock->proc_id, glock->client_id); + + if (IS_ERR(req)) + return PTR_ERR(req); + + err = p9pdu_readf(req->rc, clnt->proto_version, "bqqds", &glock->type, + &glock->start, &glock->length, &glock->proc_id, + &glock->client_id); + if (err) { + trace_9p_protocol_dump(clnt, req->rc); + goto error; + } + p9_debug(P9_DEBUG_9P, "<<< RGETLOCK type %i start %lld length %lld " + "proc_id %d client_id %s\n", glock->type, glock->start, + glock->length, glock->proc_id, glock->client_id); +error: + p9_free_req(clnt, req); + return err; +} +EXPORT_SYMBOL(p9_client_getlock_dotl); + +int p9_client_readlink(struct p9_fid *fid, char **target) +{ + int err; + struct p9_client *clnt; + struct p9_req_t *req; + + err = 0; + clnt = fid->clnt; + p9_debug(P9_DEBUG_9P, ">>> TREADLINK fid %d\n", fid->fid); + + req = p9_client_rpc(clnt, P9_TREADLINK, "d", fid->fid); + if (IS_ERR(req)) + return PTR_ERR(req); + + err = p9pdu_readf(req->rc, clnt->proto_version, "s", target); + if (err) { + trace_9p_protocol_dump(clnt, req->rc); + goto error; + } + p9_debug(P9_DEBUG_9P, "<<< RREADLINK target %s\n", *target); +error: + p9_free_req(clnt, req); + return err; } +EXPORT_SYMBOL(p9_client_readlink); diff --git a/net/9p/conv.c b/net/9p/conv.c deleted file mode 100644 index 3fe35d532c8..00000000000 --- a/net/9p/conv.c +++ /dev/null @@ -1,924 +0,0 @@ -/* - * net/9p/conv.c - * - * 9P protocol conversion functions - * - * Copyright (C) 2004, 2005 by Latchesar Ionkov <lucho@ionkov.net> - * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> - * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to: - * Free Software Foundation - * 51 Franklin Street, Fifth Floor - * Boston, MA 02111-1301 USA - * - */ - -#include <linux/module.h> -#include <linux/errno.h> -#include <linux/fs.h> -#include <linux/sched.h> -#include <linux/idr.h> -#include <linux/uaccess.h> -#include <net/9p/9p.h> - -/* - * Buffer to help with string parsing - */ -struct cbuf { - unsigned char *sp; - unsigned char *p; - unsigned char *ep; -}; - -static inline void buf_init(struct cbuf *buf, void *data, int datalen) -{ - buf->sp = buf->p = data; - buf->ep = data + datalen; -} - -static inline int buf_check_overflow(struct cbuf *buf) -{ - return buf->p > buf->ep; -} - -static int buf_check_size(struct cbuf *buf, int len) -{ - if (buf->p + len > buf->ep) { - if (buf->p < buf->ep) { - P9_EPRINTK(KERN_ERR, - "buffer overflow: want %d has %d\n", len, - (int)(buf->ep - buf->p)); - dump_stack(); - buf->p = buf->ep + 1; - } - - return 0; - } - - return 1; -} - -static void *buf_alloc(struct cbuf *buf, int len) -{ - void *ret = NULL; - - if (buf_check_size(buf, len)) { - ret = buf->p; - buf->p += len; - } - - return ret; -} - -static void buf_put_int8(struct cbuf *buf, u8 val) -{ - if (buf_check_size(buf, 1)) { - buf->p[0] = val; - buf->p++; - } -} - -static void buf_put_int16(struct cbuf *buf, u16 val) -{ - if (buf_check_size(buf, 2)) { - *(__le16 *) buf->p = cpu_to_le16(val); - buf->p += 2; - } -} - -static void buf_put_int32(struct cbuf *buf, u32 val) -{ - if (buf_check_size(buf, 4)) { - *(__le32 *)buf->p = cpu_to_le32(val); - buf->p += 4; - } -} - -static void buf_put_int64(struct cbuf *buf, u64 val) -{ - if (buf_check_size(buf, 8)) { - *(__le64 *)buf->p = cpu_to_le64(val); - buf->p += 8; - } -} - -static char *buf_put_stringn(struct cbuf *buf, const char *s, u16 slen) -{ - char *ret; - - ret = NULL; - if (buf_check_size(buf, slen + 2)) { - buf_put_int16(buf, slen); - ret = buf->p; - memcpy(buf->p, s, slen); - buf->p += slen; - } - - return ret; -} - -static u8 buf_get_int8(struct cbuf *buf) -{ - u8 ret = 0; - - if (buf_check_size(buf, 1)) { - ret = buf->p[0]; - buf->p++; - } - - return ret; -} - -static u16 buf_get_int16(struct cbuf *buf) -{ - u16 ret = 0; - - if (buf_check_size(buf, 2)) { - ret = le16_to_cpu(*(__le16 *)buf->p); - buf->p += 2; - } - - return ret; -} - -static u32 buf_get_int32(struct cbuf *buf) -{ - u32 ret = 0; - - if (buf_check_size(buf, 4)) { - ret = le32_to_cpu(*(__le32 *)buf->p); - buf->p += 4; - } - - return ret; -} - -static u64 buf_get_int64(struct cbuf *buf) -{ - u64 ret = 0; - - if (buf_check_size(buf, 8)) { - ret = le64_to_cpu(*(__le64 *)buf->p); - buf->p += 8; - } - - return ret; -} - -static void buf_get_str(struct cbuf *buf, struct p9_str *vstr) -{ - vstr->len = buf_get_int16(buf); - if (!buf_check_overflow(buf) && buf_check_size(buf, vstr->len)) { - vstr->str = buf->p; - buf->p += vstr->len; - } else { - vstr->len = 0; - vstr->str = NULL; - } -} - -static void buf_get_qid(struct cbuf *bufp, struct p9_qid *qid) -{ - qid->type = buf_get_int8(bufp); - qid->version = buf_get_int32(bufp); - qid->path = buf_get_int64(bufp); -} - -/** - * p9_size_wstat - calculate the size of a variable length stat struct - * @stat: metadata (stat) structure - * @dotu: non-zero if 9P2000.u - * - */ - -static int p9_size_wstat(struct p9_wstat *wstat, int dotu) -{ - int size = 0; - - if (wstat == NULL) { - P9_EPRINTK(KERN_ERR, "p9_size_stat: got a NULL stat pointer\n"); - return 0; - } - - size = /* 2 + *//* size[2] */ - 2 + /* type[2] */ - 4 + /* dev[4] */ - 1 + /* qid.type[1] */ - 4 + /* qid.vers[4] */ - 8 + /* qid.path[8] */ - 4 + /* mode[4] */ - 4 + /* atime[4] */ - 4 + /* mtime[4] */ - 8 + /* length[8] */ - 8; /* minimum sum of string lengths */ - - if (wstat->name) - size += strlen(wstat->name); - if (wstat->uid) - size += strlen(wstat->uid); - if (wstat->gid) - size += strlen(wstat->gid); - if (wstat->muid) - size += strlen(wstat->muid); - - if (dotu) { - size += 4 + /* n_uid[4] */ - 4 + /* n_gid[4] */ - 4 + /* n_muid[4] */ - 2; /* string length of extension[4] */ - if (wstat->extension) - size += strlen(wstat->extension); - } - - return size; -} - -/** - * buf_get_stat - safely decode a recieved metadata (stat) structure - * @bufp: buffer to deserialize - * @stat: metadata (stat) structure - * @dotu: non-zero if 9P2000.u - * - */ - -static void -buf_get_stat(struct cbuf *bufp, struct p9_stat *stat, int dotu) -{ - stat->size = buf_get_int16(bufp); - stat->type = buf_get_int16(bufp); - stat->dev = buf_get_int32(bufp); - stat->qid.type = buf_get_int8(bufp); - stat->qid.version = buf_get_int32(bufp); - stat->qid.path = buf_get_int64(bufp); - stat->mode = buf_get_int32(bufp); - stat->atime = buf_get_int32(bufp); - stat->mtime = buf_get_int32(bufp); - stat->length = buf_get_int64(bufp); - buf_get_str(bufp, &stat->name); - buf_get_str(bufp, &stat->uid); - buf_get_str(bufp, &stat->gid); - buf_get_str(bufp, &stat->muid); - - if (dotu) { - buf_get_str(bufp, &stat->extension); - stat->n_uid = buf_get_int32(bufp); - stat->n_gid = buf_get_int32(bufp); - stat->n_muid = buf_get_int32(bufp); - } -} - -/** - * p9_deserialize_stat - decode a received metadata structure - * @buf: buffer to deserialize - * @buflen: length of received buffer - * @stat: metadata structure to decode into - * @dotu: non-zero if 9P2000.u - * - * Note: stat will point to the buf region. - */ - -int -p9_deserialize_stat(void *buf, u32 buflen, struct p9_stat *stat, - int dotu) -{ - struct cbuf buffer; - struct cbuf *bufp = &buffer; - unsigned char *p; - - buf_init(bufp, buf, buflen); - p = bufp->p; - buf_get_stat(bufp, stat, dotu); - - if (buf_check_overflow(bufp)) - return 0; - else - return bufp->p - p; -} -EXPORT_SYMBOL(p9_deserialize_stat); - -/** - * deserialize_fcall - unmarshal a response - * @buf: recieved buffer - * @buflen: length of received buffer - * @rcall: fcall structure to populate - * @rcalllen: length of fcall structure to populate - * @dotu: non-zero if 9P2000.u - * - */ - -int -p9_deserialize_fcall(void *buf, u32 buflen, struct p9_fcall *rcall, - int dotu) -{ - - struct cbuf buffer; - struct cbuf *bufp = &buffer; - int i = 0; - - buf_init(bufp, buf, buflen); - - rcall->size = buf_get_int32(bufp); - rcall->id = buf_get_int8(bufp); - rcall->tag = buf_get_int16(bufp); - - P9_DPRINTK(P9_DEBUG_CONV, "size %d id %d tag %d\n", rcall->size, - rcall->id, rcall->tag); - - switch (rcall->id) { - default: - P9_EPRINTK(KERN_ERR, "unknown message type: %d\n", rcall->id); - return -EPROTO; - case P9_RVERSION: - rcall->params.rversion.msize = buf_get_int32(bufp); - buf_get_str(bufp, &rcall->params.rversion.version); - break; - case P9_RFLUSH: - break; - case P9_RATTACH: - rcall->params.rattach.qid.type = buf_get_int8(bufp); - rcall->params.rattach.qid.version = buf_get_int32(bufp); - rcall->params.rattach.qid.path = buf_get_int64(bufp); - break; - case P9_RWALK: - rcall->params.rwalk.nwqid = buf_get_int16(bufp); - if (rcall->params.rwalk.nwqid > P9_MAXWELEM) { - P9_EPRINTK(KERN_ERR, - "Rwalk with more than %d qids: %d\n", - P9_MAXWELEM, rcall->params.rwalk.nwqid); - return -EPROTO; - } - - for (i = 0; i < rcall->params.rwalk.nwqid; i++) - buf_get_qid(bufp, &rcall->params.rwalk.wqids[i]); - break; - case P9_ROPEN: - buf_get_qid(bufp, &rcall->params.ropen.qid); - rcall->params.ropen.iounit = buf_get_int32(bufp); - break; - case P9_RCREATE: - buf_get_qid(bufp, &rcall->params.rcreate.qid); - rcall->params.rcreate.iounit = buf_get_int32(bufp); - break; - case P9_RREAD: - rcall->params.rread.count = buf_get_int32(bufp); - rcall->params.rread.data = bufp->p; - buf_check_size(bufp, rcall->params.rread.count); - break; - case P9_RWRITE: - rcall->params.rwrite.count = buf_get_int32(bufp); - break; - case P9_RCLUNK: - break; - case P9_RREMOVE: - break; - case P9_RSTAT: - buf_get_int16(bufp); - buf_get_stat(bufp, &rcall->params.rstat.stat, dotu); - break; - case P9_RWSTAT: - break; - case P9_RERROR: - buf_get_str(bufp, &rcall->params.rerror.error); - if (dotu) - rcall->params.rerror.errno = buf_get_int16(bufp); - break; - } - - if (buf_check_overflow(bufp)) { - P9_DPRINTK(P9_DEBUG_ERROR, "buffer overflow\n"); - return -EIO; - } - - return bufp->p - bufp->sp; -} -EXPORT_SYMBOL(p9_deserialize_fcall); - -static inline void p9_put_int8(struct cbuf *bufp, u8 val, u8 * p) -{ - *p = val; - buf_put_int8(bufp, val); -} - -static inline void p9_put_int16(struct cbuf *bufp, u16 val, u16 * p) -{ - *p = val; - buf_put_int16(bufp, val); -} - -static inline void p9_put_int32(struct cbuf *bufp, u32 val, u32 * p) -{ - *p = val; - buf_put_int32(bufp, val); -} - -static inline void p9_put_int64(struct cbuf *bufp, u64 val, u64 * p) -{ - *p = val; - buf_put_int64(bufp, val); -} - -static void -p9_put_str(struct cbuf *bufp, char *data, struct p9_str *str) -{ - int len; - char *s; - - if (data) - len = strlen(data); - else - len = 0; - - s = buf_put_stringn(bufp, data, len); - if (str) { - str->len = len; - str->str = s; - } -} - -static int -p9_put_data(struct cbuf *bufp, const char *data, int count, - unsigned char **pdata) -{ - *pdata = buf_alloc(bufp, count); - memmove(*pdata, data, count); - return count; -} - -static int -p9_put_user_data(struct cbuf *bufp, const char __user *data, int count, - unsigned char **pdata) -{ - *pdata = buf_alloc(bufp, count); - return copy_from_user(*pdata, data, count); -} - -static void -p9_put_wstat(struct cbuf *bufp, struct p9_wstat *wstat, - struct p9_stat *stat, int statsz, int dotu) -{ - p9_put_int16(bufp, statsz, &stat->size); - p9_put_int16(bufp, wstat->type, &stat->type); - p9_put_int32(bufp, wstat->dev, &stat->dev); - p9_put_int8(bufp, wstat->qid.type, &stat->qid.type); - p9_put_int32(bufp, wstat->qid.version, &stat->qid.version); - p9_put_int64(bufp, wstat->qid.path, &stat->qid.path); - p9_put_int32(bufp, wstat->mode, &stat->mode); - p9_put_int32(bufp, wstat->atime, &stat->atime); - p9_put_int32(bufp, wstat->mtime, &stat->mtime); - p9_put_int64(bufp, wstat->length, &stat->length); - - p9_put_str(bufp, wstat->name, &stat->name); - p9_put_str(bufp, wstat->uid, &stat->uid); - p9_put_str(bufp, wstat->gid, &stat->gid); - p9_put_str(bufp, wstat->muid, &stat->muid); - - if (dotu) { - p9_put_str(bufp, wstat->extension, &stat->extension); - p9_put_int32(bufp, wstat->n_uid, &stat->n_uid); - p9_put_int32(bufp, wstat->n_gid, &stat->n_gid); - p9_put_int32(bufp, wstat->n_muid, &stat->n_muid); - } -} - -static struct p9_fcall * -p9_create_common(struct cbuf *bufp, u32 size, u8 id) -{ - struct p9_fcall *fc; - - size += 4 + 1 + 2; /* size[4] id[1] tag[2] */ - fc = kmalloc(sizeof(struct p9_fcall) + size, GFP_KERNEL); - if (!fc) - return ERR_PTR(-ENOMEM); - - fc->sdata = (char *)fc + sizeof(*fc); - - buf_init(bufp, (char *)fc->sdata, size); - p9_put_int32(bufp, size, &fc->size); - p9_put_int8(bufp, id, &fc->id); - p9_put_int16(bufp, P9_NOTAG, &fc->tag); - - return fc; -} - -void p9_set_tag(struct p9_fcall *fc, u16 tag) -{ - fc->tag = tag; - *(__le16 *) (fc->sdata + 5) = cpu_to_le16(tag); -} -EXPORT_SYMBOL(p9_set_tag); - -struct p9_fcall *p9_create_tversion(u32 msize, char *version) -{ - int size; - struct p9_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - size = 4 + 2 + strlen(version); /* msize[4] version[s] */ - fc = p9_create_common(bufp, size, P9_TVERSION); - if (IS_ERR(fc)) - goto error; - - p9_put_int32(bufp, msize, &fc->params.tversion.msize); - p9_put_str(bufp, version, &fc->params.tversion.version); - - if (buf_check_overflow(bufp)) { - kfree(fc); - fc = ERR_PTR(-ENOMEM); - } -error: - return fc; -} -EXPORT_SYMBOL(p9_create_tversion); - -struct p9_fcall *p9_create_tauth(u32 afid, char *uname, char *aname, - u32 n_uname, int dotu) -{ - int size; - struct p9_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - /* afid[4] uname[s] aname[s] */ - size = 4 + 2 + 2; - if (uname) - size += strlen(uname); - - if (aname) - size += strlen(aname); - - if (dotu) - size += 4; /* n_uname */ - - fc = p9_create_common(bufp, size, P9_TAUTH); - if (IS_ERR(fc)) - goto error; - - p9_put_int32(bufp, afid, &fc->params.tauth.afid); - p9_put_str(bufp, uname, &fc->params.tauth.uname); - p9_put_str(bufp, aname, &fc->params.tauth.aname); - if (dotu) - p9_put_int32(bufp, n_uname, &fc->params.tauth.n_uname); - - if (buf_check_overflow(bufp)) { - kfree(fc); - fc = ERR_PTR(-ENOMEM); - } -error: - return fc; -} -EXPORT_SYMBOL(p9_create_tauth); - -struct p9_fcall * -p9_create_tattach(u32 fid, u32 afid, char *uname, char *aname, - u32 n_uname, int dotu) -{ - int size; - struct p9_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - /* fid[4] afid[4] uname[s] aname[s] */ - size = 4 + 4 + 2 + 2; - if (uname) - size += strlen(uname); - - if (aname) - size += strlen(aname); - - if (dotu) - size += 4; /* n_uname */ - - fc = p9_create_common(bufp, size, P9_TATTACH); - if (IS_ERR(fc)) - goto error; - - p9_put_int32(bufp, fid, &fc->params.tattach.fid); - p9_put_int32(bufp, afid, &fc->params.tattach.afid); - p9_put_str(bufp, uname, &fc->params.tattach.uname); - p9_put_str(bufp, aname, &fc->params.tattach.aname); - if (dotu) - p9_put_int32(bufp, n_uname, &fc->params.tattach.n_uname); - -error: - return fc; -} -EXPORT_SYMBOL(p9_create_tattach); - -struct p9_fcall *p9_create_tflush(u16 oldtag) -{ - int size; - struct p9_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - size = 2; /* oldtag[2] */ - fc = p9_create_common(bufp, size, P9_TFLUSH); - if (IS_ERR(fc)) - goto error; - - p9_put_int16(bufp, oldtag, &fc->params.tflush.oldtag); - - if (buf_check_overflow(bufp)) { - kfree(fc); - fc = ERR_PTR(-ENOMEM); - } -error: - return fc; -} -EXPORT_SYMBOL(p9_create_tflush); - -struct p9_fcall *p9_create_twalk(u32 fid, u32 newfid, u16 nwname, - char **wnames) -{ - int i, size; - struct p9_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - if (nwname > P9_MAXWELEM) { - P9_DPRINTK(P9_DEBUG_ERROR, "nwname > %d\n", P9_MAXWELEM); - return NULL; - } - - size = 4 + 4 + 2; /* fid[4] newfid[4] nwname[2] ... */ - for (i = 0; i < nwname; i++) { - size += 2 + strlen(wnames[i]); /* wname[s] */ - } - - fc = p9_create_common(bufp, size, P9_TWALK); - if (IS_ERR(fc)) - goto error; - - p9_put_int32(bufp, fid, &fc->params.twalk.fid); - p9_put_int32(bufp, newfid, &fc->params.twalk.newfid); - p9_put_int16(bufp, nwname, &fc->params.twalk.nwname); - for (i = 0; i < nwname; i++) { - p9_put_str(bufp, wnames[i], &fc->params.twalk.wnames[i]); - } - - if (buf_check_overflow(bufp)) { - kfree(fc); - fc = ERR_PTR(-ENOMEM); - } -error: - return fc; -} -EXPORT_SYMBOL(p9_create_twalk); - -struct p9_fcall *p9_create_topen(u32 fid, u8 mode) -{ - int size; - struct p9_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - size = 4 + 1; /* fid[4] mode[1] */ - fc = p9_create_common(bufp, size, P9_TOPEN); - if (IS_ERR(fc)) - goto error; - - p9_put_int32(bufp, fid, &fc->params.topen.fid); - p9_put_int8(bufp, mode, &fc->params.topen.mode); - - if (buf_check_overflow(bufp)) { - kfree(fc); - fc = ERR_PTR(-ENOMEM); - } -error: - return fc; -} -EXPORT_SYMBOL(p9_create_topen); - -struct p9_fcall *p9_create_tcreate(u32 fid, char *name, u32 perm, u8 mode, - char *extension, int dotu) -{ - int size; - struct p9_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - /* fid[4] name[s] perm[4] mode[1] */ - size = 4 + 2 + strlen(name) + 4 + 1; - if (dotu) { - size += 2 + /* extension[s] */ - (extension == NULL ? 0 : strlen(extension)); - } - - fc = p9_create_common(bufp, size, P9_TCREATE); - if (IS_ERR(fc)) - goto error; - - p9_put_int32(bufp, fid, &fc->params.tcreate.fid); - p9_put_str(bufp, name, &fc->params.tcreate.name); - p9_put_int32(bufp, perm, &fc->params.tcreate.perm); - p9_put_int8(bufp, mode, &fc->params.tcreate.mode); - if (dotu) - p9_put_str(bufp, extension, &fc->params.tcreate.extension); - - if (buf_check_overflow(bufp)) { - kfree(fc); - fc = ERR_PTR(-ENOMEM); - } -error: - return fc; -} -EXPORT_SYMBOL(p9_create_tcreate); - -struct p9_fcall *p9_create_tread(u32 fid, u64 offset, u32 count) -{ - int size; - struct p9_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - size = 4 + 8 + 4; /* fid[4] offset[8] count[4] */ - fc = p9_create_common(bufp, size, P9_TREAD); - if (IS_ERR(fc)) - goto error; - - p9_put_int32(bufp, fid, &fc->params.tread.fid); - p9_put_int64(bufp, offset, &fc->params.tread.offset); - p9_put_int32(bufp, count, &fc->params.tread.count); - - if (buf_check_overflow(bufp)) { - kfree(fc); - fc = ERR_PTR(-ENOMEM); - } -error: - return fc; -} -EXPORT_SYMBOL(p9_create_tread); - -struct p9_fcall *p9_create_twrite(u32 fid, u64 offset, u32 count, - const char *data) -{ - int size, err; - struct p9_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - /* fid[4] offset[8] count[4] data[count] */ - size = 4 + 8 + 4 + count; - fc = p9_create_common(bufp, size, P9_TWRITE); - if (IS_ERR(fc)) - goto error; - - p9_put_int32(bufp, fid, &fc->params.twrite.fid); - p9_put_int64(bufp, offset, &fc->params.twrite.offset); - p9_put_int32(bufp, count, &fc->params.twrite.count); - err = p9_put_data(bufp, data, count, &fc->params.twrite.data); - if (err) { - kfree(fc); - fc = ERR_PTR(err); - goto error; - } - - if (buf_check_overflow(bufp)) { - kfree(fc); - fc = ERR_PTR(-ENOMEM); - } -error: - return fc; -} -EXPORT_SYMBOL(p9_create_twrite); - -struct p9_fcall *p9_create_twrite_u(u32 fid, u64 offset, u32 count, - const char __user *data) -{ - int size, err; - struct p9_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - /* fid[4] offset[8] count[4] data[count] */ - size = 4 + 8 + 4 + count; - fc = p9_create_common(bufp, size, P9_TWRITE); - if (IS_ERR(fc)) - goto error; - - p9_put_int32(bufp, fid, &fc->params.twrite.fid); - p9_put_int64(bufp, offset, &fc->params.twrite.offset); - p9_put_int32(bufp, count, &fc->params.twrite.count); - err = p9_put_user_data(bufp, data, count, &fc->params.twrite.data); - if (err) { - kfree(fc); - fc = ERR_PTR(err); - goto error; - } - - if (buf_check_overflow(bufp)) { - kfree(fc); - fc = ERR_PTR(-ENOMEM); - } -error: - return fc; -} -EXPORT_SYMBOL(p9_create_twrite_u); - -struct p9_fcall *p9_create_tclunk(u32 fid) -{ - int size; - struct p9_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - size = 4; /* fid[4] */ - fc = p9_create_common(bufp, size, P9_TCLUNK); - if (IS_ERR(fc)) - goto error; - - p9_put_int32(bufp, fid, &fc->params.tclunk.fid); - - if (buf_check_overflow(bufp)) { - kfree(fc); - fc = ERR_PTR(-ENOMEM); - } -error: - return fc; -} -EXPORT_SYMBOL(p9_create_tclunk); - -struct p9_fcall *p9_create_tremove(u32 fid) -{ - int size; - struct p9_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - size = 4; /* fid[4] */ - fc = p9_create_common(bufp, size, P9_TREMOVE); - if (IS_ERR(fc)) - goto error; - - p9_put_int32(bufp, fid, &fc->params.tremove.fid); - - if (buf_check_overflow(bufp)) { - kfree(fc); - fc = ERR_PTR(-ENOMEM); - } -error: - return fc; -} -EXPORT_SYMBOL(p9_create_tremove); - -struct p9_fcall *p9_create_tstat(u32 fid) -{ - int size; - struct p9_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - size = 4; /* fid[4] */ - fc = p9_create_common(bufp, size, P9_TSTAT); - if (IS_ERR(fc)) - goto error; - - p9_put_int32(bufp, fid, &fc->params.tstat.fid); - - if (buf_check_overflow(bufp)) { - kfree(fc); - fc = ERR_PTR(-ENOMEM); - } -error: - return fc; -} -EXPORT_SYMBOL(p9_create_tstat); - -struct p9_fcall *p9_create_twstat(u32 fid, struct p9_wstat *wstat, - int dotu) -{ - int size, statsz; - struct p9_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - statsz = p9_size_wstat(wstat, dotu); - size = 4 + 2 + 2 + statsz; /* fid[4] stat[n] */ - fc = p9_create_common(bufp, size, P9_TWSTAT); - if (IS_ERR(fc)) - goto error; - - p9_put_int32(bufp, fid, &fc->params.twstat.fid); - buf_put_int16(bufp, statsz + 2); - p9_put_wstat(bufp, wstat, &fc->params.twstat.stat, statsz, dotu); - - if (buf_check_overflow(bufp)) { - kfree(fc); - fc = ERR_PTR(-ENOMEM); - } -error: - return fc; -} -EXPORT_SYMBOL(p9_create_twstat); diff --git a/net/9p/error.c b/net/9p/error.c index ab2458b6c90..126fd0dceea 100644 --- a/net/9p/error.c +++ b/net/9p/error.c @@ -27,12 +27,21 @@ * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/module.h> #include <linux/list.h> #include <linux/jhash.h> #include <linux/errno.h> #include <net/9p/9p.h> +/** + * struct errormap - map string errors from Plan 9 to Linux numeric ids + * @name: string sent over 9P + * @val: numeric id most closely representing @name + * @namelen: length of string + * @list: hash-table list for string lookup + */ struct errormap { char *name; int val; @@ -177,8 +186,7 @@ static struct errormap errmap[] = { }; /** - * p9_error_init - preload - * @errstr: error string + * p9_error_init - preload mappings into hash list * */ @@ -206,21 +214,20 @@ EXPORT_SYMBOL(p9_error_init); /** * errstr2errno - convert error string to error number * @errstr: error string + * @len: length of error string * */ int p9_errstr2errno(char *errstr, int len) { int errno; - struct hlist_node *p; struct errormap *c; int bucket; errno = 0; - p = NULL; c = NULL; bucket = jhash(errstr, len, 0) % ERRHASHSZ; - hlist_for_each_entry(c, p, &hash_errmap[bucket], list) { + hlist_for_each_entry(c, &hash_errmap[bucket], list) { if (c->namelen == len && !memcmp(c->name, errstr, len)) { errno = c->val; break; @@ -230,9 +237,9 @@ int p9_errstr2errno(char *errstr, int len) if (errno == 0) { /* TODO: if error isn't found, add it dynamically */ errstr[len] = 0; - printk(KERN_ERR "%s: errstr :%s: not found\n", __FUNCTION__, - errstr); - errno = 1; + pr_err("%s: server reported unknown error %s\n", + __func__, errstr); + errno = ESERVERFAULT; } return -errno; diff --git a/net/9p/fcprint.c b/net/9p/fcprint.c deleted file mode 100644 index 40244fbd9b0..00000000000 --- a/net/9p/fcprint.c +++ /dev/null @@ -1,358 +0,0 @@ -/* - * net/9p/fcprint.c - * - * Print 9P call. - * - * Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to: - * Free Software Foundation - * 51 Franklin Street, Fifth Floor - * Boston, MA 02111-1301 USA - * - */ -#include <linux/module.h> -#include <linux/errno.h> -#include <linux/fs.h> -#include <linux/idr.h> -#include <net/9p/9p.h> - -#ifdef CONFIG_NET_9P_DEBUG - -static int -p9_printqid(char *buf, int buflen, struct p9_qid *q) -{ - int n; - char b[10]; - - n = 0; - if (q->type & P9_QTDIR) - b[n++] = 'd'; - if (q->type & P9_QTAPPEND) - b[n++] = 'a'; - if (q->type & P9_QTAUTH) - b[n++] = 'A'; - if (q->type & P9_QTEXCL) - b[n++] = 'l'; - if (q->type & P9_QTTMP) - b[n++] = 't'; - if (q->type & P9_QTSYMLINK) - b[n++] = 'L'; - b[n] = '\0'; - - return scnprintf(buf, buflen, "(%.16llx %x %s)", - (long long int) q->path, q->version, b); -} - -static int -p9_printperm(char *buf, int buflen, int perm) -{ - int n; - char b[15]; - - n = 0; - if (perm & P9_DMDIR) - b[n++] = 'd'; - if (perm & P9_DMAPPEND) - b[n++] = 'a'; - if (perm & P9_DMAUTH) - b[n++] = 'A'; - if (perm & P9_DMEXCL) - b[n++] = 'l'; - if (perm & P9_DMTMP) - b[n++] = 't'; - if (perm & P9_DMDEVICE) - b[n++] = 'D'; - if (perm & P9_DMSOCKET) - b[n++] = 'S'; - if (perm & P9_DMNAMEDPIPE) - b[n++] = 'P'; - if (perm & P9_DMSYMLINK) - b[n++] = 'L'; - b[n] = '\0'; - - return scnprintf(buf, buflen, "%s%03o", b, perm&077); -} - -static int -p9_printstat(char *buf, int buflen, struct p9_stat *st, int extended) -{ - int n; - - n = scnprintf(buf, buflen, "'%.*s' '%.*s'", st->name.len, - st->name.str, st->uid.len, st->uid.str); - if (extended) - n += scnprintf(buf+n, buflen-n, "(%d)", st->n_uid); - - n += scnprintf(buf+n, buflen-n, " '%.*s'", st->gid.len, st->gid.str); - if (extended) - n += scnprintf(buf+n, buflen-n, "(%d)", st->n_gid); - - n += scnprintf(buf+n, buflen-n, " '%.*s'", st->muid.len, st->muid.str); - if (extended) - n += scnprintf(buf+n, buflen-n, "(%d)", st->n_muid); - - n += scnprintf(buf+n, buflen-n, " q "); - n += p9_printqid(buf+n, buflen-n, &st->qid); - n += scnprintf(buf+n, buflen-n, " m "); - n += p9_printperm(buf+n, buflen-n, st->mode); - n += scnprintf(buf+n, buflen-n, " at %d mt %d l %lld", - st->atime, st->mtime, (long long int) st->length); - - if (extended) - n += scnprintf(buf+n, buflen-n, " ext '%.*s'", - st->extension.len, st->extension.str); - - return n; -} - -static int -p9_dumpdata(char *buf, int buflen, u8 *data, int datalen) -{ - int i, n; - - i = n = 0; - while (i < datalen) { - n += scnprintf(buf + n, buflen - n, "%02x", data[i]); - if (i%4 == 3) - n += scnprintf(buf + n, buflen - n, " "); - if (i%32 == 31) - n += scnprintf(buf + n, buflen - n, "\n"); - - i++; - } - n += scnprintf(buf + n, buflen - n, "\n"); - - return n; -} - -static int -p9_printdata(char *buf, int buflen, u8 *data, int datalen) -{ - return p9_dumpdata(buf, buflen, data, datalen < 16?datalen:16); -} - -int -p9_printfcall(char *buf, int buflen, struct p9_fcall *fc, int extended) -{ - int i, ret, type, tag; - - if (!fc) - return scnprintf(buf, buflen, "<NULL>"); - - type = fc->id; - tag = fc->tag; - - ret = 0; - switch (type) { - case P9_TVERSION: - ret += scnprintf(buf+ret, buflen-ret, - "Tversion tag %u msize %u version '%.*s'", tag, - fc->params.tversion.msize, - fc->params.tversion.version.len, - fc->params.tversion.version.str); - break; - - case P9_RVERSION: - ret += scnprintf(buf+ret, buflen-ret, - "Rversion tag %u msize %u version '%.*s'", tag, - fc->params.rversion.msize, - fc->params.rversion.version.len, - fc->params.rversion.version.str); - break; - - case P9_TAUTH: - ret += scnprintf(buf+ret, buflen-ret, - "Tauth tag %u afid %d uname '%.*s' aname '%.*s'", tag, - fc->params.tauth.afid, fc->params.tauth.uname.len, - fc->params.tauth.uname.str, fc->params.tauth.aname.len, - fc->params.tauth.aname.str); - break; - - case P9_RAUTH: - ret += scnprintf(buf+ret, buflen-ret, "Rauth tag %u qid ", tag); - p9_printqid(buf+ret, buflen-ret, &fc->params.rauth.qid); - break; - - case P9_TATTACH: - ret += scnprintf(buf+ret, buflen-ret, - "Tattach tag %u fid %d afid %d uname '%.*s' aname '%.*s'", tag, - fc->params.tattach.fid, fc->params.tattach.afid, - fc->params.tattach.uname.len, fc->params.tattach.uname.str, - fc->params.tattach.aname.len, fc->params.tattach.aname.str); - break; - - case P9_RATTACH: - ret += scnprintf(buf+ret, buflen-ret, "Rattach tag %u qid ", - tag); - p9_printqid(buf+ret, buflen-ret, &fc->params.rattach.qid); - break; - - case P9_RERROR: - ret += scnprintf(buf+ret, buflen-ret, - "Rerror tag %u ename '%.*s'", tag, - fc->params.rerror.error.len, - fc->params.rerror.error.str); - if (extended) - ret += scnprintf(buf+ret, buflen-ret, " ecode %d\n", - fc->params.rerror.errno); - break; - - case P9_TFLUSH: - ret += scnprintf(buf+ret, buflen-ret, "Tflush tag %u oldtag %u", - tag, fc->params.tflush.oldtag); - break; - - case P9_RFLUSH: - ret += scnprintf(buf+ret, buflen-ret, "Rflush tag %u", tag); - break; - - case P9_TWALK: - ret += scnprintf(buf+ret, buflen-ret, - "Twalk tag %u fid %d newfid %d nwname %d", tag, - fc->params.twalk.fid, fc->params.twalk.newfid, - fc->params.twalk.nwname); - for (i = 0; i < fc->params.twalk.nwname; i++) - ret += scnprintf(buf+ret, buflen-ret, " '%.*s'", - fc->params.twalk.wnames[i].len, - fc->params.twalk.wnames[i].str); - break; - - case P9_RWALK: - ret += scnprintf(buf+ret, buflen-ret, "Rwalk tag %u nwqid %d", - tag, fc->params.rwalk.nwqid); - for (i = 0; i < fc->params.rwalk.nwqid; i++) - ret += p9_printqid(buf+ret, buflen-ret, - &fc->params.rwalk.wqids[i]); - break; - - case P9_TOPEN: - ret += scnprintf(buf+ret, buflen-ret, - "Topen tag %u fid %d mode %d", tag, - fc->params.topen.fid, fc->params.topen.mode); - break; - - case P9_ROPEN: - ret += scnprintf(buf+ret, buflen-ret, "Ropen tag %u", tag); - ret += p9_printqid(buf+ret, buflen-ret, &fc->params.ropen.qid); - ret += scnprintf(buf+ret, buflen-ret, " iounit %d", - fc->params.ropen.iounit); - break; - - case P9_TCREATE: - ret += scnprintf(buf+ret, buflen-ret, - "Tcreate tag %u fid %d name '%.*s' perm ", tag, - fc->params.tcreate.fid, fc->params.tcreate.name.len, - fc->params.tcreate.name.str); - - ret += p9_printperm(buf+ret, buflen-ret, - fc->params.tcreate.perm); - ret += scnprintf(buf+ret, buflen-ret, " mode %d", - fc->params.tcreate.mode); - break; - - case P9_RCREATE: - ret += scnprintf(buf+ret, buflen-ret, "Rcreate tag %u", tag); - ret += p9_printqid(buf+ret, buflen-ret, - &fc->params.rcreate.qid); - ret += scnprintf(buf+ret, buflen-ret, " iounit %d", - fc->params.rcreate.iounit); - break; - - case P9_TREAD: - ret += scnprintf(buf+ret, buflen-ret, - "Tread tag %u fid %d offset %lld count %u", tag, - fc->params.tread.fid, - (long long int) fc->params.tread.offset, - fc->params.tread.count); - break; - - case P9_RREAD: - ret += scnprintf(buf+ret, buflen-ret, - "Rread tag %u count %u data ", tag, - fc->params.rread.count); - ret += p9_printdata(buf+ret, buflen-ret, fc->params.rread.data, - fc->params.rread.count); - break; - - case P9_TWRITE: - ret += scnprintf(buf+ret, buflen-ret, - "Twrite tag %u fid %d offset %lld count %u data ", - tag, fc->params.twrite.fid, - (long long int) fc->params.twrite.offset, - fc->params.twrite.count); - ret += p9_printdata(buf+ret, buflen-ret, fc->params.twrite.data, - fc->params.twrite.count); - break; - - case P9_RWRITE: - ret += scnprintf(buf+ret, buflen-ret, "Rwrite tag %u count %u", - tag, fc->params.rwrite.count); - break; - - case P9_TCLUNK: - ret += scnprintf(buf+ret, buflen-ret, "Tclunk tag %u fid %d", - tag, fc->params.tclunk.fid); - break; - - case P9_RCLUNK: - ret += scnprintf(buf+ret, buflen-ret, "Rclunk tag %u", tag); - break; - - case P9_TREMOVE: - ret += scnprintf(buf+ret, buflen-ret, "Tremove tag %u fid %d", - tag, fc->params.tremove.fid); - break; - - case P9_RREMOVE: - ret += scnprintf(buf+ret, buflen-ret, "Rremove tag %u", tag); - break; - - case P9_TSTAT: - ret += scnprintf(buf+ret, buflen-ret, "Tstat tag %u fid %d", - tag, fc->params.tstat.fid); - break; - - case P9_RSTAT: - ret += scnprintf(buf+ret, buflen-ret, "Rstat tag %u ", tag); - ret += p9_printstat(buf+ret, buflen-ret, &fc->params.rstat.stat, - extended); - break; - - case P9_TWSTAT: - ret += scnprintf(buf+ret, buflen-ret, "Twstat tag %u fid %d ", - tag, fc->params.twstat.fid); - ret += p9_printstat(buf+ret, buflen-ret, - &fc->params.twstat.stat, extended); - break; - - case P9_RWSTAT: - ret += scnprintf(buf+ret, buflen-ret, "Rwstat tag %u", tag); - break; - - default: - ret += scnprintf(buf+ret, buflen-ret, "unknown type %d", type); - break; - } - - return ret; -} -#else -int -p9_printfcall(char *buf, int buflen, struct p9_fcall *fc, int extended) -{ - return 0; -} -#endif /* CONFIG_NET_9P_DEBUG */ -EXPORT_SYMBOL(p9_printfcall); - diff --git a/net/9p/mod.c b/net/9p/mod.c index c285aab2af0..6ab36aea772 100644 --- a/net/9p/mod.c +++ b/net/9p/mod.c @@ -24,84 +24,149 @@ * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/module.h> +#include <linux/errno.h> +#include <linux/sched.h> #include <linux/moduleparam.h> #include <net/9p/9p.h> #include <linux/fs.h> #include <linux/parser.h> +#include <net/9p/client.h> #include <net/9p/transport.h> #include <linux/list.h> +#include <linux/spinlock.h> #ifdef CONFIG_NET_9P_DEBUG unsigned int p9_debug_level = 0; /* feature-rific global debug level */ EXPORT_SYMBOL(p9_debug_level); module_param_named(debug, p9_debug_level, uint, 0); MODULE_PARM_DESC(debug, "9P debugging level"); -#endif -extern int p9_mux_global_init(void); -extern void p9_mux_global_exit(void); +void _p9_debug(enum p9_debug_flags level, const char *func, + const char *fmt, ...) +{ + struct va_format vaf; + va_list args; + + if ((p9_debug_level & level) != level) + return; + + va_start(args, fmt); + + vaf.fmt = fmt; + vaf.va = &args; + + if (level == P9_DEBUG_9P) + pr_notice("(%8.8d) %pV", task_pid_nr(current), &vaf); + else + pr_notice("-- %s (%d): %pV", func, task_pid_nr(current), &vaf); + + va_end(args); +} +EXPORT_SYMBOL(_p9_debug); +#endif /* * Dynamic Transport Registration Routines * */ +static DEFINE_SPINLOCK(v9fs_trans_lock); static LIST_HEAD(v9fs_trans_list); -static struct p9_trans_module *v9fs_default_transport; /** * v9fs_register_trans - register a new transport with 9p - * @m - structure describing the transport module and entry points + * @m: structure describing the transport module and entry points * */ void v9fs_register_trans(struct p9_trans_module *m) { + spin_lock(&v9fs_trans_lock); list_add_tail(&m->list, &v9fs_trans_list); - if (m->def) - v9fs_default_transport = m; + spin_unlock(&v9fs_trans_lock); } EXPORT_SYMBOL(v9fs_register_trans); /** - * v9fs_match_trans - match transport versus registered transports - * @arg: string identifying transport + * v9fs_unregister_trans - unregister a 9p transport + * @m: the transport to remove * */ -struct p9_trans_module *v9fs_match_trans(const substring_t *name) +void v9fs_unregister_trans(struct p9_trans_module *m) { - struct list_head *p; - struct p9_trans_module *t = NULL; - - list_for_each(p, &v9fs_trans_list) { - t = list_entry(p, struct p9_trans_module, list); - if (strncmp(t->name, name->from, name->to-name->from) == 0) - return t; - } - return NULL; + spin_lock(&v9fs_trans_lock); + list_del_init(&m->list); + spin_unlock(&v9fs_trans_lock); } -EXPORT_SYMBOL(v9fs_match_trans); +EXPORT_SYMBOL(v9fs_unregister_trans); /** - * v9fs_default_trans - returns pointer to default transport + * v9fs_get_trans_by_name - get transport with the matching name + * @name: string identifying transport * */ +struct p9_trans_module *v9fs_get_trans_by_name(char *s) +{ + struct p9_trans_module *t, *found = NULL; + + spin_lock(&v9fs_trans_lock); + + list_for_each_entry(t, &v9fs_trans_list, list) + if (strcmp(t->name, s) == 0 && + try_module_get(t->owner)) { + found = t; + break; + } + + spin_unlock(&v9fs_trans_lock); + return found; +} +EXPORT_SYMBOL(v9fs_get_trans_by_name); -struct p9_trans_module *v9fs_default_trans(void) +/** + * v9fs_get_default_trans - get the default transport + * + */ + +struct p9_trans_module *v9fs_get_default_trans(void) { - if (v9fs_default_transport) - return v9fs_default_transport; - else if (!list_empty(&v9fs_trans_list)) - return list_first_entry(&v9fs_trans_list, - struct p9_trans_module, list); - else - return NULL; + struct p9_trans_module *t, *found = NULL; + + spin_lock(&v9fs_trans_lock); + + list_for_each_entry(t, &v9fs_trans_list, list) + if (t->def && try_module_get(t->owner)) { + found = t; + break; + } + + if (!found) + list_for_each_entry(t, &v9fs_trans_list, list) + if (try_module_get(t->owner)) { + found = t; + break; + } + + spin_unlock(&v9fs_trans_lock); + return found; } -EXPORT_SYMBOL(v9fs_default_trans); +EXPORT_SYMBOL(v9fs_get_default_trans); +/** + * v9fs_put_trans - put trans + * @m: transport to put + * + */ +void v9fs_put_trans(struct p9_trans_module *m) +{ + if (m) + module_put(m->owner); +} /** - * v9fs_init - Initialize module + * init_p9 - Initialize module * */ static int __init init_p9(void) @@ -109,19 +174,22 @@ static int __init init_p9(void) int ret = 0; p9_error_init(); - printk(KERN_INFO "Installing 9P2000 support\n"); + pr_info("Installing 9P2000 support\n"); + p9_trans_fd_init(); return ret; } /** - * v9fs_init - shutdown module + * exit_p9 - shutdown module * */ static void __exit exit_p9(void) { - printk(KERN_INFO "Unloading 9P2000 support\n"); + pr_info("Unloading 9P2000 support\n"); + + p9_trans_fd_exit(); } module_init(init_p9) diff --git a/net/9p/protocol.c b/net/9p/protocol.c new file mode 100644 index 00000000000..ab9127ec5b7 --- /dev/null +++ b/net/9p/protocol.c @@ -0,0 +1,636 @@ +/* + * net/9p/protocol.c + * + * 9P Protocol Support Code + * + * Copyright (C) 2008 by Eric Van Hensbergen <ericvh@gmail.com> + * + * Base on code from Anthony Liguori <aliguori@us.ibm.com> + * Copyright (C) 2008 by IBM, Corp. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to: + * Free Software Foundation + * 51 Franklin Street, Fifth Floor + * Boston, MA 02111-1301 USA + * + */ + +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/uaccess.h> +#include <linux/slab.h> +#include <linux/sched.h> +#include <linux/stddef.h> +#include <linux/types.h> +#include <net/9p/9p.h> +#include <net/9p/client.h> +#include "protocol.h" + +#include <trace/events/9p.h> + +static int +p9pdu_writef(struct p9_fcall *pdu, int proto_version, const char *fmt, ...); + +void p9stat_free(struct p9_wstat *stbuf) +{ + kfree(stbuf->name); + kfree(stbuf->uid); + kfree(stbuf->gid); + kfree(stbuf->muid); + kfree(stbuf->extension); +} +EXPORT_SYMBOL(p9stat_free); + +size_t pdu_read(struct p9_fcall *pdu, void *data, size_t size) +{ + size_t len = min(pdu->size - pdu->offset, size); + memcpy(data, &pdu->sdata[pdu->offset], len); + pdu->offset += len; + return size - len; +} + +static size_t pdu_write(struct p9_fcall *pdu, const void *data, size_t size) +{ + size_t len = min(pdu->capacity - pdu->size, size); + memcpy(&pdu->sdata[pdu->size], data, len); + pdu->size += len; + return size - len; +} + +static size_t +pdu_write_u(struct p9_fcall *pdu, const char __user *udata, size_t size) +{ + size_t len = min(pdu->capacity - pdu->size, size); + if (copy_from_user(&pdu->sdata[pdu->size], udata, len)) + len = 0; + + pdu->size += len; + return size - len; +} + +/* + b - int8_t + w - int16_t + d - int32_t + q - int64_t + s - string + u - numeric uid + g - numeric gid + S - stat + Q - qid + D - data blob (int32_t size followed by void *, results are not freed) + T - array of strings (int16_t count, followed by strings) + R - array of qids (int16_t count, followed by qids) + A - stat for 9p2000.L (p9_stat_dotl) + ? - if optional = 1, continue parsing +*/ + +static int +p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt, + va_list ap) +{ + const char *ptr; + int errcode = 0; + + for (ptr = fmt; *ptr; ptr++) { + switch (*ptr) { + case 'b':{ + int8_t *val = va_arg(ap, int8_t *); + if (pdu_read(pdu, val, sizeof(*val))) { + errcode = -EFAULT; + break; + } + } + break; + case 'w':{ + int16_t *val = va_arg(ap, int16_t *); + __le16 le_val; + if (pdu_read(pdu, &le_val, sizeof(le_val))) { + errcode = -EFAULT; + break; + } + *val = le16_to_cpu(le_val); + } + break; + case 'd':{ + int32_t *val = va_arg(ap, int32_t *); + __le32 le_val; + if (pdu_read(pdu, &le_val, sizeof(le_val))) { + errcode = -EFAULT; + break; + } + *val = le32_to_cpu(le_val); + } + break; + case 'q':{ + int64_t *val = va_arg(ap, int64_t *); + __le64 le_val; + if (pdu_read(pdu, &le_val, sizeof(le_val))) { + errcode = -EFAULT; + break; + } + *val = le64_to_cpu(le_val); + } + break; + case 's':{ + char **sptr = va_arg(ap, char **); + uint16_t len; + + errcode = p9pdu_readf(pdu, proto_version, + "w", &len); + if (errcode) + break; + + *sptr = kmalloc(len + 1, GFP_NOFS); + if (*sptr == NULL) { + errcode = -EFAULT; + break; + } + if (pdu_read(pdu, *sptr, len)) { + errcode = -EFAULT; + kfree(*sptr); + *sptr = NULL; + } else + (*sptr)[len] = 0; + } + break; + case 'u': { + kuid_t *uid = va_arg(ap, kuid_t *); + __le32 le_val; + if (pdu_read(pdu, &le_val, sizeof(le_val))) { + errcode = -EFAULT; + break; + } + *uid = make_kuid(&init_user_ns, + le32_to_cpu(le_val)); + } break; + case 'g': { + kgid_t *gid = va_arg(ap, kgid_t *); + __le32 le_val; + if (pdu_read(pdu, &le_val, sizeof(le_val))) { + errcode = -EFAULT; + break; + } + *gid = make_kgid(&init_user_ns, + le32_to_cpu(le_val)); + } break; + case 'Q':{ + struct p9_qid *qid = + va_arg(ap, struct p9_qid *); + + errcode = p9pdu_readf(pdu, proto_version, "bdq", + &qid->type, &qid->version, + &qid->path); + } + break; + case 'S':{ + struct p9_wstat *stbuf = + va_arg(ap, struct p9_wstat *); + + memset(stbuf, 0, sizeof(struct p9_wstat)); + stbuf->n_uid = stbuf->n_muid = INVALID_UID; + stbuf->n_gid = INVALID_GID; + + errcode = + p9pdu_readf(pdu, proto_version, + "wwdQdddqssss?sugu", + &stbuf->size, &stbuf->type, + &stbuf->dev, &stbuf->qid, + &stbuf->mode, &stbuf->atime, + &stbuf->mtime, &stbuf->length, + &stbuf->name, &stbuf->uid, + &stbuf->gid, &stbuf->muid, + &stbuf->extension, + &stbuf->n_uid, &stbuf->n_gid, + &stbuf->n_muid); + if (errcode) + p9stat_free(stbuf); + } + break; + case 'D':{ + uint32_t *count = va_arg(ap, uint32_t *); + void **data = va_arg(ap, void **); + + errcode = + p9pdu_readf(pdu, proto_version, "d", count); + if (!errcode) { + *count = + min_t(uint32_t, *count, + pdu->size - pdu->offset); + *data = &pdu->sdata[pdu->offset]; + } + } + break; + case 'T':{ + uint16_t *nwname = va_arg(ap, uint16_t *); + char ***wnames = va_arg(ap, char ***); + + errcode = p9pdu_readf(pdu, proto_version, + "w", nwname); + if (!errcode) { + *wnames = + kmalloc(sizeof(char *) * *nwname, + GFP_NOFS); + if (!*wnames) + errcode = -ENOMEM; + } + + if (!errcode) { + int i; + + for (i = 0; i < *nwname; i++) { + errcode = + p9pdu_readf(pdu, + proto_version, + "s", + &(*wnames)[i]); + if (errcode) + break; + } + } + + if (errcode) { + if (*wnames) { + int i; + + for (i = 0; i < *nwname; i++) + kfree((*wnames)[i]); + } + kfree(*wnames); + *wnames = NULL; + } + } + break; + case 'R':{ + int16_t *nwqid = va_arg(ap, int16_t *); + struct p9_qid **wqids = + va_arg(ap, struct p9_qid **); + + *wqids = NULL; + + errcode = + p9pdu_readf(pdu, proto_version, "w", nwqid); + if (!errcode) { + *wqids = + kmalloc(*nwqid * + sizeof(struct p9_qid), + GFP_NOFS); + if (*wqids == NULL) + errcode = -ENOMEM; + } + + if (!errcode) { + int i; + + for (i = 0; i < *nwqid; i++) { + errcode = + p9pdu_readf(pdu, + proto_version, + "Q", + &(*wqids)[i]); + if (errcode) + break; + } + } + + if (errcode) { + kfree(*wqids); + *wqids = NULL; + } + } + break; + case 'A': { + struct p9_stat_dotl *stbuf = + va_arg(ap, struct p9_stat_dotl *); + + memset(stbuf, 0, sizeof(struct p9_stat_dotl)); + errcode = + p9pdu_readf(pdu, proto_version, + "qQdugqqqqqqqqqqqqqqq", + &stbuf->st_result_mask, + &stbuf->qid, + &stbuf->st_mode, + &stbuf->st_uid, &stbuf->st_gid, + &stbuf->st_nlink, + &stbuf->st_rdev, &stbuf->st_size, + &stbuf->st_blksize, &stbuf->st_blocks, + &stbuf->st_atime_sec, + &stbuf->st_atime_nsec, + &stbuf->st_mtime_sec, + &stbuf->st_mtime_nsec, + &stbuf->st_ctime_sec, + &stbuf->st_ctime_nsec, + &stbuf->st_btime_sec, + &stbuf->st_btime_nsec, + &stbuf->st_gen, + &stbuf->st_data_version); + } + break; + case '?': + if ((proto_version != p9_proto_2000u) && + (proto_version != p9_proto_2000L)) + return 0; + break; + default: + BUG(); + break; + } + + if (errcode) + break; + } + + return errcode; +} + +int +p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt, + va_list ap) +{ + const char *ptr; + int errcode = 0; + + for (ptr = fmt; *ptr; ptr++) { + switch (*ptr) { + case 'b':{ + int8_t val = va_arg(ap, int); + if (pdu_write(pdu, &val, sizeof(val))) + errcode = -EFAULT; + } + break; + case 'w':{ + __le16 val = cpu_to_le16(va_arg(ap, int)); + if (pdu_write(pdu, &val, sizeof(val))) + errcode = -EFAULT; + } + break; + case 'd':{ + __le32 val = cpu_to_le32(va_arg(ap, int32_t)); + if (pdu_write(pdu, &val, sizeof(val))) + errcode = -EFAULT; + } + break; + case 'q':{ + __le64 val = cpu_to_le64(va_arg(ap, int64_t)); + if (pdu_write(pdu, &val, sizeof(val))) + errcode = -EFAULT; + } + break; + case 's':{ + const char *sptr = va_arg(ap, const char *); + uint16_t len = 0; + if (sptr) + len = min_t(size_t, strlen(sptr), + USHRT_MAX); + + errcode = p9pdu_writef(pdu, proto_version, + "w", len); + if (!errcode && pdu_write(pdu, sptr, len)) + errcode = -EFAULT; + } + break; + case 'u': { + kuid_t uid = va_arg(ap, kuid_t); + __le32 val = cpu_to_le32( + from_kuid(&init_user_ns, uid)); + if (pdu_write(pdu, &val, sizeof(val))) + errcode = -EFAULT; + } break; + case 'g': { + kgid_t gid = va_arg(ap, kgid_t); + __le32 val = cpu_to_le32( + from_kgid(&init_user_ns, gid)); + if (pdu_write(pdu, &val, sizeof(val))) + errcode = -EFAULT; + } break; + case 'Q':{ + const struct p9_qid *qid = + va_arg(ap, const struct p9_qid *); + errcode = + p9pdu_writef(pdu, proto_version, "bdq", + qid->type, qid->version, + qid->path); + } break; + case 'S':{ + const struct p9_wstat *stbuf = + va_arg(ap, const struct p9_wstat *); + errcode = + p9pdu_writef(pdu, proto_version, + "wwdQdddqssss?sugu", + stbuf->size, stbuf->type, + stbuf->dev, &stbuf->qid, + stbuf->mode, stbuf->atime, + stbuf->mtime, stbuf->length, + stbuf->name, stbuf->uid, + stbuf->gid, stbuf->muid, + stbuf->extension, stbuf->n_uid, + stbuf->n_gid, stbuf->n_muid); + } break; + case 'D':{ + uint32_t count = va_arg(ap, uint32_t); + const void *data = va_arg(ap, const void *); + + errcode = p9pdu_writef(pdu, proto_version, "d", + count); + if (!errcode && pdu_write(pdu, data, count)) + errcode = -EFAULT; + } + break; + case 'U':{ + int32_t count = va_arg(ap, int32_t); + const char __user *udata = + va_arg(ap, const void __user *); + errcode = p9pdu_writef(pdu, proto_version, "d", + count); + if (!errcode && pdu_write_u(pdu, udata, count)) + errcode = -EFAULT; + } + break; + case 'T':{ + uint16_t nwname = va_arg(ap, int); + const char **wnames = va_arg(ap, const char **); + + errcode = p9pdu_writef(pdu, proto_version, "w", + nwname); + if (!errcode) { + int i; + + for (i = 0; i < nwname; i++) { + errcode = + p9pdu_writef(pdu, + proto_version, + "s", + wnames[i]); + if (errcode) + break; + } + } + } + break; + case 'R':{ + int16_t nwqid = va_arg(ap, int); + struct p9_qid *wqids = + va_arg(ap, struct p9_qid *); + + errcode = p9pdu_writef(pdu, proto_version, "w", + nwqid); + if (!errcode) { + int i; + + for (i = 0; i < nwqid; i++) { + errcode = + p9pdu_writef(pdu, + proto_version, + "Q", + &wqids[i]); + if (errcode) + break; + } + } + } + break; + case 'I':{ + struct p9_iattr_dotl *p9attr = va_arg(ap, + struct p9_iattr_dotl *); + + errcode = p9pdu_writef(pdu, proto_version, + "ddugqqqqq", + p9attr->valid, + p9attr->mode, + p9attr->uid, + p9attr->gid, + p9attr->size, + p9attr->atime_sec, + p9attr->atime_nsec, + p9attr->mtime_sec, + p9attr->mtime_nsec); + } + break; + case '?': + if ((proto_version != p9_proto_2000u) && + (proto_version != p9_proto_2000L)) + return 0; + break; + default: + BUG(); + break; + } + + if (errcode) + break; + } + + return errcode; +} + +int p9pdu_readf(struct p9_fcall *pdu, int proto_version, const char *fmt, ...) +{ + va_list ap; + int ret; + + va_start(ap, fmt); + ret = p9pdu_vreadf(pdu, proto_version, fmt, ap); + va_end(ap); + + return ret; +} + +static int +p9pdu_writef(struct p9_fcall *pdu, int proto_version, const char *fmt, ...) +{ + va_list ap; + int ret; + + va_start(ap, fmt); + ret = p9pdu_vwritef(pdu, proto_version, fmt, ap); + va_end(ap); + + return ret; +} + +int p9stat_read(struct p9_client *clnt, char *buf, int len, struct p9_wstat *st) +{ + struct p9_fcall fake_pdu; + int ret; + + fake_pdu.size = len; + fake_pdu.capacity = len; + fake_pdu.sdata = buf; + fake_pdu.offset = 0; + + ret = p9pdu_readf(&fake_pdu, clnt->proto_version, "S", st); + if (ret) { + p9_debug(P9_DEBUG_9P, "<<< p9stat_read failed: %d\n", ret); + trace_9p_protocol_dump(clnt, &fake_pdu); + } + + return ret; +} +EXPORT_SYMBOL(p9stat_read); + +int p9pdu_prepare(struct p9_fcall *pdu, int16_t tag, int8_t type) +{ + pdu->id = type; + return p9pdu_writef(pdu, 0, "dbw", 0, type, tag); +} + +int p9pdu_finalize(struct p9_client *clnt, struct p9_fcall *pdu) +{ + int size = pdu->size; + int err; + + pdu->size = 0; + err = p9pdu_writef(pdu, 0, "d", size); + pdu->size = size; + + trace_9p_protocol_dump(clnt, pdu); + p9_debug(P9_DEBUG_9P, ">>> size=%d type: %d tag: %d\n", + pdu->size, pdu->id, pdu->tag); + + return err; +} + +void p9pdu_reset(struct p9_fcall *pdu) +{ + pdu->offset = 0; + pdu->size = 0; +} + +int p9dirent_read(struct p9_client *clnt, char *buf, int len, + struct p9_dirent *dirent) +{ + struct p9_fcall fake_pdu; + int ret; + char *nameptr; + + fake_pdu.size = len; + fake_pdu.capacity = len; + fake_pdu.sdata = buf; + fake_pdu.offset = 0; + + ret = p9pdu_readf(&fake_pdu, clnt->proto_version, "Qqbs", &dirent->qid, + &dirent->d_off, &dirent->d_type, &nameptr); + if (ret) { + p9_debug(P9_DEBUG_9P, "<<< p9dirent_read failed: %d\n", ret); + trace_9p_protocol_dump(clnt, &fake_pdu); + goto out; + } + + strcpy(dirent->d_name, nameptr); + kfree(nameptr); + +out: + return fake_pdu.offset; +} +EXPORT_SYMBOL(p9dirent_read); diff --git a/net/9p/protocol.h b/net/9p/protocol.h new file mode 100644 index 00000000000..2cc525fa49f --- /dev/null +++ b/net/9p/protocol.h @@ -0,0 +1,34 @@ +/* + * net/9p/protocol.h + * + * 9P Protocol Support Code + * + * Copyright (C) 2008 by Eric Van Hensbergen <ericvh@gmail.com> + * + * Base on code from Anthony Liguori <aliguori@us.ibm.com> + * Copyright (C) 2008 by IBM, Corp. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to: + * Free Software Foundation + * 51 Franklin Street, Fifth Floor + * Boston, MA 02111-1301 USA + * + */ + +int p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt, + va_list ap); +int p9pdu_readf(struct p9_fcall *pdu, int proto_version, const char *fmt, ...); +int p9pdu_prepare(struct p9_fcall *pdu, int16_t tag, int8_t type); +int p9pdu_finalize(struct p9_client *clnt, struct p9_fcall *pdu); +void p9pdu_reset(struct p9_fcall *pdu); +size_t pdu_read(struct p9_fcall *pdu, void *data, size_t size); diff --git a/net/9p/trans_common.c b/net/9p/trans_common.c new file mode 100644 index 00000000000..2ee3879161b --- /dev/null +++ b/net/9p/trans_common.c @@ -0,0 +1,69 @@ +/* + * Copyright IBM Corporation, 2010 + * Author Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2.1 of the GNU Lesser General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + */ + +#include <linux/slab.h> +#include <linux/module.h> +#include <net/9p/9p.h> +#include <net/9p/client.h> +#include <linux/scatterlist.h> +#include "trans_common.h" + +/** + * p9_release_req_pages - Release pages after the transaction. + */ +void p9_release_pages(struct page **pages, int nr_pages) +{ + int i; + + for (i = 0; i < nr_pages; i++) + if (pages[i]) + put_page(pages[i]); +} +EXPORT_SYMBOL(p9_release_pages); + +/** + * p9_nr_pages - Return number of pages needed to accommodate the payload. + */ +int p9_nr_pages(char *data, int len) +{ + unsigned long start_page, end_page; + start_page = (unsigned long)data >> PAGE_SHIFT; + end_page = ((unsigned long)data + len + PAGE_SIZE - 1) >> PAGE_SHIFT; + return end_page - start_page; +} +EXPORT_SYMBOL(p9_nr_pages); + +/** + * payload_gup - Translates user buffer into kernel pages and + * pins them either for read/write through get_user_pages_fast(). + * @req: Request to be sent to server. + * @pdata_off: data offset into the first page after translation (gup). + * @pdata_len: Total length of the IO. gup may not return requested # of pages. + * @nr_pages: number of pages to accommodate the payload + * @rw: Indicates if the pages are for read or write. + */ + +int p9_payload_gup(char *data, int *nr_pages, struct page **pages, int write) +{ + int nr_mapped_pages; + + nr_mapped_pages = get_user_pages_fast((unsigned long)data, + *nr_pages, write, pages); + if (nr_mapped_pages <= 0) + return nr_mapped_pages; + + *nr_pages = nr_mapped_pages; + return 0; +} +EXPORT_SYMBOL(p9_payload_gup); diff --git a/net/9p/trans_common.h b/net/9p/trans_common.h new file mode 100644 index 00000000000..173bb550a9e --- /dev/null +++ b/net/9p/trans_common.h @@ -0,0 +1,17 @@ +/* + * Copyright IBM Corporation, 2010 + * Author Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2.1 of the GNU Lesser General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + */ + +void p9_release_pages(struct page **, int); +int p9_payload_gup(char *, int *, struct page **, int); +int p9_nr_pages(char *, int); diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index f624dff7685..80d08f6664c 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -25,6 +25,8 @@ * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/in.h> #include <linux/module.h> #include <linux/net.h> @@ -38,25 +40,30 @@ #include <linux/idr.h> #include <linux/file.h> #include <linux/parser.h> +#include <linux/slab.h> #include <net/9p/9p.h> +#include <net/9p/client.h> #include <net/9p/transport.h> +#include <linux/syscalls.h> /* killme */ + #define P9_PORT 564 #define MAX_SOCK_BUF (64*1024) -#define ERREQFLUSH 1 -#define SCHED_TIMEOUT 10 #define MAXPOLLWADDR 2 +/** + * struct p9_fd_opts - per-transport options + * @rfd: file descriptor for reading (trans=fd) + * @wfd: file descriptor for writing (trans=fd) + * @port: port to connect to (trans=tcp) + * + */ + struct p9_fd_opts { int rfd; int wfd; u16 port; -}; - -struct p9_trans_fd { - struct file *rd; - struct file *wr; - struct p9_conn *conn; + int privport; }; /* @@ -67,12 +74,15 @@ struct p9_trans_fd { enum { /* Options that take integer arguments */ Opt_port, Opt_rfdno, Opt_wfdno, Opt_err, + /* Options that take no arguments */ + Opt_privport, }; -static match_table_t tokens = { +static const match_table_t tokens = { {Opt_port, "port=%u"}, {Opt_rfdno, "rfdno=%u"}, {Opt_wfdno, "wfdno=%u"}, + {Opt_privport, "privport"}, {Opt_err, NULL}, }; @@ -83,456 +93,360 @@ enum { Wpending = 8, /* can write */ }; -enum { - None, - Flushing, - Flushed, -}; - -struct p9_req; - -typedef void (*p9_conn_req_callback)(struct p9_req *req, void *a); -struct p9_req { - spinlock_t lock; /* protect request structure */ - int tag; - struct p9_fcall *tcall; - struct p9_fcall *rcall; - int err; - p9_conn_req_callback cb; - void *cba; - int flush; - struct list_head req_list; +struct p9_poll_wait { + struct p9_conn *conn; + wait_queue_t wait; + wait_queue_head_t *wait_addr; }; -struct p9_mux_poll_task; +/** + * struct p9_conn - fd mux connection state information + * @mux_list: list link for mux to manage multiple connections (?) + * @client: reference to client instance for this connection + * @err: error state + * @req_list: accounting for requests which have been sent + * @unsent_req_list: accounting for requests that haven't been sent + * @req: current request being processed (if any) + * @tmp_buf: temporary buffer to read in header + * @rsize: amount to read for current frame + * @rpos: read position in current frame + * @rbuf: current read buffer + * @wpos: write position for current frame + * @wsize: amount of data to write for current frame + * @wbuf: current write buffer + * @poll_pending_link: pending links to be polled per conn + * @poll_wait: array of wait_q's for various worker threads + * @pt: poll state + * @rq: current read work + * @wq: current write work + * @wsched: ???? + * + */ struct p9_conn { - spinlock_t lock; /* protect lock structure */ struct list_head mux_list; - struct p9_mux_poll_task *poll_task; - int msize; - unsigned char extended; - struct p9_trans *trans; - struct p9_idpool *tagpool; + struct p9_client *client; int err; - wait_queue_head_t equeue; struct list_head req_list; struct list_head unsent_req_list; - struct p9_fcall *rcall; + struct p9_req_t *req; + char tmp_buf[7]; + int rsize; int rpos; char *rbuf; int wpos; int wsize; char *wbuf; - wait_queue_t poll_wait[MAXPOLLWADDR]; - wait_queue_head_t *poll_waddr[MAXPOLLWADDR]; + struct list_head poll_pending_link; + struct p9_poll_wait poll_wait[MAXPOLLWADDR]; poll_table pt; struct work_struct rq; struct work_struct wq; unsigned long wsched; }; -struct p9_mux_poll_task { - struct task_struct *task; - struct list_head mux_list; - int muxnum; -}; +/** + * struct p9_trans_fd - transport state + * @rd: reference to file to read from + * @wr: reference of file to write to + * @conn: connection state reference + * + */ -struct p9_mux_rpc { - struct p9_conn *m; - int err; - struct p9_fcall *tcall; - struct p9_fcall *rcall; - wait_queue_head_t wqueue; +struct p9_trans_fd { + struct file *rd; + struct file *wr; + struct p9_conn conn; }; -static int p9_poll_proc(void *); -static void p9_read_work(struct work_struct *work); -static void p9_write_work(struct work_struct *work); -static void p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, - poll_table *p); -static int p9_fd_write(struct p9_trans *trans, void *v, int len); -static int p9_fd_read(struct p9_trans *trans, void *v, int len); - -static DEFINE_MUTEX(p9_mux_task_lock); -static struct workqueue_struct *p9_mux_wq; - -static int p9_mux_num; -static int p9_mux_poll_task_num; -static struct p9_mux_poll_task p9_mux_poll_tasks[100]; +static void p9_poll_workfn(struct work_struct *work); -static void p9_conn_destroy(struct p9_conn *); -static unsigned int p9_fd_poll(struct p9_trans *trans, - struct poll_table_struct *pt); +static DEFINE_SPINLOCK(p9_poll_lock); +static LIST_HEAD(p9_poll_pending_list); +static DECLARE_WORK(p9_poll_work, p9_poll_workfn); -#ifdef P9_NONBLOCK -static int p9_conn_rpcnb(struct p9_conn *m, struct p9_fcall *tc, - p9_conn_req_callback cb, void *a); -#endif /* P9_NONBLOCK */ +static unsigned int p9_ipport_resv_min = P9_DEF_MIN_RESVPORT; +static unsigned int p9_ipport_resv_max = P9_DEF_MAX_RESVPORT; -static void p9_conn_cancel(struct p9_conn *m, int err); - -static int p9_mux_global_init(void) +static void p9_mux_poll_stop(struct p9_conn *m) { + unsigned long flags; int i; - for (i = 0; i < ARRAY_SIZE(p9_mux_poll_tasks); i++) - p9_mux_poll_tasks[i].task = NULL; + for (i = 0; i < ARRAY_SIZE(m->poll_wait); i++) { + struct p9_poll_wait *pwait = &m->poll_wait[i]; - p9_mux_wq = create_workqueue("v9fs"); - if (!p9_mux_wq) { - printk(KERN_WARNING "v9fs: mux: creating workqueue failed\n"); - return -ENOMEM; + if (pwait->wait_addr) { + remove_wait_queue(pwait->wait_addr, &pwait->wait); + pwait->wait_addr = NULL; + } } - return 0; -} - -static u16 p9_mux_get_tag(struct p9_conn *m) -{ - int tag; - - tag = p9_idpool_get(m->tagpool); - if (tag < 0) - return P9_NOTAG; - else - return (u16) tag; -} - -static void p9_mux_put_tag(struct p9_conn *m, u16 tag) -{ - if (tag != P9_NOTAG && p9_idpool_check(tag, m->tagpool)) - p9_idpool_put(tag, m->tagpool); + spin_lock_irqsave(&p9_poll_lock, flags); + list_del_init(&m->poll_pending_link); + spin_unlock_irqrestore(&p9_poll_lock, flags); } /** - * p9_mux_calc_poll_procs - calculates the number of polling procs - * based on the number of mounted v9fs filesystems. + * p9_conn_cancel - cancel all pending requests with error + * @m: mux data + * @err: error code * - * The current implementation returns sqrt of the number of mounts. */ -static int p9_mux_calc_poll_procs(int muxnum) -{ - int n; - - if (p9_mux_poll_task_num) - n = muxnum / p9_mux_poll_task_num + - (muxnum % p9_mux_poll_task_num ? 1 : 0); - else - n = 1; - - if (n > ARRAY_SIZE(p9_mux_poll_tasks)) - n = ARRAY_SIZE(p9_mux_poll_tasks); - - return n; -} -static int p9_mux_poll_start(struct p9_conn *m) +static void p9_conn_cancel(struct p9_conn *m, int err) { - int i, n; - struct p9_mux_poll_task *vpt, *vptlast; - struct task_struct *pproc; - - P9_DPRINTK(P9_DEBUG_MUX, "mux %p muxnum %d procnum %d\n", m, p9_mux_num, - p9_mux_poll_task_num); - mutex_lock(&p9_mux_task_lock); - - n = p9_mux_calc_poll_procs(p9_mux_num + 1); - if (n > p9_mux_poll_task_num) { - for (i = 0; i < ARRAY_SIZE(p9_mux_poll_tasks); i++) { - if (p9_mux_poll_tasks[i].task == NULL) { - vpt = &p9_mux_poll_tasks[i]; - P9_DPRINTK(P9_DEBUG_MUX, "create proc %p\n", - vpt); - pproc = kthread_create(p9_poll_proc, vpt, - "v9fs-poll"); - - if (!IS_ERR(pproc)) { - vpt->task = pproc; - INIT_LIST_HEAD(&vpt->mux_list); - vpt->muxnum = 0; - p9_mux_poll_task_num++; - wake_up_process(vpt->task); - } - break; - } - } + struct p9_req_t *req, *rtmp; + unsigned long flags; + LIST_HEAD(cancel_list); - if (i >= ARRAY_SIZE(p9_mux_poll_tasks)) - P9_DPRINTK(P9_DEBUG_ERROR, - "warning: no free poll slots\n"); - } + p9_debug(P9_DEBUG_ERROR, "mux %p err %d\n", m, err); - n = (p9_mux_num + 1) / p9_mux_poll_task_num + - ((p9_mux_num + 1) % p9_mux_poll_task_num ? 1 : 0); - - vptlast = NULL; - for (i = 0; i < ARRAY_SIZE(p9_mux_poll_tasks); i++) { - vpt = &p9_mux_poll_tasks[i]; - if (vpt->task != NULL) { - vptlast = vpt; - if (vpt->muxnum < n) { - P9_DPRINTK(P9_DEBUG_MUX, "put in proc %d\n", i); - list_add(&m->mux_list, &vpt->mux_list); - vpt->muxnum++; - m->poll_task = vpt; - memset(&m->poll_waddr, 0, - sizeof(m->poll_waddr)); - init_poll_funcptr(&m->pt, p9_pollwait); - break; - } - } - } + spin_lock_irqsave(&m->client->lock, flags); - if (i >= ARRAY_SIZE(p9_mux_poll_tasks)) { - if (vptlast == NULL) { - mutex_unlock(&p9_mux_task_lock); - return -ENOMEM; - } - - P9_DPRINTK(P9_DEBUG_MUX, "put in proc %d\n", i); - list_add(&m->mux_list, &vptlast->mux_list); - vptlast->muxnum++; - m->poll_task = vptlast; - memset(&m->poll_waddr, 0, sizeof(m->poll_waddr)); - init_poll_funcptr(&m->pt, p9_pollwait); + if (m->err) { + spin_unlock_irqrestore(&m->client->lock, flags); + return; } - p9_mux_num++; - mutex_unlock(&p9_mux_task_lock); - - return 0; -} + m->err = err; -static void p9_mux_poll_stop(struct p9_conn *m) -{ - int i; - struct p9_mux_poll_task *vpt; - - mutex_lock(&p9_mux_task_lock); - vpt = m->poll_task; - list_del(&m->mux_list); - for (i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) { - if (m->poll_waddr[i] != NULL) { - remove_wait_queue(m->poll_waddr[i], &m->poll_wait[i]); - m->poll_waddr[i] = NULL; - } + list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) { + list_move(&req->req_list, &cancel_list); + } + list_for_each_entry_safe(req, rtmp, &m->unsent_req_list, req_list) { + list_move(&req->req_list, &cancel_list); } - vpt->muxnum--; - if (!vpt->muxnum) { - P9_DPRINTK(P9_DEBUG_MUX, "destroy proc %p\n", vpt); - kthread_stop(vpt->task); - vpt->task = NULL; - p9_mux_poll_task_num--; + spin_unlock_irqrestore(&m->client->lock, flags); + + list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) { + p9_debug(P9_DEBUG_ERROR, "call back req %p\n", req); + list_del(&req->req_list); + if (!req->t_err) + req->t_err = err; + p9_client_cb(m->client, req, REQ_STATUS_ERROR); } - p9_mux_num--; - mutex_unlock(&p9_mux_task_lock); } -/** - * p9_conn_create - allocate and initialize the per-session mux data - * Creates the polling task if this is the first session. - * - * @trans - transport structure - * @msize - maximum message size - * @extended - extended flag - */ -static struct p9_conn *p9_conn_create(struct p9_trans *trans) +static int +p9_fd_poll(struct p9_client *client, struct poll_table_struct *pt) { - int i, n; - struct p9_conn *m, *mtmp; + int ret, n; + struct p9_trans_fd *ts = NULL; - P9_DPRINTK(P9_DEBUG_MUX, "transport %p msize %d\n", trans, - trans->msize); - m = kmalloc(sizeof(struct p9_conn), GFP_KERNEL); - if (!m) - return ERR_PTR(-ENOMEM); + if (client && client->status == Connected) + ts = client->trans; - spin_lock_init(&m->lock); - INIT_LIST_HEAD(&m->mux_list); - m->msize = trans->msize; - m->extended = trans->extended; - m->trans = trans; - m->tagpool = p9_idpool_create(); - if (IS_ERR(m->tagpool)) { - mtmp = ERR_PTR(-ENOMEM); - kfree(m); - return mtmp; - } + if (!ts) + return -EREMOTEIO; - m->err = 0; - init_waitqueue_head(&m->equeue); - INIT_LIST_HEAD(&m->req_list); - INIT_LIST_HEAD(&m->unsent_req_list); - m->rcall = NULL; - m->rpos = 0; - m->rbuf = NULL; - m->wpos = m->wsize = 0; - m->wbuf = NULL; - INIT_WORK(&m->rq, p9_read_work); - INIT_WORK(&m->wq, p9_write_work); - m->wsched = 0; - memset(&m->poll_waddr, 0, sizeof(m->poll_waddr)); - m->poll_task = NULL; - n = p9_mux_poll_start(m); - if (n) { - kfree(m); - return ERR_PTR(n); - } + if (!ts->rd->f_op->poll) + return -EIO; - n = p9_fd_poll(trans, &m->pt); - if (n & POLLIN) { - P9_DPRINTK(P9_DEBUG_MUX, "mux %p can read\n", m); - set_bit(Rpending, &m->wsched); - } + if (!ts->wr->f_op->poll) + return -EIO; - if (n & POLLOUT) { - P9_DPRINTK(P9_DEBUG_MUX, "mux %p can write\n", m); - set_bit(Wpending, &m->wsched); - } + ret = ts->rd->f_op->poll(ts->rd, pt); + if (ret < 0) + return ret; - for (i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) { - if (IS_ERR(m->poll_waddr[i])) { - p9_mux_poll_stop(m); - mtmp = (void *)m->poll_waddr; /* the error code */ - kfree(m); - m = mtmp; - break; - } + if (ts->rd != ts->wr) { + n = ts->wr->f_op->poll(ts->wr, pt); + if (n < 0) + return n; + ret = (ret & ~POLLOUT) | (n & ~POLLIN); } - return m; + return ret; } /** - * p9_mux_destroy - cancels all pending requests and frees mux resources + * p9_fd_read- read from a fd + * @client: client instance + * @v: buffer to receive data into + * @len: size of receive buffer + * */ -static void p9_conn_destroy(struct p9_conn *m) + +static int p9_fd_read(struct p9_client *client, void *v, int len) { - P9_DPRINTK(P9_DEBUG_MUX, "mux %p prev %p next %p\n", m, - m->mux_list.prev, m->mux_list.next); - p9_conn_cancel(m, -ECONNRESET); + int ret; + struct p9_trans_fd *ts = NULL; - if (!list_empty(&m->req_list)) { - /* wait until all processes waiting on this session exit */ - P9_DPRINTK(P9_DEBUG_MUX, - "mux %p waiting for empty request queue\n", m); - wait_event_timeout(m->equeue, (list_empty(&m->req_list)), 5000); - P9_DPRINTK(P9_DEBUG_MUX, "mux %p request queue empty: %d\n", m, - list_empty(&m->req_list)); - } + if (client && client->status != Disconnected) + ts = client->trans; - p9_mux_poll_stop(m); - m->trans = NULL; - p9_idpool_destroy(m->tagpool); - kfree(m); + if (!ts) + return -EREMOTEIO; + + if (!(ts->rd->f_flags & O_NONBLOCK)) + p9_debug(P9_DEBUG_ERROR, "blocking read ...\n"); + + ret = kernel_read(ts->rd, ts->rd->f_pos, v, len); + if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN) + client->status = Disconnected; + return ret; } /** - * p9_pollwait - called by files poll operation to add v9fs-poll task - * to files wait queue + * p9_read_work - called when there is some data to be read from a transport + * @work: container of work to be done + * */ -static void -p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p) + +static void p9_read_work(struct work_struct *work) { - int i; + int n, err; struct p9_conn *m; + int status = REQ_STATUS_ERROR; - m = container_of(p, struct p9_conn, pt); - for (i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) - if (m->poll_waddr[i] == NULL) - break; + m = container_of(work, struct p9_conn, rq); - if (i >= ARRAY_SIZE(m->poll_waddr)) { - P9_DPRINTK(P9_DEBUG_ERROR, "not enough wait_address slots\n"); + if (m->err < 0) return; - } - m->poll_waddr[i] = wait_address; + p9_debug(P9_DEBUG_TRANS, "start mux %p pos %d\n", m, m->rpos); - if (!wait_address) { - P9_DPRINTK(P9_DEBUG_ERROR, "no wait_address\n"); - m->poll_waddr[i] = ERR_PTR(-EIO); - return; + if (!m->rbuf) { + m->rbuf = m->tmp_buf; + m->rpos = 0; + m->rsize = 7; /* start by reading header */ } - init_waitqueue_entry(&m->poll_wait[i], m->poll_task->task); - add_wait_queue(wait_address, &m->poll_wait[i]); -} + clear_bit(Rpending, &m->wsched); + p9_debug(P9_DEBUG_TRANS, "read mux %p pos %d size: %d = %d\n", + m, m->rpos, m->rsize, m->rsize-m->rpos); + err = p9_fd_read(m->client, m->rbuf + m->rpos, + m->rsize - m->rpos); + p9_debug(P9_DEBUG_TRANS, "mux %p got %d bytes\n", m, err); + if (err == -EAGAIN) { + goto end_clear; + } -/** - * p9_poll_mux - polls a mux and schedules read or write works if necessary - */ -static void p9_poll_mux(struct p9_conn *m) -{ - int n; + if (err <= 0) + goto error; - if (m->err < 0) - return; + m->rpos += err; - n = p9_fd_poll(m->trans, NULL); - if (n < 0 || n & (POLLERR | POLLHUP | POLLNVAL)) { - P9_DPRINTK(P9_DEBUG_MUX, "error mux %p err %d\n", m, n); - if (n >= 0) - n = -ECONNRESET; - p9_conn_cancel(m, n); - } + if ((!m->req) && (m->rpos == m->rsize)) { /* header read in */ + u16 tag; + p9_debug(P9_DEBUG_TRANS, "got new header\n"); - if (n & POLLIN) { - set_bit(Rpending, &m->wsched); - P9_DPRINTK(P9_DEBUG_MUX, "mux %p can read\n", m); - if (!test_and_set_bit(Rworksched, &m->wsched)) { - P9_DPRINTK(P9_DEBUG_MUX, "schedule read work %p\n", m); - queue_work(p9_mux_wq, &m->rq); + n = le32_to_cpu(*(__le32 *) m->rbuf); /* read packet size */ + if (n >= m->client->msize) { + p9_debug(P9_DEBUG_ERROR, + "requested packet size too big: %d\n", n); + err = -EIO; + goto error; } + + tag = le16_to_cpu(*(__le16 *) (m->rbuf+5)); /* read tag */ + p9_debug(P9_DEBUG_TRANS, + "mux %p pkt: size: %d bytes tag: %d\n", m, n, tag); + + m->req = p9_tag_lookup(m->client, tag); + if (!m->req || (m->req->status != REQ_STATUS_SENT)) { + p9_debug(P9_DEBUG_ERROR, "Unexpected packet tag %d\n", + tag); + err = -EIO; + goto error; + } + + if (m->req->rc == NULL) { + m->req->rc = kmalloc(sizeof(struct p9_fcall) + + m->client->msize, GFP_NOFS); + if (!m->req->rc) { + m->req = NULL; + err = -ENOMEM; + goto error; + } + } + m->rbuf = (char *)m->req->rc + sizeof(struct p9_fcall); + memcpy(m->rbuf, m->tmp_buf, m->rsize); + m->rsize = n; } - if (n & POLLOUT) { - set_bit(Wpending, &m->wsched); - P9_DPRINTK(P9_DEBUG_MUX, "mux %p can write\n", m); - if ((m->wsize || !list_empty(&m->unsent_req_list)) - && !test_and_set_bit(Wworksched, &m->wsched)) { - P9_DPRINTK(P9_DEBUG_MUX, "schedule write work %p\n", m); - queue_work(p9_mux_wq, &m->wq); + /* not an else because some packets (like clunk) have no payload */ + if ((m->req) && (m->rpos == m->rsize)) { /* packet is read in */ + p9_debug(P9_DEBUG_TRANS, "got new packet\n"); + spin_lock(&m->client->lock); + if (m->req->status != REQ_STATUS_ERROR) + status = REQ_STATUS_RCVD; + list_del(&m->req->req_list); + spin_unlock(&m->client->lock); + p9_client_cb(m->client, m->req, status); + m->rbuf = NULL; + m->rpos = 0; + m->rsize = 0; + m->req = NULL; + } + +end_clear: + clear_bit(Rworksched, &m->wsched); + + if (!list_empty(&m->req_list)) { + if (test_and_clear_bit(Rpending, &m->wsched)) + n = POLLIN; + else + n = p9_fd_poll(m->client, NULL); + + if ((n & POLLIN) && !test_and_set_bit(Rworksched, &m->wsched)) { + p9_debug(P9_DEBUG_TRANS, "sched read work %p\n", m); + schedule_work(&m->rq); } } + + return; +error: + p9_conn_cancel(m, err); + clear_bit(Rworksched, &m->wsched); } /** - * p9_poll_proc - polls all v9fs transports for new events and queues - * the appropriate work to the work queue + * p9_fd_write - write to a socket + * @client: client instance + * @v: buffer to send data from + * @len: size of send buffer + * */ -static int p9_poll_proc(void *a) + +static int p9_fd_write(struct p9_client *client, void *v, int len) { - struct p9_conn *m, *mtmp; - struct p9_mux_poll_task *vpt; + int ret; + mm_segment_t oldfs; + struct p9_trans_fd *ts = NULL; - vpt = a; - P9_DPRINTK(P9_DEBUG_MUX, "start %p %p\n", current, vpt); - while (!kthread_should_stop()) { - set_current_state(TASK_INTERRUPTIBLE); + if (client && client->status != Disconnected) + ts = client->trans; - list_for_each_entry_safe(m, mtmp, &vpt->mux_list, mux_list) { - p9_poll_mux(m); - } + if (!ts) + return -EREMOTEIO; - P9_DPRINTK(P9_DEBUG_MUX, "sleeping...\n"); - schedule_timeout(SCHED_TIMEOUT * HZ); - } + if (!(ts->wr->f_flags & O_NONBLOCK)) + p9_debug(P9_DEBUG_ERROR, "blocking write ...\n"); - __set_current_state(TASK_RUNNING); - P9_DPRINTK(P9_DEBUG_MUX, "finish\n"); - return 0; + oldfs = get_fs(); + set_fs(get_ds()); + /* The cast to a user pointer is valid due to the set_fs() */ + ret = vfs_write(ts->wr, (__force void __user *)v, len, &ts->wr->f_pos); + set_fs(oldfs); + + if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN) + client->status = Disconnected; + return ret; } /** * p9_write_work - called when a transport can send some data + * @work: container for work to be done + * */ + static void p9_write_work(struct work_struct *work) { int n, err; struct p9_conn *m; - struct p9_req *req; + struct p9_req_t *req; m = container_of(work, struct p9_conn, wq); @@ -542,34 +456,33 @@ static void p9_write_work(struct work_struct *work) } if (!m->wsize) { + spin_lock(&m->client->lock); if (list_empty(&m->unsent_req_list)) { clear_bit(Wworksched, &m->wsched); + spin_unlock(&m->client->lock); return; } - spin_lock(&m->lock); -again: - req = list_entry(m->unsent_req_list.next, struct p9_req, + req = list_entry(m->unsent_req_list.next, struct p9_req_t, req_list); + req->status = REQ_STATUS_SENT; + p9_debug(P9_DEBUG_TRANS, "move req %p\n", req); list_move_tail(&req->req_list, &m->req_list); - if (req->err == ERREQFLUSH) - goto again; - m->wbuf = req->tcall->sdata; - m->wsize = req->tcall->size; + m->wbuf = req->tc->sdata; + m->wsize = req->tc->size; m->wpos = 0; - spin_unlock(&m->lock); + spin_unlock(&m->client->lock); } - P9_DPRINTK(P9_DEBUG_MUX, "mux %p pos %d size %d\n", m, m->wpos, - m->wsize); + p9_debug(P9_DEBUG_TRANS, "mux %p pos %d size %d\n", + m, m->wpos, m->wsize); clear_bit(Wpending, &m->wsched); - err = p9_fd_write(m->trans, m->wbuf + m->wpos, m->wsize - m->wpos); - P9_DPRINTK(P9_DEBUG_MUX, "mux %p sent %d bytes\n", m, err); - if (err == -EAGAIN) { - clear_bit(Wworksched, &m->wsched); - return; - } + err = p9_fd_write(m->client, m->wbuf + m->wpos, m->wsize - m->wpos); + p9_debug(P9_DEBUG_TRANS, "mux %p sent %d bytes\n", m, err); + if (err == -EAGAIN) + goto end_clear; + if (err < 0) goto error; @@ -582,19 +495,21 @@ again: if (m->wpos == m->wsize) m->wpos = m->wsize = 0; - if (m->wsize == 0 && !list_empty(&m->unsent_req_list)) { +end_clear: + clear_bit(Wworksched, &m->wsched); + + if (m->wsize || !list_empty(&m->unsent_req_list)) { if (test_and_clear_bit(Wpending, &m->wsched)) n = POLLOUT; else - n = p9_fd_poll(m->trans, NULL); + n = p9_fd_poll(m->client, NULL); - if (n & POLLOUT) { - P9_DPRINTK(P9_DEBUG_MUX, "schedule write work %p\n", m); - queue_work(p9_mux_wq, &m->wq); - } else - clear_bit(Wworksched, &m->wsched); - } else - clear_bit(Wworksched, &m->wsched); + if ((n & POLLOUT) && + !test_and_set_bit(Wworksched, &m->wsched)) { + p9_debug(P9_DEBUG_TRANS, "sched write work %p\n", m); + schedule_work(&m->wq); + } + } return; @@ -603,530 +518,247 @@ error: clear_bit(Wworksched, &m->wsched); } -static void process_request(struct p9_conn *m, struct p9_req *req) +static int p9_pollwake(wait_queue_t *wait, unsigned int mode, int sync, void *key) { - int ecode; - struct p9_str *ename; - - if (!req->err && req->rcall->id == P9_RERROR) { - ecode = req->rcall->params.rerror.errno; - ename = &req->rcall->params.rerror.error; - - P9_DPRINTK(P9_DEBUG_MUX, "Rerror %.*s\n", ename->len, - ename->str); - - if (m->extended) - req->err = -ecode; + struct p9_poll_wait *pwait = + container_of(wait, struct p9_poll_wait, wait); + struct p9_conn *m = pwait->conn; + unsigned long flags; - if (!req->err) { - req->err = p9_errstr2errno(ename->str, ename->len); + spin_lock_irqsave(&p9_poll_lock, flags); + if (list_empty(&m->poll_pending_link)) + list_add_tail(&m->poll_pending_link, &p9_poll_pending_list); + spin_unlock_irqrestore(&p9_poll_lock, flags); - /* string match failed */ - if (!req->err) { - PRINT_FCALL_ERROR("unknown error", req->rcall); - req->err = -ESERVERFAULT; - } - } - } else if (req->tcall && req->rcall->id != req->tcall->id + 1) { - P9_DPRINTK(P9_DEBUG_ERROR, - "fcall mismatch: expected %d, got %d\n", - req->tcall->id + 1, req->rcall->id); - if (!req->err) - req->err = -EIO; - } + schedule_work(&p9_poll_work); + return 1; } /** - * p9_read_work - called when there is some data to be read from a transport + * p9_pollwait - add poll task to the wait queue + * @filp: file pointer being polled + * @wait_address: wait_q to block on + * @p: poll state + * + * called by files poll operation to add v9fs-poll task to files wait queue */ -static void p9_read_work(struct work_struct *work) -{ - int n, err; - struct p9_conn *m; - struct p9_req *req, *rptr, *rreq; - struct p9_fcall *rcall; - char *rbuf; - m = container_of(work, struct p9_conn, rq); - - if (m->err < 0) - return; - - rcall = NULL; - P9_DPRINTK(P9_DEBUG_MUX, "start mux %p pos %d\n", m, m->rpos); +static void +p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p) +{ + struct p9_conn *m = container_of(p, struct p9_conn, pt); + struct p9_poll_wait *pwait = NULL; + int i; - if (!m->rcall) { - m->rcall = - kmalloc(sizeof(struct p9_fcall) + m->msize, GFP_KERNEL); - if (!m->rcall) { - err = -ENOMEM; - goto error; + for (i = 0; i < ARRAY_SIZE(m->poll_wait); i++) { + if (m->poll_wait[i].wait_addr == NULL) { + pwait = &m->poll_wait[i]; + break; } - - m->rbuf = (char *)m->rcall + sizeof(struct p9_fcall); - m->rpos = 0; } - clear_bit(Rpending, &m->wsched); - err = p9_fd_read(m->trans, m->rbuf + m->rpos, m->msize - m->rpos); - P9_DPRINTK(P9_DEBUG_MUX, "mux %p got %d bytes\n", m, err); - if (err == -EAGAIN) { - clear_bit(Rworksched, &m->wsched); + if (!pwait) { + p9_debug(P9_DEBUG_ERROR, "not enough wait_address slots\n"); return; } - if (err <= 0) - goto error; + pwait->conn = m; + pwait->wait_addr = wait_address; + init_waitqueue_func_entry(&pwait->wait, p9_pollwake); + add_wait_queue(wait_address, &pwait->wait); +} - m->rpos += err; - while (m->rpos > 4) { - n = le32_to_cpu(*(__le32 *) m->rbuf); - if (n >= m->msize) { - P9_DPRINTK(P9_DEBUG_ERROR, - "requested packet size too big: %d\n", n); - err = -EIO; - goto error; - } +/** + * p9_conn_create - initialize the per-session mux data + * @client: client instance + * + * Note: Creates the polling task if this is the first session. + */ - if (m->rpos < n) - break; +static void p9_conn_create(struct p9_client *client) +{ + int n; + struct p9_trans_fd *ts = client->trans; + struct p9_conn *m = &ts->conn; - err = - p9_deserialize_fcall(m->rbuf, n, m->rcall, m->extended); - if (err < 0) - goto error; + p9_debug(P9_DEBUG_TRANS, "client %p msize %d\n", client, client->msize); -#ifdef CONFIG_NET_9P_DEBUG - if ((p9_debug_level&P9_DEBUG_FCALL) == P9_DEBUG_FCALL) { - char buf[150]; + INIT_LIST_HEAD(&m->mux_list); + m->client = client; - p9_printfcall(buf, sizeof(buf), m->rcall, - m->extended); - printk(KERN_NOTICE ">>> %p %s\n", m, buf); - } -#endif + INIT_LIST_HEAD(&m->req_list); + INIT_LIST_HEAD(&m->unsent_req_list); + INIT_WORK(&m->rq, p9_read_work); + INIT_WORK(&m->wq, p9_write_work); + INIT_LIST_HEAD(&m->poll_pending_link); + init_poll_funcptr(&m->pt, p9_pollwait); - rcall = m->rcall; - rbuf = m->rbuf; - if (m->rpos > n) { - m->rcall = kmalloc(sizeof(struct p9_fcall) + m->msize, - GFP_KERNEL); - if (!m->rcall) { - err = -ENOMEM; - goto error; - } + n = p9_fd_poll(client, &m->pt); + if (n & POLLIN) { + p9_debug(P9_DEBUG_TRANS, "mux %p can read\n", m); + set_bit(Rpending, &m->wsched); + } - m->rbuf = (char *)m->rcall + sizeof(struct p9_fcall); - memmove(m->rbuf, rbuf + n, m->rpos - n); - m->rpos -= n; - } else { - m->rcall = NULL; - m->rbuf = NULL; - m->rpos = 0; - } + if (n & POLLOUT) { + p9_debug(P9_DEBUG_TRANS, "mux %p can write\n", m); + set_bit(Wpending, &m->wsched); + } +} - P9_DPRINTK(P9_DEBUG_MUX, "mux %p fcall id %d tag %d\n", m, - rcall->id, rcall->tag); - - req = NULL; - spin_lock(&m->lock); - list_for_each_entry_safe(rreq, rptr, &m->req_list, req_list) { - if (rreq->tag == rcall->tag) { - req = rreq; - if (req->flush != Flushing) - list_del(&req->req_list); - break; - } - } - spin_unlock(&m->lock); +/** + * p9_poll_mux - polls a mux and schedules read or write works if necessary + * @m: connection to poll + * + */ - if (req) { - req->rcall = rcall; - process_request(m, req); +static void p9_poll_mux(struct p9_conn *m) +{ + int n; - if (req->flush != Flushing) { - if (req->cb) - (*req->cb) (req, req->cba); - else - kfree(req->rcall); + if (m->err < 0) + return; - wake_up(&m->equeue); - } - } else { - if (err >= 0 && rcall->id != P9_RFLUSH) - P9_DPRINTK(P9_DEBUG_ERROR, - "unexpected response mux %p id %d tag %d\n", - m, rcall->id, rcall->tag); - kfree(rcall); - } + n = p9_fd_poll(m->client, NULL); + if (n < 0 || n & (POLLERR | POLLHUP | POLLNVAL)) { + p9_debug(P9_DEBUG_TRANS, "error mux %p err %d\n", m, n); + if (n >= 0) + n = -ECONNRESET; + p9_conn_cancel(m, n); } - if (!list_empty(&m->req_list)) { - if (test_and_clear_bit(Rpending, &m->wsched)) - n = POLLIN; - else - n = p9_fd_poll(m->trans, NULL); - - if (n & POLLIN) { - P9_DPRINTK(P9_DEBUG_MUX, "schedule read work %p\n", m); - queue_work(p9_mux_wq, &m->rq); - } else - clear_bit(Rworksched, &m->wsched); - } else - clear_bit(Rworksched, &m->wsched); - - return; + if (n & POLLIN) { + set_bit(Rpending, &m->wsched); + p9_debug(P9_DEBUG_TRANS, "mux %p can read\n", m); + if (!test_and_set_bit(Rworksched, &m->wsched)) { + p9_debug(P9_DEBUG_TRANS, "sched read work %p\n", m); + schedule_work(&m->rq); + } + } -error: - p9_conn_cancel(m, err); - clear_bit(Rworksched, &m->wsched); + if (n & POLLOUT) { + set_bit(Wpending, &m->wsched); + p9_debug(P9_DEBUG_TRANS, "mux %p can write\n", m); + if ((m->wsize || !list_empty(&m->unsent_req_list)) && + !test_and_set_bit(Wworksched, &m->wsched)) { + p9_debug(P9_DEBUG_TRANS, "sched write work %p\n", m); + schedule_work(&m->wq); + } + } } /** - * p9_send_request - send 9P request + * p9_fd_request - send 9P request * The function can sleep until the request is scheduled for sending. * The function can be interrupted. Return from the function is not - * a guarantee that the request is sent successfully. Can return errors - * that can be retrieved by PTR_ERR macros. + * a guarantee that the request is sent successfully. + * + * @client: client instance + * @req: request to be sent * - * @m: mux data - * @tc: request to be sent - * @cb: callback function to call when response is received - * @cba: parameter to pass to the callback function */ -static struct p9_req *p9_send_request(struct p9_conn *m, - struct p9_fcall *tc, - p9_conn_req_callback cb, void *cba) + +static int p9_fd_request(struct p9_client *client, struct p9_req_t *req) { int n; - struct p9_req *req; + struct p9_trans_fd *ts = client->trans; + struct p9_conn *m = &ts->conn; - P9_DPRINTK(P9_DEBUG_MUX, "mux %p task %p tcall %p id %d\n", m, current, - tc, tc->id); + p9_debug(P9_DEBUG_TRANS, "mux %p task %p tcall %p id %d\n", + m, current, req->tc, req->tc->id); if (m->err < 0) - return ERR_PTR(m->err); - - req = kmalloc(sizeof(struct p9_req), GFP_KERNEL); - if (!req) - return ERR_PTR(-ENOMEM); + return m->err; - if (tc->id == P9_TVERSION) - n = P9_NOTAG; - else - n = p9_mux_get_tag(m); - - if (n < 0) - return ERR_PTR(-ENOMEM); - - p9_set_tag(tc, n); - -#ifdef CONFIG_NET_9P_DEBUG - if ((p9_debug_level&P9_DEBUG_FCALL) == P9_DEBUG_FCALL) { - char buf[150]; - - p9_printfcall(buf, sizeof(buf), tc, m->extended); - printk(KERN_NOTICE "<<< %p %s\n", m, buf); - } -#endif - - spin_lock_init(&req->lock); - req->tag = n; - req->tcall = tc; - req->rcall = NULL; - req->err = 0; - req->cb = cb; - req->cba = cba; - req->flush = None; - - spin_lock(&m->lock); + spin_lock(&client->lock); + req->status = REQ_STATUS_UNSENT; list_add_tail(&req->req_list, &m->unsent_req_list); - spin_unlock(&m->lock); + spin_unlock(&client->lock); if (test_and_clear_bit(Wpending, &m->wsched)) n = POLLOUT; else - n = p9_fd_poll(m->trans, NULL); + n = p9_fd_poll(m->client, NULL); if (n & POLLOUT && !test_and_set_bit(Wworksched, &m->wsched)) - queue_work(p9_mux_wq, &m->wq); - - return req; -} - -static void p9_mux_free_request(struct p9_conn *m, struct p9_req *req) -{ - p9_mux_put_tag(m, req->tag); - kfree(req); -} - -static void p9_mux_flush_cb(struct p9_req *freq, void *a) -{ - int tag; - struct p9_conn *m; - struct p9_req *req, *rreq, *rptr; - - m = a; - P9_DPRINTK(P9_DEBUG_MUX, "mux %p tc %p rc %p err %d oldtag %d\n", m, - freq->tcall, freq->rcall, freq->err, - freq->tcall->params.tflush.oldtag); - - spin_lock(&m->lock); - tag = freq->tcall->params.tflush.oldtag; - req = NULL; - list_for_each_entry_safe(rreq, rptr, &m->req_list, req_list) { - if (rreq->tag == tag) { - req = rreq; - list_del(&req->req_list); - break; - } - } - spin_unlock(&m->lock); - - if (req) { - spin_lock(&req->lock); - req->flush = Flushed; - spin_unlock(&req->lock); - - if (req->cb) - (*req->cb) (req, req->cba); - else - kfree(req->rcall); - - wake_up(&m->equeue); - } - - kfree(freq->tcall); - kfree(freq->rcall); - p9_mux_free_request(m, freq); -} + schedule_work(&m->wq); -static int -p9_mux_flush_request(struct p9_conn *m, struct p9_req *req) -{ - struct p9_fcall *fc; - struct p9_req *rreq, *rptr; - - P9_DPRINTK(P9_DEBUG_MUX, "mux %p req %p tag %d\n", m, req, req->tag); - - /* if a response was received for a request, do nothing */ - spin_lock(&req->lock); - if (req->rcall || req->err) { - spin_unlock(&req->lock); - P9_DPRINTK(P9_DEBUG_MUX, - "mux %p req %p response already received\n", m, req); - return 0; - } - - req->flush = Flushing; - spin_unlock(&req->lock); - - spin_lock(&m->lock); - /* if the request is not sent yet, just remove it from the list */ - list_for_each_entry_safe(rreq, rptr, &m->unsent_req_list, req_list) { - if (rreq->tag == req->tag) { - P9_DPRINTK(P9_DEBUG_MUX, - "mux %p req %p request is not sent yet\n", m, req); - list_del(&rreq->req_list); - req->flush = Flushed; - spin_unlock(&m->lock); - if (req->cb) - (*req->cb) (req, req->cba); - return 0; - } - } - spin_unlock(&m->lock); - - clear_thread_flag(TIF_SIGPENDING); - fc = p9_create_tflush(req->tag); - p9_send_request(m, fc, p9_mux_flush_cb, m); - return 1; -} - -static void -p9_conn_rpc_cb(struct p9_req *req, void *a) -{ - struct p9_mux_rpc *r; - - P9_DPRINTK(P9_DEBUG_MUX, "req %p r %p\n", req, a); - r = a; - r->rcall = req->rcall; - r->err = req->err; - - if (req->flush != None && !req->err) - r->err = -ERESTARTSYS; - - wake_up(&r->wqueue); + return 0; } -/** - * p9_fd_rpc- sends 9P request and waits until a response is available. - * The function can be interrupted. - * @m: mux data - * @tc: request to be sent - * @rc: pointer where a pointer to the response is stored - */ -int -p9_fd_rpc(struct p9_trans *t, struct p9_fcall *tc, struct p9_fcall **rc) +static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req) { - struct p9_trans_fd *p = t->priv; - struct p9_conn *m = p->conn; - int err, sigpending; - unsigned long flags; - struct p9_req *req; - struct p9_mux_rpc r; - - r.err = 0; - r.tcall = tc; - r.rcall = NULL; - r.m = m; - init_waitqueue_head(&r.wqueue); - - if (rc) - *rc = NULL; - - sigpending = 0; - if (signal_pending(current)) { - sigpending = 1; - clear_thread_flag(TIF_SIGPENDING); - } + int ret = 1; - req = p9_send_request(m, tc, p9_conn_rpc_cb, &r); - if (IS_ERR(req)) { - err = PTR_ERR(req); - P9_DPRINTK(P9_DEBUG_MUX, "error %d\n", err); - return err; - } + p9_debug(P9_DEBUG_TRANS, "client %p req %p\n", client, req); - err = wait_event_interruptible(r.wqueue, r.rcall != NULL || r.err < 0); - if (r.err < 0) - err = r.err; - - if (err == -ERESTARTSYS && m->trans->status == Connected - && m->err == 0) { - if (p9_mux_flush_request(m, req)) { - /* wait until we get response of the flush message */ - do { - clear_thread_flag(TIF_SIGPENDING); - err = wait_event_interruptible(r.wqueue, - r.rcall || r.err); - } while (!r.rcall && !r.err && err == -ERESTARTSYS && - m->trans->status == Connected && !m->err); - - err = -ERESTARTSYS; - } - sigpending = 1; - } + spin_lock(&client->lock); - if (sigpending) { - spin_lock_irqsave(¤t->sighand->siglock, flags); - recalc_sigpending(); - spin_unlock_irqrestore(¤t->sighand->siglock, flags); + if (req->status == REQ_STATUS_UNSENT) { + list_del(&req->req_list); + req->status = REQ_STATUS_FLSHD; + ret = 0; } + spin_unlock(&client->lock); - if (rc) - *rc = r.rcall; - else - kfree(r.rcall); - - p9_mux_free_request(m, req); - if (err > 0) - err = -EIO; - - return err; + return ret; } -#ifdef P9_NONBLOCK -/** - * p9_conn_rpcnb - sends 9P request without waiting for response. - * @m: mux data - * @tc: request to be sent - * @cb: callback function to be called when response arrives - * @cba: value to pass to the callback function - */ -int p9_conn_rpcnb(struct p9_conn *m, struct p9_fcall *tc, - p9_conn_req_callback cb, void *a) +static int p9_fd_cancelled(struct p9_client *client, struct p9_req_t *req) { - int err; - struct p9_req *req; + p9_debug(P9_DEBUG_TRANS, "client %p req %p\n", client, req); - req = p9_send_request(m, tc, cb, a); - if (IS_ERR(req)) { - err = PTR_ERR(req); - P9_DPRINTK(P9_DEBUG_MUX, "error %d\n", err); - return PTR_ERR(req); - } + /* we haven't received a response for oldreq, + * remove it from the list. + */ + spin_lock(&client->lock); + list_del(&req->req_list); + spin_unlock(&client->lock); - P9_DPRINTK(P9_DEBUG_MUX, "mux %p tc %p tag %d\n", m, tc, req->tag); return 0; } -#endif /* P9_NONBLOCK */ /** - * p9_conn_cancel - cancel all pending requests with error - * @m: mux data - * @err: error code - */ -void p9_conn_cancel(struct p9_conn *m, int err) -{ - struct p9_req *req, *rtmp; - LIST_HEAD(cancel_list); - - P9_DPRINTK(P9_DEBUG_ERROR, "mux %p err %d\n", m, err); - m->err = err; - spin_lock(&m->lock); - list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) { - list_move(&req->req_list, &cancel_list); - } - list_for_each_entry_safe(req, rtmp, &m->unsent_req_list, req_list) { - list_move(&req->req_list, &cancel_list); - } - spin_unlock(&m->lock); - - list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) { - list_del(&req->req_list); - if (!req->err) - req->err = err; - - if (req->cb) - (*req->cb) (req, req->cba); - else - kfree(req->rcall); - } - - wake_up(&m->equeue); -} - -/** - * v9fs_parse_options - parse mount options into session structure - * @options: options string passed from mount - * @v9ses: existing v9fs session information + * parse_opts - parse mount options into p9_fd_opts structure + * @params: options string passed from mount + * @opts: fd transport-specific structure to parse options into * + * Returns 0 upon success, -ERRNO upon failure */ -static void parse_opts(char *options, struct p9_fd_opts *opts) +static int parse_opts(char *params, struct p9_fd_opts *opts) { char *p; substring_t args[MAX_OPT_ARGS]; int option; - int ret; + char *options, *tmp_options; opts->port = P9_PORT; opts->rfd = ~0; opts->wfd = ~0; - if (!options) - return; + if (!params) + return 0; + + tmp_options = kstrdup(params, GFP_KERNEL); + if (!tmp_options) { + p9_debug(P9_DEBUG_ERROR, + "failed to allocate copy of option string\n"); + return -ENOMEM; + } + options = tmp_options; while ((p = strsep(&options, ",")) != NULL) { int token; + int r; if (!*p) continue; token = match_token(p, tokens, args); - ret = match_int(&args[0], &option); - if (ret < 0) { - P9_DPRINTK(P9_DEBUG_ERROR, - "integer field, but no integer?\n"); - continue; + if ((token != Opt_err) && (token != Opt_privport)) { + r = match_int(&args[0], &option); + if (r < 0) { + p9_debug(P9_DEBUG_ERROR, + "integer field, but no integer?\n"); + continue; + } } switch (token) { case Opt_port: @@ -1138,15 +770,21 @@ static void parse_opts(char *options, struct p9_fd_opts *opts) case Opt_wfdno: opts->wfd = option; break; + case Opt_privport: + opts->privport = 1; + break; default: continue; } } + + kfree(tmp_options); + return 0; } -static int p9_fd_open(struct p9_trans *trans, int rfd, int wfd) +static int p9_fd_open(struct p9_client *client, int rfd, int wfd) { - struct p9_trans_fd *ts = kmalloc(sizeof(struct p9_trans_fd), + struct p9_trans_fd *ts = kzalloc(sizeof(struct p9_trans_fd), GFP_KERNEL); if (!ts) return -ENOMEM; @@ -1162,360 +800,308 @@ static int p9_fd_open(struct p9_trans *trans, int rfd, int wfd) return -EIO; } - trans->priv = ts; - trans->status = Connected; + client->trans = ts; + client->status = Connected; return 0; } -static int p9_socket_open(struct p9_trans *trans, struct socket *csocket) +static int p9_socket_open(struct p9_client *client, struct socket *csocket) { - int fd, ret; + struct p9_trans_fd *p; + struct file *file; + + p = kzalloc(sizeof(struct p9_trans_fd), GFP_KERNEL); + if (!p) + return -ENOMEM; csocket->sk->sk_allocation = GFP_NOIO; - fd = sock_map_fd(csocket); - if (fd < 0) { - P9_EPRINTK(KERN_ERR, "p9_socket_open: failed to map fd\n"); - return fd; + file = sock_alloc_file(csocket, 0, NULL); + if (IS_ERR(file)) { + pr_err("%s (%d): failed to map fd\n", + __func__, task_pid_nr(current)); + sock_release(csocket); + kfree(p); + return PTR_ERR(file); } - ret = p9_fd_open(trans, fd, fd); - if (ret < 0) { - P9_EPRINTK(KERN_ERR, "p9_socket_open: failed to open fd\n"); - sockfd_put(csocket); - return ret; - } + get_file(file); + p->wr = p->rd = file; + client->trans = p; + client->status = Connected; - ((struct p9_trans_fd *)trans->priv)->rd->f_flags |= O_NONBLOCK; + p->rd->f_flags |= O_NONBLOCK; + p9_conn_create(client); return 0; } /** - * p9_fd_read- read from a fd - * @v9ses: session information - * @v: buffer to receive data into - * @len: size of receive buffer + * p9_mux_destroy - cancels all pending requests of mux + * @m: mux to destroy * */ -static int p9_fd_read(struct p9_trans *trans, void *v, int len) -{ - int ret; - struct p9_trans_fd *ts = NULL; - - if (trans && trans->status != Disconnected) - ts = trans->priv; - if (!ts) - return -EREMOTEIO; - - if (!(ts->rd->f_flags & O_NONBLOCK)) - P9_DPRINTK(P9_DEBUG_ERROR, "blocking read ...\n"); - - ret = kernel_read(ts->rd, ts->rd->f_pos, v, len); - if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN) - trans->status = Disconnected; - return ret; -} - -/** - * p9_fd_write - write to a socket - * @v9ses: session information - * @v: buffer to send data from - * @len: size of send buffer - * - */ -static int p9_fd_write(struct p9_trans *trans, void *v, int len) -{ - int ret; - mm_segment_t oldfs; - struct p9_trans_fd *ts = NULL; - - if (trans && trans->status != Disconnected) - ts = trans->priv; - - if (!ts) - return -EREMOTEIO; - - if (!(ts->wr->f_flags & O_NONBLOCK)) - P9_DPRINTK(P9_DEBUG_ERROR, "blocking write ...\n"); - - oldfs = get_fs(); - set_fs(get_ds()); - /* The cast to a user pointer is valid due to the set_fs() */ - ret = vfs_write(ts->wr, (void __user *)v, len, &ts->wr->f_pos); - set_fs(oldfs); - - if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN) - trans->status = Disconnected; - return ret; -} - -static unsigned int -p9_fd_poll(struct p9_trans *trans, struct poll_table_struct *pt) +static void p9_conn_destroy(struct p9_conn *m) { - int ret, n; - struct p9_trans_fd *ts = NULL; - mm_segment_t oldfs; + p9_debug(P9_DEBUG_TRANS, "mux %p prev %p next %p\n", + m, m->mux_list.prev, m->mux_list.next); - if (trans && trans->status == Connected) - ts = trans->priv; - - if (!ts) - return -EREMOTEIO; - - if (!ts->rd->f_op || !ts->rd->f_op->poll) - return -EIO; - - if (!ts->wr->f_op || !ts->wr->f_op->poll) - return -EIO; - - oldfs = get_fs(); - set_fs(get_ds()); - - ret = ts->rd->f_op->poll(ts->rd, pt); - if (ret < 0) - goto end; + p9_mux_poll_stop(m); + cancel_work_sync(&m->rq); + cancel_work_sync(&m->wq); - if (ts->rd != ts->wr) { - n = ts->wr->f_op->poll(ts->wr, pt); - if (n < 0) { - ret = n; - goto end; - } - ret = (ret & ~POLLOUT) | (n & ~POLLIN); - } + p9_conn_cancel(m, -ECONNRESET); -end: - set_fs(oldfs); - return ret; + m->client = NULL; } /** - * p9_fd_close - shutdown socket - * @trans: private socket structure + * p9_fd_close - shutdown file descriptor transport + * @client: client instance * */ -static void p9_fd_close(struct p9_trans *trans) + +static void p9_fd_close(struct p9_client *client) { struct p9_trans_fd *ts; - if (!trans) + if (!client) return; - ts = xchg(&trans->priv, NULL); - + ts = client->trans; if (!ts) return; - p9_conn_destroy(ts->conn); + client->status = Disconnected; + + p9_conn_destroy(&ts->conn); - trans->status = Disconnected; if (ts->rd) fput(ts->rd); if (ts->wr) fput(ts->wr); + kfree(ts); } -static struct p9_trans * -p9_trans_create_tcp(const char *addr, char *args, int msize, unsigned char dotu) +/* + * stolen from NFS - maybe should be made a generic function? + */ +static inline int valid_ipaddr4(const char *buf) +{ + int rc, count, in[4]; + + rc = sscanf(buf, "%d.%d.%d.%d", &in[0], &in[1], &in[2], &in[3]); + if (rc != 4) + return -EINVAL; + for (count = 0; count < 4; count++) { + if (in[count] > 255) + return -EINVAL; + } + return 0; +} + +static int p9_bind_privport(struct socket *sock) +{ + struct sockaddr_in cl; + int port, err = -EINVAL; + + memset(&cl, 0, sizeof(cl)); + cl.sin_family = AF_INET; + cl.sin_addr.s_addr = INADDR_ANY; + for (port = p9_ipport_resv_max; port >= p9_ipport_resv_min; port--) { + cl.sin_port = htons((ushort)port); + err = kernel_bind(sock, (struct sockaddr *)&cl, sizeof(cl)); + if (err != -EADDRINUSE) + break; + } + return err; +} + + +static int +p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args) { int err; - struct p9_trans *trans; struct socket *csocket; struct sockaddr_in sin_server; struct p9_fd_opts opts; - struct p9_trans_fd *p; - parse_opts(args, &opts); + err = parse_opts(args, &opts); + if (err < 0) + return err; + + if (valid_ipaddr4(addr) < 0) + return -EINVAL; csocket = NULL; - trans = kmalloc(sizeof(struct p9_trans), GFP_KERNEL); - if (!trans) - return ERR_PTR(-ENOMEM); - trans->msize = msize; - trans->extended = dotu; - trans->rpc = p9_fd_rpc; - trans->close = p9_fd_close; sin_server.sin_family = AF_INET; sin_server.sin_addr.s_addr = in_aton(addr); sin_server.sin_port = htons(opts.port); - sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &csocket); + err = __sock_create(read_pnet(¤t->nsproxy->net_ns), PF_INET, + SOCK_STREAM, IPPROTO_TCP, &csocket, 1); + if (err) { + pr_err("%s (%d): problem creating socket\n", + __func__, task_pid_nr(current)); + return err; + } - if (!csocket) { - P9_EPRINTK(KERN_ERR, "p9_trans_tcp: problem creating socket\n"); - err = -EIO; - goto error; + if (opts.privport) { + err = p9_bind_privport(csocket); + if (err < 0) { + pr_err("%s (%d): problem binding to privport\n", + __func__, task_pid_nr(current)); + sock_release(csocket); + return err; + } } err = csocket->ops->connect(csocket, (struct sockaddr *)&sin_server, sizeof(struct sockaddr_in), 0); if (err < 0) { - P9_EPRINTK(KERN_ERR, - "p9_trans_tcp: problem connecting socket to %s\n", - addr); - goto error; - } - - err = p9_socket_open(trans, csocket); - if (err < 0) - goto error; - - p = (struct p9_trans_fd *) trans->priv; - p->conn = p9_conn_create(trans); - if (IS_ERR(p->conn)) { - err = PTR_ERR(p->conn); - p->conn = NULL; - goto error; - } - - return trans; - -error: - if (csocket) + pr_err("%s (%d): problem connecting socket to %s\n", + __func__, task_pid_nr(current), addr); sock_release(csocket); + return err; + } - kfree(trans); - return ERR_PTR(err); + return p9_socket_open(client, csocket); } -static struct p9_trans * -p9_trans_create_unix(const char *addr, char *args, int msize, - unsigned char dotu) +static int +p9_fd_create_unix(struct p9_client *client, const char *addr, char *args) { int err; struct socket *csocket; struct sockaddr_un sun_server; - struct p9_trans *trans; - struct p9_trans_fd *p; csocket = NULL; - trans = kmalloc(sizeof(struct p9_trans), GFP_KERNEL); - if (!trans) - return ERR_PTR(-ENOMEM); - - trans->rpc = p9_fd_rpc; - trans->close = p9_fd_close; - if (strlen(addr) > UNIX_PATH_MAX) { - P9_EPRINTK(KERN_ERR, "p9_trans_unix: address too long: %s\n", - addr); - err = -ENAMETOOLONG; - goto error; + if (strlen(addr) >= UNIX_PATH_MAX) { + pr_err("%s (%d): address too long: %s\n", + __func__, task_pid_nr(current), addr); + return -ENAMETOOLONG; } sun_server.sun_family = PF_UNIX; strcpy(sun_server.sun_path, addr); - sock_create_kern(PF_UNIX, SOCK_STREAM, 0, &csocket); - err = csocket->ops->connect(csocket, (struct sockaddr *)&sun_server, - sizeof(struct sockaddr_un) - 1, 0); + err = __sock_create(read_pnet(¤t->nsproxy->net_ns), PF_UNIX, + SOCK_STREAM, 0, &csocket, 1); if (err < 0) { - P9_EPRINTK(KERN_ERR, - "p9_trans_unix: problem connecting socket: %s: %d\n", - addr, err); - goto error; - } + pr_err("%s (%d): problem creating socket\n", + __func__, task_pid_nr(current)); - err = p9_socket_open(trans, csocket); - if (err < 0) - goto error; - - trans->msize = msize; - trans->extended = dotu; - p = (struct p9_trans_fd *) trans->priv; - p->conn = p9_conn_create(trans); - if (IS_ERR(p->conn)) { - err = PTR_ERR(p->conn); - p->conn = NULL; - goto error; + return err; } - - return trans; - -error: - if (csocket) + err = csocket->ops->connect(csocket, (struct sockaddr *)&sun_server, + sizeof(struct sockaddr_un) - 1, 0); + if (err < 0) { + pr_err("%s (%d): problem connecting socket: %s: %d\n", + __func__, task_pid_nr(current), addr, err); sock_release(csocket); + return err; + } - kfree(trans); - return ERR_PTR(err); + return p9_socket_open(client, csocket); } -static struct p9_trans * -p9_trans_create_fd(const char *name, char *args, int msize, - unsigned char extended) +static int +p9_fd_create(struct p9_client *client, const char *addr, char *args) { int err; - struct p9_trans *trans; struct p9_fd_opts opts; struct p9_trans_fd *p; parse_opts(args, &opts); if (opts.rfd == ~0 || opts.wfd == ~0) { - printk(KERN_ERR "v9fs: Insufficient options for proto=fd\n"); - return ERR_PTR(-ENOPROTOOPT); + pr_err("Insufficient options for proto=fd\n"); + return -ENOPROTOOPT; } - trans = kmalloc(sizeof(struct p9_trans), GFP_KERNEL); - if (!trans) - return ERR_PTR(-ENOMEM); - - trans->rpc = p9_fd_rpc; - trans->close = p9_fd_close; - - err = p9_fd_open(trans, opts.rfd, opts.wfd); + err = p9_fd_open(client, opts.rfd, opts.wfd); if (err < 0) - goto error; - - trans->msize = msize; - trans->extended = extended; - p = (struct p9_trans_fd *) trans->priv; - p->conn = p9_conn_create(trans); - if (IS_ERR(p->conn)) { - err = PTR_ERR(p->conn); - p->conn = NULL; - goto error; - } + return err; - return trans; + p = (struct p9_trans_fd *) client->trans; + p9_conn_create(client); -error: - kfree(trans); - return ERR_PTR(err); + return 0; } static struct p9_trans_module p9_tcp_trans = { .name = "tcp", .maxsize = MAX_SOCK_BUF, - .def = 1, - .create = p9_trans_create_tcp, + .def = 0, + .create = p9_fd_create_tcp, + .close = p9_fd_close, + .request = p9_fd_request, + .cancel = p9_fd_cancel, + .cancelled = p9_fd_cancelled, + .owner = THIS_MODULE, }; static struct p9_trans_module p9_unix_trans = { .name = "unix", .maxsize = MAX_SOCK_BUF, .def = 0, - .create = p9_trans_create_unix, + .create = p9_fd_create_unix, + .close = p9_fd_close, + .request = p9_fd_request, + .cancel = p9_fd_cancel, + .cancelled = p9_fd_cancelled, + .owner = THIS_MODULE, }; static struct p9_trans_module p9_fd_trans = { .name = "fd", .maxsize = MAX_SOCK_BUF, .def = 0, - .create = p9_trans_create_fd, + .create = p9_fd_create, + .close = p9_fd_close, + .request = p9_fd_request, + .cancel = p9_fd_cancel, + .cancelled = p9_fd_cancelled, + .owner = THIS_MODULE, }; -static int __init p9_trans_fd_init(void) +/** + * p9_poll_proc - poll worker thread + * @a: thread state and arguments + * + * polls all v9fs transports for new events and queues the appropriate + * work to the work queue + * + */ + +static void p9_poll_workfn(struct work_struct *work) { - int ret = p9_mux_global_init(); - if (ret) { - printk(KERN_WARNING "9p: starting mux failed\n"); - return ret; + unsigned long flags; + + p9_debug(P9_DEBUG_TRANS, "start %p\n", current); + + spin_lock_irqsave(&p9_poll_lock, flags); + while (!list_empty(&p9_poll_pending_list)) { + struct p9_conn *conn = list_first_entry(&p9_poll_pending_list, + struct p9_conn, + poll_pending_link); + list_del_init(&conn->poll_pending_link); + spin_unlock_irqrestore(&p9_poll_lock, flags); + + p9_poll_mux(conn); + + spin_lock_irqsave(&p9_poll_lock, flags); } + spin_unlock_irqrestore(&p9_poll_lock, flags); + + p9_debug(P9_DEBUG_TRANS, "finish\n"); +} +int p9_trans_fd_init(void) +{ v9fs_register_trans(&p9_tcp_trans); v9fs_register_trans(&p9_unix_trans); v9fs_register_trans(&p9_fd_trans); @@ -1523,8 +1109,10 @@ static int __init p9_trans_fd_init(void) return 0; } -module_init(p9_trans_fd_init); - -MODULE_AUTHOR("Latchesar Ionkov <lucho@ionkov.net>"); -MODULE_AUTHOR("Eric Van Hensbergen <ericvh@gmail.com>"); -MODULE_LICENSE("GPL"); +void p9_trans_fd_exit(void) +{ + flush_work(&p9_poll_work); + v9fs_unregister_trans(&p9_tcp_trans); + v9fs_unregister_trans(&p9_unix_trans); + v9fs_unregister_trans(&p9_fd_trans); +} diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c new file mode 100644 index 00000000000..14ad43b5cf8 --- /dev/null +++ b/net/9p/trans_rdma.c @@ -0,0 +1,765 @@ +/* + * linux/fs/9p/trans_rdma.c + * + * RDMA transport layer based on the trans_fd.c implementation. + * + * Copyright (C) 2008 by Tom Tucker <tom@opengridcomputing.com> + * Copyright (C) 2006 by Russ Cox <rsc@swtch.com> + * Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net> + * Copyright (C) 2004-2008 by Eric Van Hensbergen <ericvh@gmail.com> + * Copyright (C) 1997-2002 by Ron Minnich <rminnich@sarnoff.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to: + * Free Software Foundation + * 51 Franklin Street, Fifth Floor + * Boston, MA 02111-1301 USA + * + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/in.h> +#include <linux/module.h> +#include <linux/net.h> +#include <linux/ipv6.h> +#include <linux/kthread.h> +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/un.h> +#include <linux/uaccess.h> +#include <linux/inet.h> +#include <linux/idr.h> +#include <linux/file.h> +#include <linux/parser.h> +#include <linux/semaphore.h> +#include <linux/slab.h> +#include <net/9p/9p.h> +#include <net/9p/client.h> +#include <net/9p/transport.h> +#include <rdma/ib_verbs.h> +#include <rdma/rdma_cm.h> + +#define P9_PORT 5640 +#define P9_RDMA_SQ_DEPTH 32 +#define P9_RDMA_RQ_DEPTH 32 +#define P9_RDMA_SEND_SGE 4 +#define P9_RDMA_RECV_SGE 4 +#define P9_RDMA_IRD 0 +#define P9_RDMA_ORD 0 +#define P9_RDMA_TIMEOUT 30000 /* 30 seconds */ +#define P9_RDMA_MAXSIZE (1024*1024) /* 1MB */ + +/** + * struct p9_trans_rdma - RDMA transport instance + * + * @state: tracks the transport state machine for connection setup and tear down + * @cm_id: The RDMA CM ID + * @pd: Protection Domain pointer + * @qp: Queue Pair pointer + * @cq: Completion Queue pointer + * @dm_mr: DMA Memory Region pointer + * @lkey: The local access only memory region key + * @timeout: Number of uSecs to wait for connection management events + * @sq_depth: The depth of the Send Queue + * @sq_sem: Semaphore for the SQ + * @rq_depth: The depth of the Receive Queue. + * @rq_sem: Semaphore for the RQ + * @excess_rc : Amount of posted Receive Contexts without a pending request. + * See rdma_request() + * @addr: The remote peer's address + * @req_lock: Protects the active request list + * @cm_done: Completion event for connection management tracking + */ +struct p9_trans_rdma { + enum { + P9_RDMA_INIT, + P9_RDMA_ADDR_RESOLVED, + P9_RDMA_ROUTE_RESOLVED, + P9_RDMA_CONNECTED, + P9_RDMA_FLUSHING, + P9_RDMA_CLOSING, + P9_RDMA_CLOSED, + } state; + struct rdma_cm_id *cm_id; + struct ib_pd *pd; + struct ib_qp *qp; + struct ib_cq *cq; + struct ib_mr *dma_mr; + u32 lkey; + long timeout; + int sq_depth; + struct semaphore sq_sem; + int rq_depth; + struct semaphore rq_sem; + atomic_t excess_rc; + struct sockaddr_in addr; + spinlock_t req_lock; + + struct completion cm_done; +}; + +/** + * p9_rdma_context - Keeps track of in-process WR + * + * @wc_op: The original WR op for when the CQE completes in error. + * @busa: Bus address to unmap when the WR completes + * @req: Keeps track of requests (send) + * @rc: Keepts track of replies (receive) + */ +struct p9_rdma_req; +struct p9_rdma_context { + enum ib_wc_opcode wc_op; + dma_addr_t busa; + union { + struct p9_req_t *req; + struct p9_fcall *rc; + }; +}; + +/** + * p9_rdma_opts - Collection of mount options + * @port: port of connection + * @sq_depth: The requested depth of the SQ. This really doesn't need + * to be any deeper than the number of threads used in the client + * @rq_depth: The depth of the RQ. Should be greater than or equal to SQ depth + * @timeout: Time to wait in msecs for CM events + */ +struct p9_rdma_opts { + short port; + int sq_depth; + int rq_depth; + long timeout; +}; + +/* + * Option Parsing (code inspired by NFS code) + */ +enum { + /* Options that take integer arguments */ + Opt_port, Opt_rq_depth, Opt_sq_depth, Opt_timeout, Opt_err, +}; + +static match_table_t tokens = { + {Opt_port, "port=%u"}, + {Opt_sq_depth, "sq=%u"}, + {Opt_rq_depth, "rq=%u"}, + {Opt_timeout, "timeout=%u"}, + {Opt_err, NULL}, +}; + +/** + * parse_opts - parse mount options into rdma options structure + * @params: options string passed from mount + * @opts: rdma transport-specific structure to parse options into + * + * Returns 0 upon success, -ERRNO upon failure + */ +static int parse_opts(char *params, struct p9_rdma_opts *opts) +{ + char *p; + substring_t args[MAX_OPT_ARGS]; + int option; + char *options, *tmp_options; + + opts->port = P9_PORT; + opts->sq_depth = P9_RDMA_SQ_DEPTH; + opts->rq_depth = P9_RDMA_RQ_DEPTH; + opts->timeout = P9_RDMA_TIMEOUT; + + if (!params) + return 0; + + tmp_options = kstrdup(params, GFP_KERNEL); + if (!tmp_options) { + p9_debug(P9_DEBUG_ERROR, + "failed to allocate copy of option string\n"); + return -ENOMEM; + } + options = tmp_options; + + while ((p = strsep(&options, ",")) != NULL) { + int token; + int r; + if (!*p) + continue; + token = match_token(p, tokens, args); + if (token == Opt_err) + continue; + r = match_int(&args[0], &option); + if (r < 0) { + p9_debug(P9_DEBUG_ERROR, + "integer field, but no integer?\n"); + continue; + } + switch (token) { + case Opt_port: + opts->port = option; + break; + case Opt_sq_depth: + opts->sq_depth = option; + break; + case Opt_rq_depth: + opts->rq_depth = option; + break; + case Opt_timeout: + opts->timeout = option; + break; + default: + continue; + } + } + /* RQ must be at least as large as the SQ */ + opts->rq_depth = max(opts->rq_depth, opts->sq_depth); + kfree(tmp_options); + return 0; +} + +static int +p9_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) +{ + struct p9_client *c = id->context; + struct p9_trans_rdma *rdma = c->trans; + switch (event->event) { + case RDMA_CM_EVENT_ADDR_RESOLVED: + BUG_ON(rdma->state != P9_RDMA_INIT); + rdma->state = P9_RDMA_ADDR_RESOLVED; + break; + + case RDMA_CM_EVENT_ROUTE_RESOLVED: + BUG_ON(rdma->state != P9_RDMA_ADDR_RESOLVED); + rdma->state = P9_RDMA_ROUTE_RESOLVED; + break; + + case RDMA_CM_EVENT_ESTABLISHED: + BUG_ON(rdma->state != P9_RDMA_ROUTE_RESOLVED); + rdma->state = P9_RDMA_CONNECTED; + break; + + case RDMA_CM_EVENT_DISCONNECTED: + if (rdma) + rdma->state = P9_RDMA_CLOSED; + if (c) + c->status = Disconnected; + break; + + case RDMA_CM_EVENT_TIMEWAIT_EXIT: + break; + + case RDMA_CM_EVENT_ADDR_CHANGE: + case RDMA_CM_EVENT_ROUTE_ERROR: + case RDMA_CM_EVENT_DEVICE_REMOVAL: + case RDMA_CM_EVENT_MULTICAST_JOIN: + case RDMA_CM_EVENT_MULTICAST_ERROR: + case RDMA_CM_EVENT_REJECTED: + case RDMA_CM_EVENT_CONNECT_REQUEST: + case RDMA_CM_EVENT_CONNECT_RESPONSE: + case RDMA_CM_EVENT_CONNECT_ERROR: + case RDMA_CM_EVENT_ADDR_ERROR: + case RDMA_CM_EVENT_UNREACHABLE: + c->status = Disconnected; + rdma_disconnect(rdma->cm_id); + break; + default: + BUG(); + } + complete(&rdma->cm_done); + return 0; +} + +static void +handle_recv(struct p9_client *client, struct p9_trans_rdma *rdma, + struct p9_rdma_context *c, enum ib_wc_status status, u32 byte_len) +{ + struct p9_req_t *req; + int err = 0; + int16_t tag; + + req = NULL; + ib_dma_unmap_single(rdma->cm_id->device, c->busa, client->msize, + DMA_FROM_DEVICE); + + if (status != IB_WC_SUCCESS) + goto err_out; + + err = p9_parse_header(c->rc, NULL, NULL, &tag, 1); + if (err) + goto err_out; + + req = p9_tag_lookup(client, tag); + if (!req) + goto err_out; + + /* Check that we have not yet received a reply for this request. + */ + if (unlikely(req->rc)) { + pr_err("Duplicate reply for request %d", tag); + goto err_out; + } + + req->rc = c->rc; + p9_client_cb(client, req, REQ_STATUS_RCVD); + + return; + + err_out: + p9_debug(P9_DEBUG_ERROR, "req %p err %d status %d\n", req, err, status); + rdma->state = P9_RDMA_FLUSHING; + client->status = Disconnected; +} + +static void +handle_send(struct p9_client *client, struct p9_trans_rdma *rdma, + struct p9_rdma_context *c, enum ib_wc_status status, u32 byte_len) +{ + ib_dma_unmap_single(rdma->cm_id->device, + c->busa, c->req->tc->size, + DMA_TO_DEVICE); +} + +static void qp_event_handler(struct ib_event *event, void *context) +{ + p9_debug(P9_DEBUG_ERROR, "QP event %d context %p\n", + event->event, context); +} + +static void cq_comp_handler(struct ib_cq *cq, void *cq_context) +{ + struct p9_client *client = cq_context; + struct p9_trans_rdma *rdma = client->trans; + int ret; + struct ib_wc wc; + + ib_req_notify_cq(rdma->cq, IB_CQ_NEXT_COMP); + while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) { + struct p9_rdma_context *c = (void *) (unsigned long) wc.wr_id; + + switch (c->wc_op) { + case IB_WC_RECV: + handle_recv(client, rdma, c, wc.status, wc.byte_len); + up(&rdma->rq_sem); + break; + + case IB_WC_SEND: + handle_send(client, rdma, c, wc.status, wc.byte_len); + up(&rdma->sq_sem); + break; + + default: + pr_err("unexpected completion type, c->wc_op=%d, wc.opcode=%d, status=%d\n", + c->wc_op, wc.opcode, wc.status); + break; + } + kfree(c); + } +} + +static void cq_event_handler(struct ib_event *e, void *v) +{ + p9_debug(P9_DEBUG_ERROR, "CQ event %d context %p\n", e->event, v); +} + +static void rdma_destroy_trans(struct p9_trans_rdma *rdma) +{ + if (!rdma) + return; + + if (rdma->dma_mr && !IS_ERR(rdma->dma_mr)) + ib_dereg_mr(rdma->dma_mr); + + if (rdma->qp && !IS_ERR(rdma->qp)) + ib_destroy_qp(rdma->qp); + + if (rdma->pd && !IS_ERR(rdma->pd)) + ib_dealloc_pd(rdma->pd); + + if (rdma->cq && !IS_ERR(rdma->cq)) + ib_destroy_cq(rdma->cq); + + if (rdma->cm_id && !IS_ERR(rdma->cm_id)) + rdma_destroy_id(rdma->cm_id); + + kfree(rdma); +} + +static int +post_recv(struct p9_client *client, struct p9_rdma_context *c) +{ + struct p9_trans_rdma *rdma = client->trans; + struct ib_recv_wr wr, *bad_wr; + struct ib_sge sge; + + c->busa = ib_dma_map_single(rdma->cm_id->device, + c->rc->sdata, client->msize, + DMA_FROM_DEVICE); + if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) + goto error; + + sge.addr = c->busa; + sge.length = client->msize; + sge.lkey = rdma->lkey; + + wr.next = NULL; + c->wc_op = IB_WC_RECV; + wr.wr_id = (unsigned long) c; + wr.sg_list = &sge; + wr.num_sge = 1; + return ib_post_recv(rdma->qp, &wr, &bad_wr); + + error: + p9_debug(P9_DEBUG_ERROR, "EIO\n"); + return -EIO; +} + +static int rdma_request(struct p9_client *client, struct p9_req_t *req) +{ + struct p9_trans_rdma *rdma = client->trans; + struct ib_send_wr wr, *bad_wr; + struct ib_sge sge; + int err = 0; + unsigned long flags; + struct p9_rdma_context *c = NULL; + struct p9_rdma_context *rpl_context = NULL; + + /* When an error occurs between posting the recv and the send, + * there will be a receive context posted without a pending request. + * Since there is no way to "un-post" it, we remember it and skip + * post_recv() for the next request. + * So here, + * see if we are this `next request' and need to absorb an excess rc. + * If yes, then drop and free our own, and do not recv_post(). + **/ + if (unlikely(atomic_read(&rdma->excess_rc) > 0)) { + if ((atomic_sub_return(1, &rdma->excess_rc) >= 0)) { + /* Got one ! */ + kfree(req->rc); + req->rc = NULL; + goto dont_need_post_recv; + } else { + /* We raced and lost. */ + atomic_inc(&rdma->excess_rc); + } + } + + /* Allocate an fcall for the reply */ + rpl_context = kmalloc(sizeof *rpl_context, GFP_NOFS); + if (!rpl_context) { + err = -ENOMEM; + goto recv_error; + } + rpl_context->rc = req->rc; + + /* + * Post a receive buffer for this request. We need to ensure + * there is a reply buffer available for every outstanding + * request. A flushed request can result in no reply for an + * outstanding request, so we must keep a count to avoid + * overflowing the RQ. + */ + if (down_interruptible(&rdma->rq_sem)) { + err = -EINTR; + goto recv_error; + } + + err = post_recv(client, rpl_context); + if (err) { + p9_debug(P9_DEBUG_FCALL, "POST RECV failed\n"); + goto recv_error; + } + /* remove posted receive buffer from request structure */ + req->rc = NULL; + +dont_need_post_recv: + /* Post the request */ + c = kmalloc(sizeof *c, GFP_NOFS); + if (!c) { + err = -ENOMEM; + goto send_error; + } + c->req = req; + + c->busa = ib_dma_map_single(rdma->cm_id->device, + c->req->tc->sdata, c->req->tc->size, + DMA_TO_DEVICE); + if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) { + err = -EIO; + goto send_error; + } + + sge.addr = c->busa; + sge.length = c->req->tc->size; + sge.lkey = rdma->lkey; + + wr.next = NULL; + c->wc_op = IB_WC_SEND; + wr.wr_id = (unsigned long) c; + wr.opcode = IB_WR_SEND; + wr.send_flags = IB_SEND_SIGNALED; + wr.sg_list = &sge; + wr.num_sge = 1; + + if (down_interruptible(&rdma->sq_sem)) { + err = -EINTR; + goto send_error; + } + + /* Mark request as `sent' *before* we actually send it, + * because doing if after could erase the REQ_STATUS_RCVD + * status in case of a very fast reply. + */ + req->status = REQ_STATUS_SENT; + err = ib_post_send(rdma->qp, &wr, &bad_wr); + if (err) + goto send_error; + + /* Success */ + return 0; + + /* Handle errors that happened during or while preparing the send: */ + send_error: + req->status = REQ_STATUS_ERROR; + kfree(c); + p9_debug(P9_DEBUG_ERROR, "Error %d in rdma_request()\n", err); + + /* Ach. + * We did recv_post(), but not send. We have one recv_post in excess. + */ + atomic_inc(&rdma->excess_rc); + return err; + + /* Handle errors that happened during or while preparing post_recv(): */ + recv_error: + kfree(rpl_context); + spin_lock_irqsave(&rdma->req_lock, flags); + if (rdma->state < P9_RDMA_CLOSING) { + rdma->state = P9_RDMA_CLOSING; + spin_unlock_irqrestore(&rdma->req_lock, flags); + rdma_disconnect(rdma->cm_id); + } else + spin_unlock_irqrestore(&rdma->req_lock, flags); + return err; +} + +static void rdma_close(struct p9_client *client) +{ + struct p9_trans_rdma *rdma; + + if (!client) + return; + + rdma = client->trans; + if (!rdma) + return; + + client->status = Disconnected; + rdma_disconnect(rdma->cm_id); + rdma_destroy_trans(rdma); +} + +/** + * alloc_rdma - Allocate and initialize the rdma transport structure + * @opts: Mount options structure + */ +static struct p9_trans_rdma *alloc_rdma(struct p9_rdma_opts *opts) +{ + struct p9_trans_rdma *rdma; + + rdma = kzalloc(sizeof(struct p9_trans_rdma), GFP_KERNEL); + if (!rdma) + return NULL; + + rdma->sq_depth = opts->sq_depth; + rdma->rq_depth = opts->rq_depth; + rdma->timeout = opts->timeout; + spin_lock_init(&rdma->req_lock); + init_completion(&rdma->cm_done); + sema_init(&rdma->sq_sem, rdma->sq_depth); + sema_init(&rdma->rq_sem, rdma->rq_depth); + atomic_set(&rdma->excess_rc, 0); + + return rdma; +} + +static int rdma_cancel(struct p9_client *client, struct p9_req_t *req) +{ + /* Nothing to do here. + * We will take care of it (if we have to) in rdma_cancelled() + */ + return 1; +} + +/* A request has been fully flushed without a reply. + * That means we have posted one buffer in excess. + */ +static int rdma_cancelled(struct p9_client *client, struct p9_req_t *req) +{ + struct p9_trans_rdma *rdma = client->trans; + atomic_inc(&rdma->excess_rc); + return 0; +} + +/** + * trans_create_rdma - Transport method for creating atransport instance + * @client: client instance + * @addr: IP address string + * @args: Mount options string + */ +static int +rdma_create_trans(struct p9_client *client, const char *addr, char *args) +{ + int err; + struct p9_rdma_opts opts; + struct p9_trans_rdma *rdma; + struct rdma_conn_param conn_param; + struct ib_qp_init_attr qp_attr; + struct ib_device_attr devattr; + + /* Parse the transport specific mount options */ + err = parse_opts(args, &opts); + if (err < 0) + return err; + + /* Create and initialize the RDMA transport structure */ + rdma = alloc_rdma(&opts); + if (!rdma) + return -ENOMEM; + + /* Create the RDMA CM ID */ + rdma->cm_id = rdma_create_id(p9_cm_event_handler, client, RDMA_PS_TCP, + IB_QPT_RC); + if (IS_ERR(rdma->cm_id)) + goto error; + + /* Associate the client with the transport */ + client->trans = rdma; + + /* Resolve the server's address */ + rdma->addr.sin_family = AF_INET; + rdma->addr.sin_addr.s_addr = in_aton(addr); + rdma->addr.sin_port = htons(opts.port); + err = rdma_resolve_addr(rdma->cm_id, NULL, + (struct sockaddr *)&rdma->addr, + rdma->timeout); + if (err) + goto error; + err = wait_for_completion_interruptible(&rdma->cm_done); + if (err || (rdma->state != P9_RDMA_ADDR_RESOLVED)) + goto error; + + /* Resolve the route to the server */ + err = rdma_resolve_route(rdma->cm_id, rdma->timeout); + if (err) + goto error; + err = wait_for_completion_interruptible(&rdma->cm_done); + if (err || (rdma->state != P9_RDMA_ROUTE_RESOLVED)) + goto error; + + /* Query the device attributes */ + err = ib_query_device(rdma->cm_id->device, &devattr); + if (err) + goto error; + + /* Create the Completion Queue */ + rdma->cq = ib_create_cq(rdma->cm_id->device, cq_comp_handler, + cq_event_handler, client, + opts.sq_depth + opts.rq_depth + 1, 0); + if (IS_ERR(rdma->cq)) + goto error; + ib_req_notify_cq(rdma->cq, IB_CQ_NEXT_COMP); + + /* Create the Protection Domain */ + rdma->pd = ib_alloc_pd(rdma->cm_id->device); + if (IS_ERR(rdma->pd)) + goto error; + + /* Cache the DMA lkey in the transport */ + rdma->dma_mr = NULL; + if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) + rdma->lkey = rdma->cm_id->device->local_dma_lkey; + else { + rdma->dma_mr = ib_get_dma_mr(rdma->pd, IB_ACCESS_LOCAL_WRITE); + if (IS_ERR(rdma->dma_mr)) + goto error; + rdma->lkey = rdma->dma_mr->lkey; + } + + /* Create the Queue Pair */ + memset(&qp_attr, 0, sizeof qp_attr); + qp_attr.event_handler = qp_event_handler; + qp_attr.qp_context = client; + qp_attr.cap.max_send_wr = opts.sq_depth; + qp_attr.cap.max_recv_wr = opts.rq_depth; + qp_attr.cap.max_send_sge = P9_RDMA_SEND_SGE; + qp_attr.cap.max_recv_sge = P9_RDMA_RECV_SGE; + qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR; + qp_attr.qp_type = IB_QPT_RC; + qp_attr.send_cq = rdma->cq; + qp_attr.recv_cq = rdma->cq; + err = rdma_create_qp(rdma->cm_id, rdma->pd, &qp_attr); + if (err) + goto error; + rdma->qp = rdma->cm_id->qp; + + /* Request a connection */ + memset(&conn_param, 0, sizeof(conn_param)); + conn_param.private_data = NULL; + conn_param.private_data_len = 0; + conn_param.responder_resources = P9_RDMA_IRD; + conn_param.initiator_depth = P9_RDMA_ORD; + err = rdma_connect(rdma->cm_id, &conn_param); + if (err) + goto error; + err = wait_for_completion_interruptible(&rdma->cm_done); + if (err || (rdma->state != P9_RDMA_CONNECTED)) + goto error; + + client->status = Connected; + + return 0; + +error: + rdma_destroy_trans(rdma); + return -ENOTCONN; +} + +static struct p9_trans_module p9_rdma_trans = { + .name = "rdma", + .maxsize = P9_RDMA_MAXSIZE, + .def = 0, + .owner = THIS_MODULE, + .create = rdma_create_trans, + .close = rdma_close, + .request = rdma_request, + .cancel = rdma_cancel, + .cancelled = rdma_cancelled, +}; + +/** + * p9_trans_rdma_init - Register the 9P RDMA transport driver + */ +static int __init p9_trans_rdma_init(void) +{ + v9fs_register_trans(&p9_rdma_trans); + return 0; +} + +static void __exit p9_trans_rdma_exit(void) +{ + v9fs_unregister_trans(&p9_rdma_trans); +} + +module_init(p9_trans_rdma_init); +module_exit(p9_trans_rdma_exit); + +MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>"); +MODULE_DESCRIPTION("RDMA Transport for 9P"); +MODULE_LICENSE("Dual BSD/GPL"); diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index de7a9f532ed..6940d8fe897 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -1,12 +1,10 @@ /* - * The Guest 9p transport driver + * The Virtio 9p transport driver * * This is a block based transport driver based on the lguest block driver * code. * - */ -/* - * Copyright (C) 2007 Eric Van Hensbergen, IBM Corporation + * Copyright (C) 2007, 2008 Eric Van Hensbergen, IBM Corporation * * Based on virtio console driver * Copyright (C) 2006, 2007 Rusty Russell, IBM Corporation @@ -28,6 +26,8 @@ * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/in.h> #include <linux/module.h> #include <linux/net.h> @@ -39,90 +39,68 @@ #include <linux/inet.h> #include <linux/idr.h> #include <linux/file.h> +#include <linux/highmem.h> +#include <linux/slab.h> #include <net/9p/9p.h> #include <linux/parser.h> +#include <net/9p/client.h> #include <net/9p/transport.h> #include <linux/scatterlist.h> +#include <linux/swap.h> #include <linux/virtio.h> #include <linux/virtio_9p.h> +#include "trans_common.h" #define VIRTQUEUE_NUM 128 /* a single mutex to manage channel initialization and attachment */ -static DECLARE_MUTEX(virtio_9p_lock); -/* global which tracks highest initialized channel */ -static int chan_index; - -#define P9_INIT_MAXTAG 16 - -#define REQ_STATUS_IDLE 0 -#define REQ_STATUS_SENT 1 -#define REQ_STATUS_RCVD 2 -#define REQ_STATUS_FLSH 3 - -struct p9_req_t { - int status; - wait_queue_head_t *wq; -}; - -/* We keep all per-channel information in a structure. +static DEFINE_MUTEX(virtio_9p_lock); +static DECLARE_WAIT_QUEUE_HEAD(vp_wq); +static atomic_t vp_pinned = ATOMIC_INIT(0); + +/** + * struct virtio_chan - per-instance transport information + * @initialized: whether the channel is initialized + * @inuse: whether the channel is in use + * @lock: protects multiple elements within this structure + * @client: client instance + * @vdev: virtio dev associated with this channel + * @vq: virtio queue associated with this channel + * @sg: scatter gather list which is used to pack a request (protected?) + * + * We keep all per-channel information in a structure. * This structure is allocated within the devices dev->mem space. * A pointer to the structure will get put in the transport private. + * */ -static struct virtio_chan { - bool initialized; /* channel is initialized */ - bool inuse; /* channel is in use */ + +struct virtio_chan { + bool inuse; spinlock_t lock; + struct p9_client *client; struct virtio_device *vdev; struct virtqueue *vq; - - struct p9_idpool *tagpool; - struct p9_req_t *reqs; - int max_tag; - + int ring_bufs_avail; + wait_queue_head_t *vc_wq; + /* This is global limit. Since we don't have a global structure, + * will be placing it in each channel. + */ + unsigned long p9_max_pages; /* Scatterlist: can be too big for stack. */ struct scatterlist sg[VIRTQUEUE_NUM]; -} channels[MAX_9P_CHAN]; -/* Lookup requests by tag */ -static struct p9_req_t *p9_lookup_tag(struct virtio_chan *c, u16 tag) -{ - /* This looks up the original request by tag so we know which - * buffer to read the data into */ - tag++; - - while (tag >= c->max_tag) { - int old_max = c->max_tag; - int count; - - if (c->max_tag) - c->max_tag *= 2; - else - c->max_tag = P9_INIT_MAXTAG; - - c->reqs = krealloc(c->reqs, sizeof(struct p9_req_t)*c->max_tag, - GFP_ATOMIC); - if (!c->reqs) { - printk(KERN_ERR "Couldn't grow tag array\n"); - BUG(); - } - for (count = old_max; count < c->max_tag; count++) { - c->reqs[count].status = REQ_STATUS_IDLE; - c->reqs[count].wq = kmalloc(sizeof(wait_queue_head_t), - GFP_ATOMIC); - if (!c->reqs[count].wq) { - printk(KERN_ERR "Couldn't grow tag array\n"); - BUG(); - } - init_waitqueue_head(c->reqs[count].wq); - } - } + int tag_len; + /* + * tag name to identify a mount Non-null terminated + */ + char *tag; - return &c->reqs[tag]; -} + struct list_head chan_list; +}; +static struct list_head virtio_chan_list; /* How many bytes left in this page. */ static unsigned int rest_of_page(void *data) @@ -130,48 +108,82 @@ static unsigned int rest_of_page(void *data) return PAGE_SIZE - ((unsigned long)data % PAGE_SIZE); } -static void p9_virtio_close(struct p9_trans *trans) -{ - struct virtio_chan *chan = trans->priv; - int count; - unsigned int flags; - - spin_lock_irqsave(&chan->lock, flags); - p9_idpool_destroy(chan->tagpool); - for (count = 0; count < chan->max_tag; count++) - kfree(chan->reqs[count].wq); - kfree(chan->reqs); - chan->max_tag = 0; - spin_unlock_irqrestore(&chan->lock, flags); +/** + * p9_virtio_close - reclaim resources of a channel + * @client: client instance + * + * This reclaims a channel by freeing its resources and + * reseting its inuse flag. + * + */ - down(&virtio_9p_lock); - chan->inuse = false; - up(&virtio_9p_lock); +static void p9_virtio_close(struct p9_client *client) +{ + struct virtio_chan *chan = client->trans; - kfree(trans); + mutex_lock(&virtio_9p_lock); + if (chan) + chan->inuse = false; + mutex_unlock(&virtio_9p_lock); } +/** + * req_done - callback which signals activity from the server + * @vq: virtio queue activity was received on + * + * This notifies us that the server has triggered some activity + * on the virtio channel - most likely a response to request we + * sent. Figure out which requests now have responses and wake up + * those threads. + * + * Bugs: could do with some additional sanity checking, but appears to work. + * + */ + static void req_done(struct virtqueue *vq) { struct virtio_chan *chan = vq->vdev->priv; struct p9_fcall *rc; unsigned int len; - unsigned long flags; struct p9_req_t *req; + unsigned long flags; - spin_lock_irqsave(&chan->lock, flags); - while ((rc = chan->vq->vq_ops->get_buf(chan->vq, &len)) != NULL) { - req = p9_lookup_tag(chan, rc->tag); - req->status = REQ_STATUS_RCVD; - wake_up(req->wq); + p9_debug(P9_DEBUG_TRANS, ": request done\n"); + + while (1) { + spin_lock_irqsave(&chan->lock, flags); + rc = virtqueue_get_buf(chan->vq, &len); + if (rc == NULL) { + spin_unlock_irqrestore(&chan->lock, flags); + break; + } + chan->ring_bufs_avail = 1; + spin_unlock_irqrestore(&chan->lock, flags); + /* Wakeup if anyone waiting for VirtIO ring space. */ + wake_up(chan->vc_wq); + p9_debug(P9_DEBUG_TRANS, ": rc %p\n", rc); + p9_debug(P9_DEBUG_TRANS, ": lookup tag %d\n", rc->tag); + req = p9_tag_lookup(chan->client, rc->tag); + p9_client_cb(chan->client, req, REQ_STATUS_RCVD); } - /* In case queue is stopped waiting for more buffers. */ - spin_unlock_irqrestore(&chan->lock, flags); } -static int -pack_sg_list(struct scatterlist *sg, int start, int limit, char *data, - int count) +/** + * pack_sg_list - pack a scatter gather list from a linear buffer + * @sg: scatter/gather list to pack into + * @start: which segment of the sg_list to start at + * @limit: maximum segment to pack data to + * @data: data to pack into scatter/gather list + * @count: amount of data to pack into the scatter/gather list + * + * sg_lists have multiple segments of various sizes. This will pack + * arbitrary data into an existing scatter gather list, segmenting the + * data as necessary within constraints. + * + */ + +static int pack_sg_list(struct scatterlist *sg, int start, + int limit, char *data, int count) { int s; int index = start; @@ -180,103 +192,341 @@ pack_sg_list(struct scatterlist *sg, int start, int limit, char *data, s = rest_of_page(data); if (s > count) s = count; + BUG_ON(index > limit); + /* Make sure we don't terminate early. */ + sg_unmark_end(&sg[index]); sg_set_buf(&sg[index++], data, s); count -= s; data += s; - BUG_ON(index > limit); } - + if (index-start) + sg_mark_end(&sg[index - 1]); return index-start; } +/* We don't currently allow canceling of virtio requests */ +static int p9_virtio_cancel(struct p9_client *client, struct p9_req_t *req) +{ + return 1; +} + +/** + * pack_sg_list_p - Just like pack_sg_list. Instead of taking a buffer, + * this takes a list of pages. + * @sg: scatter/gather list to pack into + * @start: which segment of the sg_list to start at + * @pdata: a list of pages to add into sg. + * @nr_pages: number of pages to pack into the scatter/gather list + * @data: data to pack into scatter/gather list + * @count: amount of data to pack into the scatter/gather list + */ static int -p9_virtio_rpc(struct p9_trans *t, struct p9_fcall *tc, struct p9_fcall **rc) +pack_sg_list_p(struct scatterlist *sg, int start, int limit, + struct page **pdata, int nr_pages, char *data, int count) { - int in, out; - int n, err, size; - struct virtio_chan *chan = t->priv; - char *rdata; - struct p9_req_t *req; - unsigned long flags; + int i = 0, s; + int data_off; + int index = start; - if (*rc == NULL) { - *rc = kmalloc(sizeof(struct p9_fcall) + t->msize, GFP_KERNEL); - if (!*rc) - return -ENOMEM; + BUG_ON(nr_pages > (limit - start)); + /* + * if the first page doesn't start at + * page boundary find the offset + */ + data_off = offset_in_page(data); + while (nr_pages) { + s = rest_of_page(data); + if (s > count) + s = count; + /* Make sure we don't terminate early. */ + sg_unmark_end(&sg[index]); + sg_set_page(&sg[index++], pdata[i++], s, data_off); + data_off = 0; + data += s; + count -= s; + nr_pages--; } - rdata = (char *)*rc+sizeof(struct p9_fcall); + if (index-start) + sg_mark_end(&sg[index - 1]); + return index - start; +} - n = P9_NOTAG; - if (tc->id != P9_TVERSION) { - n = p9_idpool_get(chan->tagpool); - if (n < 0) - return -ENOMEM; - } +/** + * p9_virtio_request - issue a request + * @client: client instance issuing the request + * @req: request to be issued + * + */ +static int +p9_virtio_request(struct p9_client *client, struct p9_req_t *req) +{ + int err; + int in, out, out_sgs, in_sgs; + unsigned long flags; + struct virtio_chan *chan = client->trans; + struct scatterlist *sgs[2]; + + p9_debug(P9_DEBUG_TRANS, "9p debug: virtio request\n"); + + req->status = REQ_STATUS_SENT; +req_retry: spin_lock_irqsave(&chan->lock, flags); - req = p9_lookup_tag(chan, n); - spin_unlock_irqrestore(&chan->lock, flags); - p9_set_tag(tc, n); + out_sgs = in_sgs = 0; + /* Handle out VirtIO ring buffers */ + out = pack_sg_list(chan->sg, 0, + VIRTQUEUE_NUM, req->tc->sdata, req->tc->size); + if (out) + sgs[out_sgs++] = chan->sg; + + in = pack_sg_list(chan->sg, out, + VIRTQUEUE_NUM, req->rc->sdata, req->rc->capacity); + if (in) + sgs[out_sgs + in_sgs++] = chan->sg + out; + + err = virtqueue_add_sgs(chan->vq, sgs, out_sgs, in_sgs, req->tc, + GFP_ATOMIC); + if (err < 0) { + if (err == -ENOSPC) { + chan->ring_bufs_avail = 0; + spin_unlock_irqrestore(&chan->lock, flags); + err = wait_event_interruptible(*chan->vc_wq, + chan->ring_bufs_avail); + if (err == -ERESTARTSYS) + return err; + + p9_debug(P9_DEBUG_TRANS, "Retry virtio request\n"); + goto req_retry; + } else { + spin_unlock_irqrestore(&chan->lock, flags); + p9_debug(P9_DEBUG_TRANS, + "virtio rpc add_sgs returned failure\n"); + return -EIO; + } + } + virtqueue_kick(chan->vq); + spin_unlock_irqrestore(&chan->lock, flags); - P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio rpc tag %d\n", n); + p9_debug(P9_DEBUG_TRANS, "virtio request kicked\n"); + return 0; +} - out = pack_sg_list(chan->sg, 0, VIRTQUEUE_NUM, tc->sdata, tc->size); - in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM-out, rdata, t->msize); +static int p9_get_mapped_pages(struct virtio_chan *chan, + struct page **pages, char *data, + int nr_pages, int write, int kern_buf) +{ + int err; + if (!kern_buf) { + /* + * We allow only p9_max_pages pinned. We wait for the + * Other zc request to finish here + */ + if (atomic_read(&vp_pinned) >= chan->p9_max_pages) { + err = wait_event_interruptible(vp_wq, + (atomic_read(&vp_pinned) < chan->p9_max_pages)); + if (err == -ERESTARTSYS) + return err; + } + err = p9_payload_gup(data, &nr_pages, pages, write); + if (err < 0) + return err; + atomic_add(nr_pages, &vp_pinned); + } else { + /* kernel buffer, no need to pin pages */ + int s, index = 0; + int count = nr_pages; + while (nr_pages) { + s = rest_of_page(data); + if (is_vmalloc_addr(data)) + pages[index++] = vmalloc_to_page(data); + else + pages[index++] = kmap_to_page(data); + data += s; + nr_pages--; + } + nr_pages = count; + } + return nr_pages; +} +/** + * p9_virtio_zc_request - issue a zero copy request + * @client: client instance issuing the request + * @req: request to be issued + * @uidata: user bffer that should be ued for zero copy read + * @uodata: user buffer that shoud be user for zero copy write + * @inlen: read buffer size + * @olen: write buffer size + * @hdrlen: reader header size, This is the size of response protocol data + * + */ +static int +p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req, + char *uidata, char *uodata, int inlen, + int outlen, int in_hdr_len, int kern_buf) +{ + int in, out, err, out_sgs, in_sgs; + unsigned long flags; + int in_nr_pages = 0, out_nr_pages = 0; + struct page **in_pages = NULL, **out_pages = NULL; + struct virtio_chan *chan = client->trans; + struct scatterlist *sgs[4]; + + p9_debug(P9_DEBUG_TRANS, "virtio request\n"); + + if (uodata) { + out_nr_pages = p9_nr_pages(uodata, outlen); + out_pages = kmalloc(sizeof(struct page *) * out_nr_pages, + GFP_NOFS); + if (!out_pages) { + err = -ENOMEM; + goto err_out; + } + out_nr_pages = p9_get_mapped_pages(chan, out_pages, uodata, + out_nr_pages, 0, kern_buf); + if (out_nr_pages < 0) { + err = out_nr_pages; + kfree(out_pages); + out_pages = NULL; + goto err_out; + } + } + if (uidata) { + in_nr_pages = p9_nr_pages(uidata, inlen); + in_pages = kmalloc(sizeof(struct page *) * in_nr_pages, + GFP_NOFS); + if (!in_pages) { + err = -ENOMEM; + goto err_out; + } + in_nr_pages = p9_get_mapped_pages(chan, in_pages, uidata, + in_nr_pages, 1, kern_buf); + if (in_nr_pages < 0) { + err = in_nr_pages; + kfree(in_pages); + in_pages = NULL; + goto err_out; + } + } req->status = REQ_STATUS_SENT; +req_retry_pinned: + spin_lock_irqsave(&chan->lock, flags); - if (chan->vq->vq_ops->add_buf(chan->vq, chan->sg, out, in, tc)) { - P9_DPRINTK(P9_DEBUG_TRANS, - "9p debug: virtio rpc add_buf returned failure"); - return -EIO; - } + out_sgs = in_sgs = 0; - chan->vq->vq_ops->kick(chan->vq); + /* out data */ + out = pack_sg_list(chan->sg, 0, + VIRTQUEUE_NUM, req->tc->sdata, req->tc->size); - wait_event(*req->wq, req->status == REQ_STATUS_RCVD); + if (out) + sgs[out_sgs++] = chan->sg; - size = le32_to_cpu(*(__le32 *) rdata); + if (out_pages) { + sgs[out_sgs++] = chan->sg + out; + out += pack_sg_list_p(chan->sg, out, VIRTQUEUE_NUM, + out_pages, out_nr_pages, uodata, outlen); + } + + /* + * Take care of in data + * For example TREAD have 11. + * 11 is the read/write header = PDU Header(7) + IO Size (4). + * Arrange in such a way that server places header in the + * alloced memory and payload onto the user buffer. + */ + in = pack_sg_list(chan->sg, out, + VIRTQUEUE_NUM, req->rc->sdata, in_hdr_len); + if (in) + sgs[out_sgs + in_sgs++] = chan->sg + out; + + if (in_pages) { + sgs[out_sgs + in_sgs++] = chan->sg + out + in; + in += pack_sg_list_p(chan->sg, out + in, VIRTQUEUE_NUM, + in_pages, in_nr_pages, uidata, inlen); + } - err = p9_deserialize_fcall(rdata, size, *rc, t->extended); + BUG_ON(out_sgs + in_sgs > ARRAY_SIZE(sgs)); + err = virtqueue_add_sgs(chan->vq, sgs, out_sgs, in_sgs, req->tc, + GFP_ATOMIC); if (err < 0) { - P9_DPRINTK(P9_DEBUG_TRANS, - "9p debug: virtio rpc deserialize returned %d\n", err); - return err; + if (err == -ENOSPC) { + chan->ring_bufs_avail = 0; + spin_unlock_irqrestore(&chan->lock, flags); + err = wait_event_interruptible(*chan->vc_wq, + chan->ring_bufs_avail); + if (err == -ERESTARTSYS) + goto err_out; + + p9_debug(P9_DEBUG_TRANS, "Retry virtio request\n"); + goto req_retry_pinned; + } else { + spin_unlock_irqrestore(&chan->lock, flags); + p9_debug(P9_DEBUG_TRANS, + "virtio rpc add_sgs returned failure\n"); + err = -EIO; + goto err_out; + } } - -#ifdef CONFIG_NET_9P_DEBUG - if ((p9_debug_level&P9_DEBUG_FCALL) == P9_DEBUG_FCALL) { - char buf[150]; - - p9_printfcall(buf, sizeof(buf), *rc, t->extended); - printk(KERN_NOTICE ">>> %p %s\n", t, buf); + virtqueue_kick(chan->vq); + spin_unlock_irqrestore(&chan->lock, flags); + p9_debug(P9_DEBUG_TRANS, "virtio request kicked\n"); + err = wait_event_interruptible(*req->wq, + req->status >= REQ_STATUS_RCVD); + /* + * Non kernel buffers are pinned, unpin them + */ +err_out: + if (!kern_buf) { + if (in_pages) { + p9_release_pages(in_pages, in_nr_pages); + atomic_sub(in_nr_pages, &vp_pinned); + } + if (out_pages) { + p9_release_pages(out_pages, out_nr_pages); + atomic_sub(out_nr_pages, &vp_pinned); + } + /* wakeup anybody waiting for slots to pin pages */ + wake_up(&vp_wq); } -#endif + kfree(in_pages); + kfree(out_pages); + return err; +} - if (n != P9_NOTAG && p9_idpool_check(n, chan->tagpool)) - p9_idpool_put(n, chan->tagpool); +static ssize_t p9_mount_tag_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct virtio_chan *chan; + struct virtio_device *vdev; - req->status = REQ_STATUS_IDLE; + vdev = dev_to_virtio(dev); + chan = vdev->priv; - return 0; + return snprintf(buf, chan->tag_len + 1, "%s", chan->tag); } +static DEVICE_ATTR(mount_tag, 0444, p9_mount_tag_show, NULL); + +/** + * p9_virtio_probe - probe for existence of 9P virtio channels + * @vdev: virtio device to probe + * + * This probes for existing virtio channels. + * + */ + static int p9_virtio_probe(struct virtio_device *vdev) { + __u16 tag_len; + char *tag; int err; struct virtio_chan *chan; - int index; - - down(&virtio_9p_lock); - index = chan_index++; - chan = &channels[index]; - up(&virtio_9p_lock); - if (chan_index > MAX_9P_CHAN) { - printk(KERN_ERR "9p: virtio: Maximum channels exceeded\n"); - BUG(); + chan = kmalloc(sizeof(struct virtio_chan), GFP_KERNEL); + if (!chan) { + pr_err("Failed to allocate virtio 9P channel\n"); err = -ENOMEM; goto fail; } @@ -284,7 +534,7 @@ static int p9_virtio_probe(struct virtio_device *vdev) chan->vdev = vdev; /* We expect one virtqueue, for requests. */ - chan->vq = vdev->config->find_vq(vdev, 0, req_done); + chan->vq = virtio_find_single_vq(vdev, req_done, "requests"); if (IS_ERR(chan->vq)) { err = PTR_ERR(chan->vq); goto out_free_vq; @@ -295,113 +545,169 @@ static int p9_virtio_probe(struct virtio_device *vdev) sg_init_table(chan->sg, VIRTQUEUE_NUM); chan->inuse = false; - chan->initialized = true; + if (virtio_has_feature(vdev, VIRTIO_9P_MOUNT_TAG)) { + virtio_cread(vdev, struct virtio_9p_config, tag_len, &tag_len); + } else { + err = -EINVAL; + goto out_free_vq; + } + tag = kmalloc(tag_len, GFP_KERNEL); + if (!tag) { + err = -ENOMEM; + goto out_free_vq; + } + + virtio_cread_bytes(vdev, offsetof(struct virtio_9p_config, tag), + tag, tag_len); + chan->tag = tag; + chan->tag_len = tag_len; + err = sysfs_create_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr); + if (err) { + goto out_free_tag; + } + chan->vc_wq = kmalloc(sizeof(wait_queue_head_t), GFP_KERNEL); + if (!chan->vc_wq) { + err = -ENOMEM; + goto out_free_tag; + } + init_waitqueue_head(chan->vc_wq); + chan->ring_bufs_avail = 1; + /* Ceiling limit to avoid denial of service attacks */ + chan->p9_max_pages = nr_free_buffer_pages()/4; + + mutex_lock(&virtio_9p_lock); + list_add_tail(&chan->chan_list, &virtio_chan_list); + mutex_unlock(&virtio_9p_lock); + + /* Let udev rules use the new mount_tag attribute. */ + kobject_uevent(&(vdev->dev.kobj), KOBJ_CHANGE); + return 0; +out_free_tag: + kfree(tag); out_free_vq: - vdev->config->del_vq(chan->vq); + vdev->config->del_vqs(vdev); + kfree(chan); fail: - down(&virtio_9p_lock); - chan_index--; - up(&virtio_9p_lock); return err; } -/* This sets up a transport channel for 9p communication. Right now + +/** + * p9_virtio_create - allocate a new virtio channel + * @client: client instance invoking this transport + * @devname: string identifying the channel to connect to (unused) + * @args: args passed from sys_mount() for per-transport options (unused) + * + * This sets up a transport channel for 9p communication. Right now * we only match the first available channel, but eventually we couldlook up * alternate channels by matching devname versus a virtio_config entry. * We use a simple reference count mechanism to ensure that only a single - * mount has a channel open at a time. */ -static struct p9_trans * -p9_virtio_create(const char *devname, char *args, int msize, - unsigned char extended) + * mount has a channel open at a time. + * + */ + +static int +p9_virtio_create(struct p9_client *client, const char *devname, char *args) { - struct p9_trans *trans; - struct virtio_chan *chan = channels; - int index = 0; - - down(&virtio_9p_lock); - while (index < MAX_9P_CHAN) { - if (chan->initialized && !chan->inuse) { - chan->inuse = true; - break; - } else { - index++; - chan = &channels[index]; + struct virtio_chan *chan; + int ret = -ENOENT; + int found = 0; + + mutex_lock(&virtio_9p_lock); + list_for_each_entry(chan, &virtio_chan_list, chan_list) { + if (!strncmp(devname, chan->tag, chan->tag_len) && + strlen(devname) == chan->tag_len) { + if (!chan->inuse) { + chan->inuse = true; + found = 1; + break; + } + ret = -EBUSY; } } - up(&virtio_9p_lock); + mutex_unlock(&virtio_9p_lock); - if (index >= MAX_9P_CHAN) { - printk(KERN_ERR "9p: no channels available\n"); - return ERR_PTR(-ENODEV); + if (!found) { + pr_err("no channels available\n"); + return ret; } - chan->tagpool = p9_idpool_create(); - if (IS_ERR(chan->tagpool)) { - printk(KERN_ERR "9p: couldn't allocate tagpool\n"); - return ERR_PTR(-ENOMEM); - } - p9_idpool_get(chan->tagpool); /* reserve tag 0 */ - chan->max_tag = 0; - chan->reqs = NULL; - - trans = kmalloc(sizeof(struct p9_trans), GFP_KERNEL); - if (!trans) { - printk(KERN_ERR "9p: couldn't allocate transport\n"); - return ERR_PTR(-ENOMEM); - } - trans->extended = extended; - trans->msize = msize; - trans->close = p9_virtio_close; - trans->rpc = p9_virtio_rpc; - trans->priv = chan; + client->trans = (void *)chan; + client->status = Connected; + chan->client = client; - return trans; + return 0; } +/** + * p9_virtio_remove - clean up resources associated with a virtio device + * @vdev: virtio device to remove + * + */ + static void p9_virtio_remove(struct virtio_device *vdev) { struct virtio_chan *chan = vdev->priv; - BUG_ON(chan->inuse); + if (chan->inuse) + p9_virtio_close(chan->client); + vdev->config->del_vqs(vdev); - if (chan->initialized) { - vdev->config->del_vq(chan->vq); - chan->initialized = false; - } -} + mutex_lock(&virtio_9p_lock); + list_del(&chan->chan_list); + mutex_unlock(&virtio_9p_lock); + sysfs_remove_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr); + kobject_uevent(&(vdev->dev.kobj), KOBJ_CHANGE); + kfree(chan->tag); + kfree(chan->vc_wq); + kfree(chan); -#define VIRTIO_ID_9P 9 +} static struct virtio_device_id id_table[] = { { VIRTIO_ID_9P, VIRTIO_DEV_ANY_ID }, { 0 }, }; +static unsigned int features[] = { + VIRTIO_9P_MOUNT_TAG, +}; + /* The standard "struct lguest_driver": */ static struct virtio_driver p9_virtio_drv = { - .driver.name = KBUILD_MODNAME, - .driver.owner = THIS_MODULE, - .id_table = id_table, - .probe = p9_virtio_probe, - .remove = p9_virtio_remove, + .feature_table = features, + .feature_table_size = ARRAY_SIZE(features), + .driver.name = KBUILD_MODNAME, + .driver.owner = THIS_MODULE, + .id_table = id_table, + .probe = p9_virtio_probe, + .remove = p9_virtio_remove, }; static struct p9_trans_module p9_virtio_trans = { .name = "virtio", .create = p9_virtio_create, - .maxsize = PAGE_SIZE*16, - .def = 0, + .close = p9_virtio_close, + .request = p9_virtio_request, + .zc_request = p9_virtio_zc_request, + .cancel = p9_virtio_cancel, + /* + * We leave one entry for input and one entry for response + * headers. We also skip one more entry to accomodate, address + * that are not at page boundary, that can result in an extra + * page in zero copy. + */ + .maxsize = PAGE_SIZE * (VIRTQUEUE_NUM - 3), + .def = 1, + .owner = THIS_MODULE, }; /* The standard init function */ static int __init p9_virtio_init(void) { - int count; - - for (count = 0; count < MAX_9P_CHAN; count++) - channels[count].initialized = false; + INIT_LIST_HEAD(&virtio_chan_list); v9fs_register_trans(&p9_virtio_trans); return register_virtio_driver(&p9_virtio_drv); @@ -410,6 +716,7 @@ static int __init p9_virtio_init(void) static void __exit p9_virtio_cleanup(void) { unregister_virtio_driver(&p9_virtio_drv); + v9fs_unregister_trans(&p9_virtio_trans); } module_init(p9_virtio_init); diff --git a/net/9p/util.c b/net/9p/util.c index ef7215565d8..59f278e64f5 100644 --- a/net/9p/util.c +++ b/net/9p/util.c @@ -30,13 +30,26 @@ #include <linux/sched.h> #include <linux/parser.h> #include <linux/idr.h> +#include <linux/slab.h> #include <net/9p/9p.h> +/** + * struct p9_idpool - per-connection accounting for tag idpool + * @lock: protects the pool + * @pool: idr to allocate tag id from + * + */ + struct p9_idpool { spinlock_t lock; struct idr pool; }; +/** + * p9_idpool_create - create a new per-connection id pool + * + */ + struct p9_idpool *p9_idpool_create(void) { struct p9_idpool *p; @@ -52,6 +65,11 @@ struct p9_idpool *p9_idpool_create(void) } EXPORT_SYMBOL(p9_idpool_create); +/** + * p9_idpool_destroy - create a new per-connection id pool + * @p: idpool to destroy + */ + void p9_idpool_destroy(struct p9_idpool *p) { idr_destroy(&p->pool); @@ -61,48 +79,48 @@ EXPORT_SYMBOL(p9_idpool_destroy); /** * p9_idpool_get - allocate numeric id from pool - * @p - pool to allocate from + * @p: pool to allocate from * - * XXX - This seems to be an awful generic function, should it be in idr.c with + * Bugs: This seems to be an awful generic function, should it be in idr.c with * the lock included in struct idr? */ int p9_idpool_get(struct p9_idpool *p) { - int i = 0; - int error; - unsigned int flags; - -retry: - if (idr_pre_get(&p->pool, GFP_KERNEL) == 0) - return 0; + int i; + unsigned long flags; + idr_preload(GFP_NOFS); spin_lock_irqsave(&p->lock, flags); /* no need to store exactly p, we just need something non-null */ - error = idr_get_new(&p->pool, p, &i); - spin_unlock_irqrestore(&p->lock, flags); + i = idr_alloc(&p->pool, p, 0, 0, GFP_NOWAIT); - if (error == -EAGAIN) - goto retry; - else if (error) + spin_unlock_irqrestore(&p->lock, flags); + idr_preload_end(); + if (i < 0) return -1; + p9_debug(P9_DEBUG_MUX, " id %d pool %p\n", i, p); return i; } EXPORT_SYMBOL(p9_idpool_get); /** * p9_idpool_put - release numeric id from pool - * @p - pool to allocate from + * @id: numeric id which is being released + * @p: pool to release id into * - * XXX - This seems to be an awful generic function, should it be in idr.c with + * Bugs: This seems to be an awful generic function, should it be in idr.c with * the lock included in struct idr? */ void p9_idpool_put(int id, struct p9_idpool *p) { - unsigned int flags; + unsigned long flags; + + p9_debug(P9_DEBUG_MUX, " id %d pool %p\n", id, p); + spin_lock_irqsave(&p->lock, flags); idr_remove(&p->pool, id); spin_unlock_irqrestore(&p->lock, flags); @@ -111,11 +129,13 @@ EXPORT_SYMBOL(p9_idpool_put); /** * p9_idpool_check - check if the specified id is available - * @id - id to check - * @p - pool + * @id: id to check + * @p: pool to check */ + int p9_idpool_check(int id, struct p9_idpool *p) { return idr_find(&p->pool, id) != NULL; } EXPORT_SYMBOL(p9_idpool_check); + |
