diff options
Diffstat (limited to 'net/9p')
| -rw-r--r-- | net/9p/Kconfig | 10 | ||||
| -rw-r--r-- | net/9p/Makefile | 1 | ||||
| -rw-r--r-- | net/9p/client.c | 977 | ||||
| -rw-r--r-- | net/9p/error.c | 10 | ||||
| -rw-r--r-- | net/9p/mod.c | 39 | ||||
| -rw-r--r-- | net/9p/protocol.c | 136 | ||||
| -rw-r--r-- | net/9p/protocol.h | 4 | ||||
| -rw-r--r-- | net/9p/trans_common.c | 69 | ||||
| -rw-r--r-- | net/9p/trans_common.h | 17 | ||||
| -rw-r--r-- | net/9p/trans_fd.c | 386 | ||||
| -rw-r--r-- | net/9p/trans_rdma.c | 182 | ||||
| -rw-r--r-- | net/9p/trans_virtio.c | 354 | ||||
| -rw-r--r-- | net/9p/util.c | 23 |
13 files changed, 1498 insertions, 710 deletions
diff --git a/net/9p/Kconfig b/net/9p/Kconfig index 7ed75c7bd5d..a75174a3372 100644 --- a/net/9p/Kconfig +++ b/net/9p/Kconfig @@ -3,8 +3,8 @@ # menuconfig NET_9P - depends on NET && EXPERIMENTAL - tristate "Plan 9 Resource Sharing Support (9P2000) (Experimental)" + depends on NET + tristate "Plan 9 Resource Sharing Support (9P2000)" help If you say Y here, you will get experimental support for Plan 9 resource sharing via the 9P2000 protocol. @@ -16,14 +16,14 @@ menuconfig NET_9P if NET_9P config NET_9P_VIRTIO - depends on EXPERIMENTAL && VIRTIO - tristate "9P Virtio Transport (Experimental)" + depends on VIRTIO + tristate "9P Virtio Transport" help This builds support for a transports between guest partitions and a host partition. config NET_9P_RDMA - depends on INET && INFINIBAND && INFINIBAND_ADDR_TRANS && EXPERIMENTAL + depends on INET && INFINIBAND && INFINIBAND_ADDR_TRANS tristate "9P RDMA Transport (Experimental)" help This builds support for an RDMA transport. diff --git a/net/9p/Makefile b/net/9p/Makefile index 198a640d53a..a0874cc1f71 100644 --- a/net/9p/Makefile +++ b/net/9p/Makefile @@ -9,6 +9,7 @@ obj-$(CONFIG_NET_9P_RDMA) += 9pnet_rdma.o util.o \ protocol.o \ trans_fd.o \ + trans_common.o \ 9pnet_virtio-objs := \ trans_virtio.o \ diff --git a/net/9p/client.c b/net/9p/client.c index a848bca9fbf..0004cbaac4a 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -23,6 +23,8 @@ * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/module.h> #include <linux/errno.h> #include <linux/fs.h> @@ -38,6 +40,9 @@ #include <net/9p/transport.h> #include "protocol.h" +#define CREATE_TRACE_POINTS +#include <trace/events/9p.h> + /* * Client Option Parsing (code inspired by NFS code) * - a little lazy - parse all client options @@ -71,30 +76,40 @@ inline int p9_is_proto_dotu(struct p9_client *clnt) } EXPORT_SYMBOL(p9_is_proto_dotu); +/* + * Some error codes are taken directly from the server replies, + * make sure they are valid. + */ +static int safe_errno(int err) +{ + if ((err > 0) || (err < -MAX_ERRNO)) { + p9_debug(P9_DEBUG_ERROR, "Invalid error code %d\n", err); + return -EPROTO; + } + return err; +} + + /* Interpret mount option for protocol version */ -static int get_protocol_version(const substring_t *name) +static int get_protocol_version(char *s) { int version = -EINVAL; - if (!strncmp("9p2000", name->from, name->to-name->from)) { + if (!strcmp(s, "9p2000")) { version = p9_proto_legacy; - P9_DPRINTK(P9_DEBUG_9P, "Protocol version: Legacy\n"); - } else if (!strncmp("9p2000.u", name->from, name->to-name->from)) { + p9_debug(P9_DEBUG_9P, "Protocol version: Legacy\n"); + } else if (!strcmp(s, "9p2000.u")) { version = p9_proto_2000u; - P9_DPRINTK(P9_DEBUG_9P, "Protocol version: 9P2000.u\n"); - } else if (!strncmp("9p2000.L", name->from, name->to-name->from)) { + p9_debug(P9_DEBUG_9P, "Protocol version: 9P2000.u\n"); + } else if (!strcmp(s, "9p2000.L")) { version = p9_proto_2000L; - P9_DPRINTK(P9_DEBUG_9P, "Protocol version: 9P2000.L\n"); - } else { - P9_DPRINTK(P9_DEBUG_ERROR, "Unknown protocol version %s. ", - name->from); - } + p9_debug(P9_DEBUG_9P, "Protocol version: 9P2000.L\n"); + } else + pr_info("Unknown protocol version %s\n", s); + return version; } -static struct p9_req_t * -p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...); - /** * parse_options - parse mount options into client structure * @opts: options string passed from mount @@ -109,9 +124,10 @@ static int parse_opts(char *opts, struct p9_client *clnt) char *p; substring_t args[MAX_OPT_ARGS]; int option; + char *s; int ret = 0; - clnt->proto_version = p9_proto_2000u; + clnt->proto_version = p9_proto_2000L; clnt->msize = 8192; if (!opts) @@ -119,47 +135,63 @@ static int parse_opts(char *opts, struct p9_client *clnt) tmp_options = kstrdup(opts, GFP_KERNEL); if (!tmp_options) { - P9_DPRINTK(P9_DEBUG_ERROR, - "failed to allocate copy of option string\n"); + p9_debug(P9_DEBUG_ERROR, + "failed to allocate copy of option string\n"); return -ENOMEM; } options = tmp_options; while ((p = strsep(&options, ",")) != NULL) { - int token; + int token, r; if (!*p) continue; token = match_token(p, tokens, args); - if (token < Opt_trans) { - int r = match_int(&args[0], &option); + switch (token) { + case Opt_msize: + r = match_int(&args[0], &option); if (r < 0) { - P9_DPRINTK(P9_DEBUG_ERROR, - "integer field, but no integer?\n"); + p9_debug(P9_DEBUG_ERROR, + "integer field, but no integer?\n"); ret = r; continue; } - } - switch (token) { - case Opt_msize: clnt->msize = option; break; case Opt_trans: - clnt->trans_mod = v9fs_get_trans_by_name(&args[0]); - if(clnt->trans_mod == NULL) { - P9_DPRINTK(P9_DEBUG_ERROR, - "Could not find request transport: %s\n", - (char *) &args[0]); + s = match_strdup(&args[0]); + if (!s) { + ret = -ENOMEM; + p9_debug(P9_DEBUG_ERROR, + "problem allocating copy of trans arg\n"); + goto free_and_return; + } + clnt->trans_mod = v9fs_get_trans_by_name(s); + if (clnt->trans_mod == NULL) { + pr_info("Could not find request transport: %s\n", + s); ret = -EINVAL; + kfree(s); goto free_and_return; } + kfree(s); break; case Opt_legacy: clnt->proto_version = p9_proto_legacy; break; case Opt_version: - ret = get_protocol_version(&args[0]); - if (ret == -EINVAL) + s = match_strdup(&args[0]); + if (!s) { + ret = -ENOMEM; + p9_debug(P9_DEBUG_ERROR, + "problem allocating copy of version arg\n"); goto free_and_return; + } + ret = get_protocol_version(s); + if (ret == -EINVAL) { + kfree(s); + goto free_and_return; + } + kfree(s); clnt->proto_version = ret; break; default: @@ -172,13 +204,24 @@ free_and_return: return ret; } +static struct p9_fcall *p9_fcall_alloc(int alloc_msize) +{ + struct p9_fcall *fc; + fc = kmalloc(sizeof(struct p9_fcall) + alloc_msize, GFP_NOFS); + if (!fc) + return NULL; + fc->capacity = alloc_msize; + fc->sdata = (char *) fc + sizeof(struct p9_fcall); + return fc; +} + /** * p9_tag_alloc - lookup/allocate a request by tag * @c: client session to lookup tag within * @tag: numeric id for transaction * * this is a simple array lookup, but will grow the - * request_slots as necessary to accomodate transaction + * request_slots as necessary to accommodate transaction * ids which did not previously have a slot. * * this code relies on the client spinlock to manage locks, its @@ -187,11 +230,13 @@ free_and_return: * */ -static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag) +static struct p9_req_t * +p9_tag_alloc(struct p9_client *c, u16 tag, unsigned int max_size) { unsigned long flags; int row, col; struct p9_req_t *req; + int alloc_msize = min(c->msize, max_size); /* This looks up the original request by tag so we know which * buffer to read the data into */ @@ -206,7 +251,7 @@ static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag) sizeof(struct p9_req_t), GFP_ATOMIC); if (!c->reqs[row]) { - printk(KERN_ERR "Couldn't grow tag array\n"); + pr_err("Couldn't grow tag array\n"); spin_unlock_irqrestore(&c->lock, flags); return ERR_PTR(-ENOMEM); } @@ -222,39 +267,36 @@ static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag) col = tag % P9_ROW_MAXTAG; req = &c->reqs[row][col]; - if (!req->tc) { - req->wq = kmalloc(sizeof(wait_queue_head_t), GFP_KERNEL); - if (!req->wq) { - printk(KERN_ERR "Couldn't grow tag array\n"); - return ERR_PTR(-ENOMEM); - } + if (!req->wq) { + req->wq = kmalloc(sizeof(wait_queue_head_t), GFP_NOFS); + if (!req->wq) + goto grow_failed; init_waitqueue_head(req->wq); - req->tc = kmalloc(sizeof(struct p9_fcall)+c->msize, - GFP_KERNEL); - req->rc = kmalloc(sizeof(struct p9_fcall)+c->msize, - GFP_KERNEL); - if ((!req->tc) || (!req->rc)) { - printk(KERN_ERR "Couldn't grow tag array\n"); - kfree(req->tc); - kfree(req->rc); - kfree(req->wq); - req->tc = req->rc = NULL; - req->wq = NULL; - return ERR_PTR(-ENOMEM); - } - req->tc->sdata = (char *) req->tc + sizeof(struct p9_fcall); - req->tc->capacity = c->msize; - req->rc->sdata = (char *) req->rc + sizeof(struct p9_fcall); - req->rc->capacity = c->msize; } + if (!req->tc) + req->tc = p9_fcall_alloc(alloc_msize); + if (!req->rc) + req->rc = p9_fcall_alloc(alloc_msize); + if (!req->tc || !req->rc) + goto grow_failed; + p9pdu_reset(req->tc); p9pdu_reset(req->rc); req->tc->tag = tag-1; req->status = REQ_STATUS_ALLOC; - return &c->reqs[row][col]; + return req; + +grow_failed: + pr_err("Couldn't grow tag array\n"); + kfree(req->tc); + kfree(req->rc); + kfree(req->wq); + req->tc = req->rc = NULL; + req->wq = NULL; + return ERR_PTR(-ENOMEM); } /** @@ -272,7 +314,8 @@ struct p9_req_t *p9_tag_lookup(struct p9_client *c, u16 tag) * buffer to read the data into */ tag++; - BUG_ON(tag >= c->max_tag); + if(tag >= c->max_tag) + return NULL; row = tag / P9_ROW_MAXTAG; col = tag % P9_ROW_MAXTAG; @@ -296,12 +339,13 @@ static int p9_tag_init(struct p9_client *c) c->tagpool = p9_idpool_create(); if (IS_ERR(c->tagpool)) { err = PTR_ERR(c->tagpool); - c->tagpool = NULL; goto error; } - - p9_idpool_get(c->tagpool); /* reserve tag 0 */ - + err = p9_idpool_get(c->tagpool); /* reserve tag 0 */ + if (err < 0) { + p9_idpool_destroy(c->tagpool); + goto error; + } c->max_tag = 0; error: return err; @@ -322,9 +366,9 @@ static void p9_tag_cleanup(struct p9_client *c) for (row = 0; row < (c->max_tag/P9_ROW_MAXTAG); row++) { for (col = 0; col < P9_ROW_MAXTAG; col++) { if (c->reqs[row][col].status != REQ_STATUS_IDLE) { - P9_DPRINTK(P9_DEBUG_MUX, - "Attempting to cleanup non-free tag %d,%d\n", - row, col); + p9_debug(P9_DEBUG_MUX, + "Attempting to cleanup non-free tag %d,%d\n", + row, col); /* TODO: delay execution of cleanup */ return; } @@ -358,7 +402,7 @@ static void p9_tag_cleanup(struct p9_client *c) static void p9_free_req(struct p9_client *c, struct p9_req_t *r) { int tag = r->tc->tag; - P9_DPRINTK(P9_DEBUG_MUX, "clnt %p req %p tag: %d\n", c, r, tag); + p9_debug(P9_DEBUG_MUX, "clnt %p req %p tag: %d\n", c, r, tag); r->status = REQ_STATUS_IDLE; if (tag != P9_NOTAG && p9_idpool_check(tag, c->tagpool)) @@ -371,11 +415,19 @@ static void p9_free_req(struct p9_client *c, struct p9_req_t *r) * req: request received * */ -void p9_client_cb(struct p9_client *c, struct p9_req_t *req) +void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status) { - P9_DPRINTK(P9_DEBUG_MUX, " tag %d\n", req->tc->tag); + p9_debug(P9_DEBUG_MUX, " tag %d\n", req->tc->tag); + + /* + * This barrier is needed to make sure any change made to req before + * the other thread wakes up will indeed be seen by the waiting side. + */ + smp_wmb(); + req->status = status; + wake_up(req->wq); - P9_DPRINTK(P9_DEBUG_MUX, "wakeup: %d\n", req->tc->tag); + p9_debug(P9_DEBUG_MUX, "wakeup: %d\n", req->tc->tag); } EXPORT_SYMBOL(p9_client_cb); @@ -410,8 +462,8 @@ p9_parse_header(struct p9_fcall *pdu, int32_t *size, int8_t *type, int16_t *tag, pdu->id = r_type; pdu->tag = r_tag; - P9_DPRINTK(P9_DEBUG_9P, "<<< size=%d type: %d tag: %d\n", pdu->size, - pdu->id, pdu->tag); + p9_debug(P9_DEBUG_9P, "<<< size=%d type: %d tag: %d\n", + pdu->size, pdu->id, pdu->tag); if (type) *type = r_type; @@ -443,58 +495,152 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req) { int8_t type; int err; + int ecode; err = p9_parse_header(req->rc, NULL, &type, NULL, 0); + /* + * dump the response from server + * This should be after check errors which poplulate pdu_fcall. + */ + trace_9p_protocol_dump(c, req->rc); if (err) { - P9_DPRINTK(P9_DEBUG_ERROR, "couldn't parse header %d\n", err); + p9_debug(P9_DEBUG_ERROR, "couldn't parse header %d\n", err); return err; } + if (type != P9_RERROR && type != P9_RLERROR) + return 0; + + if (!p9_is_proto_dotl(c)) { + char *ename; + err = p9pdu_readf(req->rc, c->proto_version, "s?d", + &ename, &ecode); + if (err) + goto out_err; - if (type == P9_RERROR || type == P9_RLERROR) { - int ecode; + if (p9_is_proto_dotu(c)) + err = -ecode; + + if (!err || !IS_ERR_VALUE(err)) { + err = p9_errstr2errno(ename, strlen(ename)); + + p9_debug(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", + -ecode, ename); + } + kfree(ename); + } else { + err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode); + err = -ecode; + + p9_debug(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode); + } + + return err; + +out_err: + p9_debug(P9_DEBUG_ERROR, "couldn't parse error%d\n", err); + + return err; +} + +/** + * p9_check_zc_errors - check 9p packet for error return and process it + * @c: current client instance + * @req: request to parse and check for error conditions + * @in_hdrlen: Size of response protocol buffer. + * + * returns error code if one is discovered, otherwise returns 0 + * + * this will have to be more complicated if we have multiple + * error packet types + */ - if (!p9_is_proto_dotl(c)) { - char *ename; +static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req, + char *uidata, int in_hdrlen, int kern_buf) +{ + int err; + int ecode; + int8_t type; + char *ename = NULL; - err = p9pdu_readf(req->rc, c->proto_version, "s?d", - &ename, &ecode); - if (err) - goto out_err; + err = p9_parse_header(req->rc, NULL, &type, NULL, 0); + /* + * dump the response from server + * This should be after parse_header which poplulate pdu_fcall. + */ + trace_9p_protocol_dump(c, req->rc); + if (err) { + p9_debug(P9_DEBUG_ERROR, "couldn't parse header %d\n", err); + return err; + } - if (p9_is_proto_dotu(c)) - err = -ecode; + if (type != P9_RERROR && type != P9_RLERROR) + return 0; - if (!err || !IS_ERR_VALUE(err)) { - err = p9_errstr2errno(ename, strlen(ename)); + if (!p9_is_proto_dotl(c)) { + /* Error is reported in string format */ + int len; + /* 7 = header size for RERROR; */ + int inline_len = in_hdrlen - 7; - P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", -ecode, ename); + len = req->rc->size - req->rc->offset; + if (len > (P9_ZC_HDR_SZ - 7)) { + err = -EFAULT; + goto out_err; + } - kfree(ename); + ename = &req->rc->sdata[req->rc->offset]; + if (len > inline_len) { + /* We have error in external buffer */ + if (kern_buf) { + memcpy(ename + inline_len, uidata, + len - inline_len); + } else { + err = copy_from_user(ename + inline_len, + uidata, len - inline_len); + if (err) { + err = -EFAULT; + goto out_err; + } } - } else { - err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode); + } + ename = NULL; + err = p9pdu_readf(req->rc, c->proto_version, "s?d", + &ename, &ecode); + if (err) + goto out_err; + + if (p9_is_proto_dotu(c)) err = -ecode; - P9_DPRINTK(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode); - } + if (!err || !IS_ERR_VALUE(err)) { + err = p9_errstr2errno(ename, strlen(ename)); - } else - err = 0; + p9_debug(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", + -ecode, ename); + } + kfree(ename); + } else { + err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode); + err = -ecode; + p9_debug(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode); + } return err; out_err: - P9_DPRINTK(P9_DEBUG_ERROR, "couldn't parse error%d\n", err); - + p9_debug(P9_DEBUG_ERROR, "couldn't parse error%d\n", err); return err; } +static struct p9_req_t * +p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...); + /** * p9_client_flush - flush (cancel) a request * @c: client state * @oldreq: request to cancel * - * This sents a flush for a particular requests and links + * This sents a flush for a particular request and links * the flush request to the original request. The current * code only supports a single flush request although the protocol * allows for multiple flush requests to be sent for a single request. @@ -511,43 +657,32 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq) if (err) return err; - P9_DPRINTK(P9_DEBUG_9P, ">>> TFLUSH tag %d\n", oldtag); + p9_debug(P9_DEBUG_9P, ">>> TFLUSH tag %d\n", oldtag); req = p9_client_rpc(c, P9_TFLUSH, "w", oldtag); if (IS_ERR(req)) return PTR_ERR(req); - - /* if we haven't received a response for oldreq, - remove it from the list. */ - spin_lock(&c->lock); - if (oldreq->status == REQ_STATUS_FLSH) - list_del(&oldreq->req_list); - spin_unlock(&c->lock); + /* + * if we haven't received a response for oldreq, + * remove it from the list + */ + if (oldreq->status == REQ_STATUS_SENT) + if (c->trans_mod->cancelled) + c->trans_mod->cancelled(c, oldreq); p9_free_req(c, req); return 0; } -/** - * p9_client_rpc - issue a request and wait for a response - * @c: client session - * @type: type of request - * @fmt: protocol format string (see protocol.c) - * - * Returns request structure (which client must free using p9_free_req) - */ - -static struct p9_req_t * -p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) +static struct p9_req_t *p9_client_prepare_req(struct p9_client *c, + int8_t type, int req_size, + const char *fmt, va_list ap) { - va_list ap; int tag, err; struct p9_req_t *req; - unsigned long flags; - int sigpending; - P9_DPRINTK(P9_DEBUG_MUX, "client %p op %d\n", c, type); + p9_debug(P9_DEBUG_MUX, "client %p op %d\n", c, type); /* we allow for any status other than disconnected */ if (c->status == Disconnected) @@ -557,12 +692,6 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) if ((c->status == BeginDisconnect) && (type != P9_TCLUNK)) return ERR_PTR(-EIO); - if (signal_pending(current)) { - sigpending = 1; - clear_thread_flag(TIF_SIGPENDING); - } else - sigpending = 0; - tag = P9_NOTAG; if (type != P9_TVERSION) { tag = p9_idpool_get(c->tagpool); @@ -570,39 +699,82 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) return ERR_PTR(-ENOMEM); } - req = p9_tag_alloc(c, tag); + req = p9_tag_alloc(c, tag, req_size); if (IS_ERR(req)) return req; /* marshall the data */ p9pdu_prepare(req->tc, tag, type); - va_start(ap, fmt); err = p9pdu_vwritef(req->tc, c->proto_version, fmt, ap); - va_end(ap); if (err) goto reterr; - p9pdu_finalize(req->tc); + p9pdu_finalize(c, req->tc); + trace_9p_client_req(c, type, tag); + return req; +reterr: + p9_free_req(c, req); + return ERR_PTR(err); +} + +/** + * p9_client_rpc - issue a request and wait for a response + * @c: client session + * @type: type of request + * @fmt: protocol format string (see protocol.c) + * + * Returns request structure (which client must free using p9_free_req) + */ + +static struct p9_req_t * +p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) +{ + va_list ap; + int sigpending, err; + unsigned long flags; + struct p9_req_t *req; + + va_start(ap, fmt); + req = p9_client_prepare_req(c, type, c->msize, fmt, ap); + va_end(ap); + if (IS_ERR(req)) + return req; + + if (signal_pending(current)) { + sigpending = 1; + clear_thread_flag(TIF_SIGPENDING); + } else + sigpending = 0; err = c->trans_mod->request(c, req); if (err < 0) { - if (err != -ERESTARTSYS) + if (err != -ERESTARTSYS && err != -EFAULT) c->status = Disconnected; goto reterr; } - - P9_DPRINTK(P9_DEBUG_MUX, "wait %p tag: %d\n", req->wq, tag); +again: + /* Wait for the response */ err = wait_event_interruptible(*req->wq, - req->status >= REQ_STATUS_RCVD); - P9_DPRINTK(P9_DEBUG_MUX, "wait %p tag: %d returned %d\n", - req->wq, tag, err); + req->status >= REQ_STATUS_RCVD); + + /* + * Make sure our req is coherent with regard to updates in other + * threads - echoes to wmb() in the callback + */ + smp_rmb(); + + if ((err == -ERESTARTSYS) && (c->status == Connected) + && (type == P9_TFLUSH)) { + sigpending = 1; + clear_thread_flag(TIF_SIGPENDING); + goto again; + } if (req->status == REQ_STATUS_ERROR) { - P9_DPRINTK(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err); + p9_debug(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err); err = req->t_err; } - if ((err == -ERESTARTSYS) && (c->status == Connected)) { - P9_DPRINTK(P9_DEBUG_MUX, "flushing\n"); + p9_debug(P9_DEBUG_MUX, "flushing\n"); sigpending = 1; clear_thread_flag(TIF_SIGPENDING); @@ -613,27 +785,104 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) if (req->status == REQ_STATUS_RCVD) err = 0; } - if (sigpending) { spin_lock_irqsave(¤t->sighand->siglock, flags); recalc_sigpending(); spin_unlock_irqrestore(¤t->sighand->siglock, flags); } - if (err < 0) goto reterr; err = p9_check_errors(c, req); - if (!err) { - P9_DPRINTK(P9_DEBUG_MUX, "exit: client %p op %d\n", c, type); + trace_9p_client_res(c, type, req->rc->tag, err); + if (!err) + return req; +reterr: + p9_free_req(c, req); + return ERR_PTR(safe_errno(err)); +} + +/** + * p9_client_zc_rpc - issue a request and wait for a response + * @c: client session + * @type: type of request + * @uidata: user bffer that should be ued for zero copy read + * @uodata: user buffer that shoud be user for zero copy write + * @inlen: read buffer size + * @olen: write buffer size + * @hdrlen: reader header size, This is the size of response protocol data + * @fmt: protocol format string (see protocol.c) + * + * Returns request structure (which client must free using p9_free_req) + */ +static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type, + char *uidata, char *uodata, + int inlen, int olen, int in_hdrlen, + int kern_buf, const char *fmt, ...) +{ + va_list ap; + int sigpending, err; + unsigned long flags; + struct p9_req_t *req; + + va_start(ap, fmt); + /* + * We allocate a inline protocol data of only 4k bytes. + * The actual content is passed in zero-copy fashion. + */ + req = p9_client_prepare_req(c, type, P9_ZC_HDR_SZ, fmt, ap); + va_end(ap); + if (IS_ERR(req)) return req; + + if (signal_pending(current)) { + sigpending = 1; + clear_thread_flag(TIF_SIGPENDING); + } else + sigpending = 0; + + /* If we are called with KERNEL_DS force kern_buf */ + if (segment_eq(get_fs(), KERNEL_DS)) + kern_buf = 1; + + err = c->trans_mod->zc_request(c, req, uidata, uodata, + inlen, olen, in_hdrlen, kern_buf); + if (err < 0) { + if (err == -EIO) + c->status = Disconnected; + goto reterr; } + if (req->status == REQ_STATUS_ERROR) { + p9_debug(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err); + err = req->t_err; + } + if ((err == -ERESTARTSYS) && (c->status == Connected)) { + p9_debug(P9_DEBUG_MUX, "flushing\n"); + sigpending = 1; + clear_thread_flag(TIF_SIGPENDING); + if (c->trans_mod->cancel(c, req)) + p9_client_flush(c, req); + + /* if we received the response anyway, don't signal error */ + if (req->status == REQ_STATUS_RCVD) + err = 0; + } + if (sigpending) { + spin_lock_irqsave(¤t->sighand->siglock, flags); + recalc_sigpending(); + spin_unlock_irqrestore(¤t->sighand->siglock, flags); + } + if (err < 0) + goto reterr; + + err = p9_check_zc_errors(c, req, uidata, in_hdrlen, kern_buf); + trace_9p_client_res(c, type, req->rc->tag, err); + if (!err) + return req; reterr: - P9_DPRINTK(P9_DEBUG_MUX, "exit: client %p op %d error: %d\n", c, type, - err); p9_free_req(c, req); - return ERR_PTR(err); + return ERR_PTR(safe_errno(err)); } static struct p9_fid *p9_fid_create(struct p9_client *clnt) @@ -642,7 +891,7 @@ static struct p9_fid *p9_fid_create(struct p9_client *clnt) struct p9_fid *fid; unsigned long flags; - P9_DPRINTK(P9_DEBUG_FID, "clnt %p\n", clnt); + p9_debug(P9_DEBUG_FID, "clnt %p\n", clnt); fid = kmalloc(sizeof(struct p9_fid), GFP_KERNEL); if (!fid) return ERR_PTR(-ENOMEM); @@ -675,7 +924,7 @@ static void p9_fid_destroy(struct p9_fid *fid) struct p9_client *clnt; unsigned long flags; - P9_DPRINTK(P9_DEBUG_FID, "fid %d\n", fid->fid); + p9_debug(P9_DEBUG_FID, "fid %d\n", fid->fid); clnt = fid->clnt; p9_idpool_put(fid->fid, clnt->fidpool); spin_lock_irqsave(&clnt->lock, flags); @@ -692,8 +941,8 @@ static int p9_client_version(struct p9_client *c) char *version; int msize; - P9_DPRINTK(P9_DEBUG_9P, ">>> TVERSION msize %d protocol %d\n", - c->msize, c->proto_version); + p9_debug(P9_DEBUG_9P, ">>> TVERSION msize %d protocol %d\n", + c->msize, c->proto_version); switch (c->proto_version) { case p9_proto_2000L: @@ -718,12 +967,12 @@ static int p9_client_version(struct p9_client *c) err = p9pdu_readf(req->rc, c->proto_version, "ds", &msize, &version); if (err) { - P9_DPRINTK(P9_DEBUG_9P, "version error %d\n", err); - p9pdu_dump(1, req->rc); + p9_debug(P9_DEBUG_9P, "version error %d\n", err); + trace_9p_protocol_dump(c, req->rc); goto error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< RVERSION msize %d %s\n", msize, version); + p9_debug(P9_DEBUG_9P, "<<< RVERSION msize %d %s\n", msize, version); if (!strncmp(version, "9P2000.L", 8)) c->proto_version = p9_proto_2000L; else if (!strncmp(version, "9P2000.u", 8)) @@ -749,6 +998,7 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) { int err; struct p9_client *clnt; + char *client_id; err = 0; clnt = kmalloc(sizeof(struct p9_client), GFP_KERNEL); @@ -757,41 +1007,46 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) clnt->trans_mod = NULL; clnt->trans = NULL; + + client_id = utsname()->nodename; + memcpy(clnt->name, client_id, strlen(client_id) + 1); + spin_lock_init(&clnt->lock); INIT_LIST_HEAD(&clnt->fidlist); - p9_tag_init(clnt); + err = p9_tag_init(clnt); + if (err < 0) + goto free_client; err = parse_opts(options, clnt); if (err < 0) - goto free_client; + goto destroy_tagpool; if (!clnt->trans_mod) clnt->trans_mod = v9fs_get_default_trans(); if (clnt->trans_mod == NULL) { err = -EPROTONOSUPPORT; - P9_DPRINTK(P9_DEBUG_ERROR, - "No transport defined or default transport\n"); - goto free_client; + p9_debug(P9_DEBUG_ERROR, + "No transport defined or default transport\n"); + goto destroy_tagpool; } clnt->fidpool = p9_idpool_create(); if (IS_ERR(clnt->fidpool)) { err = PTR_ERR(clnt->fidpool); - clnt->fidpool = NULL; goto put_trans; } - P9_DPRINTK(P9_DEBUG_MUX, "clnt %p trans %p msize %d protocol %d\n", - clnt, clnt->trans_mod, clnt->msize, clnt->proto_version); + p9_debug(P9_DEBUG_MUX, "clnt %p trans %p msize %d protocol %d\n", + clnt, clnt->trans_mod, clnt->msize, clnt->proto_version); err = clnt->trans_mod->create(clnt, dev_name, options); if (err) goto destroy_fidpool; - if ((clnt->msize+P9_IOHDRSZ) > clnt->trans_mod->maxsize) - clnt->msize = clnt->trans_mod->maxsize-P9_IOHDRSZ; + if (clnt->msize > clnt->trans_mod->maxsize) + clnt->msize = clnt->trans_mod->maxsize; err = p9_client_version(clnt); if (err) @@ -805,6 +1060,8 @@ destroy_fidpool: p9_idpool_destroy(clnt->fidpool); put_trans: v9fs_put_trans(clnt->trans_mod); +destroy_tagpool: + p9_idpool_destroy(clnt->tagpool); free_client: kfree(clnt); return ERR_PTR(err); @@ -815,7 +1072,7 @@ void p9_client_destroy(struct p9_client *clnt) { struct p9_fid *fid, *fidptr; - P9_DPRINTK(P9_DEBUG_MUX, "clnt %p\n", clnt); + p9_debug(P9_DEBUG_MUX, "clnt %p\n", clnt); if (clnt->trans_mod) clnt->trans_mod->close(clnt); @@ -823,7 +1080,7 @@ void p9_client_destroy(struct p9_client *clnt) v9fs_put_trans(clnt->trans_mod); list_for_each_entry_safe(fid, fidptr, &clnt->fidlist, flist) { - printk(KERN_INFO "Found fid %d not clunked\n", fid->fid); + pr_info("Found fid %d not clunked\n", fid->fid); p9_fid_destroy(fid); } @@ -838,30 +1095,29 @@ EXPORT_SYMBOL(p9_client_destroy); void p9_client_disconnect(struct p9_client *clnt) { - P9_DPRINTK(P9_DEBUG_9P, "clnt %p\n", clnt); + p9_debug(P9_DEBUG_9P, "clnt %p\n", clnt); clnt->status = Disconnected; } EXPORT_SYMBOL(p9_client_disconnect); void p9_client_begin_disconnect(struct p9_client *clnt) { - P9_DPRINTK(P9_DEBUG_9P, "clnt %p\n", clnt); + p9_debug(P9_DEBUG_9P, "clnt %p\n", clnt); clnt->status = BeginDisconnect; } EXPORT_SYMBOL(p9_client_begin_disconnect); struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, - char *uname, u32 n_uname, char *aname) + char *uname, kuid_t n_uname, char *aname) { - int err; + int err = 0; struct p9_req_t *req; struct p9_fid *fid; struct p9_qid qid; - P9_DPRINTK(P9_DEBUG_9P, ">>> TATTACH afid %d uname %s aname %s\n", - afid ? afid->fid : -1, uname, aname); - err = 0; + p9_debug(P9_DEBUG_9P, ">>> TATTACH afid %d uname %s aname %s\n", + afid ? afid->fid : -1, uname, aname); fid = p9_fid_create(clnt); if (IS_ERR(fid)) { err = PTR_ERR(fid); @@ -869,7 +1125,7 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, goto error; } - req = p9_client_rpc(clnt, P9_TATTACH, "ddss?d", fid->fid, + req = p9_client_rpc(clnt, P9_TATTACH, "ddss?u", fid->fid, afid ? afid->fid : P9_NOFID, uname, aname, n_uname); if (IS_ERR(req)) { err = PTR_ERR(req); @@ -878,15 +1134,13 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, err = p9pdu_readf(req->rc, clnt->proto_version, "Q", &qid); if (err) { - p9pdu_dump(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); p9_free_req(clnt, req); goto error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< RATTACH qid %x.%llx.%x\n", - qid.type, - (unsigned long long)qid.path, - qid.version); + p9_debug(P9_DEBUG_9P, "<<< RATTACH qid %x.%llx.%x\n", + qid.type, (unsigned long long)qid.path, qid.version); memmove(&fid->qid, &qid, sizeof(struct p9_qid)); @@ -900,15 +1154,15 @@ error: } EXPORT_SYMBOL(p9_client_attach); -struct p9_fid *p9_client_walk(struct p9_fid *oldfid, int nwname, char **wnames, - int clone) +struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname, + char **wnames, int clone) { int err; struct p9_client *clnt; struct p9_fid *fid; struct p9_qid *wqids; struct p9_req_t *req; - int16_t nwqids, count; + uint16_t nwqids, count; err = 0; wqids = NULL; @@ -926,8 +1180,8 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, int nwname, char **wnames, fid = oldfid; - P9_DPRINTK(P9_DEBUG_9P, ">>> TWALK fids %d,%d nwname %d wname[0] %s\n", - oldfid->fid, fid->fid, nwname, wnames ? wnames[0] : NULL); + p9_debug(P9_DEBUG_9P, ">>> TWALK fids %d,%d nwname %ud wname[0] %s\n", + oldfid->fid, fid->fid, nwname, wnames ? wnames[0] : NULL); req = p9_client_rpc(clnt, P9_TWALK, "ddT", oldfid->fid, fid->fid, nwname, wnames); @@ -938,13 +1192,13 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, int nwname, char **wnames, err = p9pdu_readf(req->rc, clnt->proto_version, "R", &nwqids, &wqids); if (err) { - p9pdu_dump(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); p9_free_req(clnt, req); goto clunk_fid; } p9_free_req(clnt, req); - P9_DPRINTK(P9_DEBUG_9P, "<<< RWALK nwqid %d:\n", nwqids); + p9_debug(P9_DEBUG_9P, "<<< RWALK nwqid %d:\n", nwqids); if (nwqids != nwname) { err = -ENOENT; @@ -952,7 +1206,7 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, int nwname, char **wnames, } for (count = 0; count < nwqids; count++) - P9_DPRINTK(P9_DEBUG_9P, "<<< [%d] %x.%llx.%x\n", + p9_debug(P9_DEBUG_9P, "<<< [%d] %x.%llx.%x\n", count, wqids[count].type, (unsigned long long)wqids[count].path, wqids[count].version); @@ -987,7 +1241,7 @@ int p9_client_open(struct p9_fid *fid, int mode) int iounit; clnt = fid->clnt; - P9_DPRINTK(P9_DEBUG_9P, ">>> %s fid %d mode %d\n", + p9_debug(P9_DEBUG_9P, ">>> %s fid %d mode %d\n", p9_is_proto_dotl(clnt) ? "TLOPEN" : "TOPEN", fid->fid, mode); err = 0; @@ -1005,11 +1259,11 @@ int p9_client_open(struct p9_fid *fid, int mode) err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", &qid, &iounit); if (err) { - p9pdu_dump(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); goto free_and_error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< %s qid %x.%llx.%x iounit %x\n", + p9_debug(P9_DEBUG_9P, "<<< %s qid %x.%llx.%x iounit %x\n", p9_is_proto_dotl(clnt) ? "RLOPEN" : "ROPEN", qid.type, (unsigned long long)qid.path, qid.version, iounit); @@ -1024,22 +1278,23 @@ error: EXPORT_SYMBOL(p9_client_open); int p9_client_create_dotl(struct p9_fid *ofid, char *name, u32 flags, u32 mode, - gid_t gid, struct p9_qid *qid) + kgid_t gid, struct p9_qid *qid) { int err = 0; struct p9_client *clnt; struct p9_req_t *req; int iounit; - P9_DPRINTK(P9_DEBUG_9P, + p9_debug(P9_DEBUG_9P, ">>> TLCREATE fid %d name %s flags %d mode %d gid %d\n", - ofid->fid, name, flags, mode, gid); + ofid->fid, name, flags, mode, + from_kgid(&init_user_ns, gid)); clnt = ofid->clnt; if (ofid->mode != -1) return -EINVAL; - req = p9_client_rpc(clnt, P9_TLCREATE, "dsddd", ofid->fid, name, flags, + req = p9_client_rpc(clnt, P9_TLCREATE, "dsddg", ofid->fid, name, flags, mode, gid); if (IS_ERR(req)) { err = PTR_ERR(req); @@ -1048,11 +1303,11 @@ int p9_client_create_dotl(struct p9_fid *ofid, char *name, u32 flags, u32 mode, err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", qid, &iounit); if (err) { - p9pdu_dump(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); goto free_and_error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< RLCREATE qid %x.%llx.%x iounit %x\n", + p9_debug(P9_DEBUG_9P, "<<< RLCREATE qid %x.%llx.%x iounit %x\n", qid->type, (unsigned long long)qid->path, qid->version, iounit); @@ -1076,7 +1331,7 @@ int p9_client_fcreate(struct p9_fid *fid, char *name, u32 perm, int mode, struct p9_qid qid; int iounit; - P9_DPRINTK(P9_DEBUG_9P, ">>> TCREATE fid %d name %s perm %d mode %d\n", + p9_debug(P9_DEBUG_9P, ">>> TCREATE fid %d name %s perm %d mode %d\n", fid->fid, name, perm, mode); err = 0; clnt = fid->clnt; @@ -1093,11 +1348,11 @@ int p9_client_fcreate(struct p9_fid *fid, char *name, u32 perm, int mode, err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", &qid, &iounit); if (err) { - p9pdu_dump(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); goto free_and_error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< RCREATE qid %x.%llx.%x iounit %x\n", + p9_debug(P9_DEBUG_9P, "<<< RCREATE qid %x.%llx.%x iounit %x\n", qid.type, (unsigned long long)qid.path, qid.version, iounit); @@ -1112,18 +1367,18 @@ error: } EXPORT_SYMBOL(p9_client_fcreate); -int p9_client_symlink(struct p9_fid *dfid, char *name, char *symtgt, gid_t gid, +int p9_client_symlink(struct p9_fid *dfid, char *name, char *symtgt, kgid_t gid, struct p9_qid *qid) { int err = 0; struct p9_client *clnt; struct p9_req_t *req; - P9_DPRINTK(P9_DEBUG_9P, ">>> TSYMLINK dfid %d name %s symtgt %s\n", + p9_debug(P9_DEBUG_9P, ">>> TSYMLINK dfid %d name %s symtgt %s\n", dfid->fid, name, symtgt); clnt = dfid->clnt; - req = p9_client_rpc(clnt, P9_TSYMLINK, "dssd", dfid->fid, name, symtgt, + req = p9_client_rpc(clnt, P9_TSYMLINK, "dssg", dfid->fid, name, symtgt, gid); if (IS_ERR(req)) { err = PTR_ERR(req); @@ -1132,11 +1387,11 @@ int p9_client_symlink(struct p9_fid *dfid, char *name, char *symtgt, gid_t gid, err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid); if (err) { - p9pdu_dump(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); goto free_and_error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< RSYMLINK qid %x.%llx.%x\n", + p9_debug(P9_DEBUG_9P, "<<< RSYMLINK qid %x.%llx.%x\n", qid->type, (unsigned long long)qid->path, qid->version); free_and_error: @@ -1151,7 +1406,7 @@ int p9_client_link(struct p9_fid *dfid, struct p9_fid *oldfid, char *newname) struct p9_client *clnt; struct p9_req_t *req; - P9_DPRINTK(P9_DEBUG_9P, ">>> TLINK dfid %d oldfid %d newname %s\n", + p9_debug(P9_DEBUG_9P, ">>> TLINK dfid %d oldfid %d newname %s\n", dfid->fid, oldfid->fid, newname); clnt = dfid->clnt; req = p9_client_rpc(clnt, P9_TLINK, "dds", dfid->fid, oldfid->fid, @@ -1159,7 +1414,7 @@ int p9_client_link(struct p9_fid *dfid, struct p9_fid *oldfid, char *newname) if (IS_ERR(req)) return PTR_ERR(req); - P9_DPRINTK(P9_DEBUG_9P, "<<< RLINK\n"); + p9_debug(P9_DEBUG_9P, "<<< RLINK\n"); p9_free_req(clnt, req); return 0; } @@ -1171,7 +1426,7 @@ int p9_client_fsync(struct p9_fid *fid, int datasync) struct p9_client *clnt; struct p9_req_t *req; - P9_DPRINTK(P9_DEBUG_9P, ">>> TFSYNC fid %d datasync:%d\n", + p9_debug(P9_DEBUG_9P, ">>> TFSYNC fid %d datasync:%d\n", fid->fid, datasync); err = 0; clnt = fid->clnt; @@ -1182,7 +1437,7 @@ int p9_client_fsync(struct p9_fid *fid, int datasync) goto error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< RFSYNC fid %d\n", fid->fid); + p9_debug(P9_DEBUG_9P, "<<< RFSYNC fid %d\n", fid->fid); p9_free_req(clnt, req); @@ -1196,14 +1451,18 @@ int p9_client_clunk(struct p9_fid *fid) int err; struct p9_client *clnt; struct p9_req_t *req; + int retries = 0; if (!fid) { - P9_EPRINTK(KERN_WARNING, "Trying to clunk with NULL fid\n"); + pr_warn("%s (%d): Trying to clunk with NULL fid\n", + __func__, task_pid_nr(current)); dump_stack(); return 0; } - P9_DPRINTK(P9_DEBUG_9P, ">>> TCLUNK fid %d\n", fid->fid); +again: + p9_debug(P9_DEBUG_9P, ">>> TCLUNK fid %d (try %d)\n", fid->fid, + retries); err = 0; clnt = fid->clnt; @@ -1213,12 +1472,20 @@ int p9_client_clunk(struct p9_fid *fid) goto error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< RCLUNK fid %d\n", fid->fid); + p9_debug(P9_DEBUG_9P, "<<< RCLUNK fid %d\n", fid->fid); p9_free_req(clnt, req); - p9_fid_destroy(fid); - error: + /* + * Fid is not valid even after a failed clunk + * If interrupted, retry once then give up and + * leak fid until umount. + */ + if (err == -ERESTARTSYS) { + if (retries++ == 0) + goto again; + } else + p9_fid_destroy(fid); return err; } EXPORT_SYMBOL(p9_client_clunk); @@ -1229,7 +1496,7 @@ int p9_client_remove(struct p9_fid *fid) struct p9_client *clnt; struct p9_req_t *req; - P9_DPRINTK(P9_DEBUG_9P, ">>> TREMOVE fid %d\n", fid->fid); + p9_debug(P9_DEBUG_9P, ">>> TREMOVE fid %d\n", fid->fid); err = 0; clnt = fid->clnt; @@ -1239,29 +1506,56 @@ int p9_client_remove(struct p9_fid *fid) goto error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< RREMOVE fid %d\n", fid->fid); + p9_debug(P9_DEBUG_9P, "<<< RREMOVE fid %d\n", fid->fid); p9_free_req(clnt, req); error: - p9_fid_destroy(fid); + if (err == -ERESTARTSYS) + p9_client_clunk(fid); + else + p9_fid_destroy(fid); return err; } EXPORT_SYMBOL(p9_client_remove); +int p9_client_unlinkat(struct p9_fid *dfid, const char *name, int flags) +{ + int err = 0; + struct p9_req_t *req; + struct p9_client *clnt; + + p9_debug(P9_DEBUG_9P, ">>> TUNLINKAT fid %d %s %d\n", + dfid->fid, name, flags); + + clnt = dfid->clnt; + req = p9_client_rpc(clnt, P9_TUNLINKAT, "dsd", dfid->fid, name, flags); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; + } + p9_debug(P9_DEBUG_9P, "<<< RUNLINKAT fid %d %s\n", dfid->fid, name); + + p9_free_req(clnt, req); +error: + return err; +} +EXPORT_SYMBOL(p9_client_unlinkat); + int p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset, u32 count) { - int err, rsize, total; - struct p9_client *clnt; - struct p9_req_t *req; char *dataptr; + int kernel_buf = 0; + struct p9_req_t *req; + struct p9_client *clnt; + int err, rsize, non_zc = 0; + - P9_DPRINTK(P9_DEBUG_9P, ">>> TREAD fid %d offset %llu %d\n", fid->fid, - (long long unsigned) offset, count); + p9_debug(P9_DEBUG_9P, ">>> TREAD fid %d offset %llu %d\n", + fid->fid, (unsigned long long) offset, count); err = 0; clnt = fid->clnt; - total = 0; rsize = fid->iounit; if (!rsize || rsize > clnt->msize-P9_IOHDRSZ) @@ -1270,7 +1564,26 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset, if (count < rsize) rsize = count; - req = p9_client_rpc(clnt, P9_TREAD, "dqd", fid->fid, offset, rsize); + /* Don't bother zerocopy for small IO (< 1024) */ + if (clnt->trans_mod->zc_request && rsize > 1024) { + char *indata; + if (data) { + kernel_buf = 1; + indata = data; + } else + indata = (__force char *)udata; + /* + * response header len is 11 + * PDU Header(7) + IO Size (4) + */ + req = p9_client_zc_rpc(clnt, P9_TREAD, indata, NULL, rsize, 0, + 11, kernel_buf, "dqd", fid->fid, + offset, rsize); + } else { + non_zc = 1; + req = p9_client_rpc(clnt, P9_TREAD, "dqd", fid->fid, offset, + rsize); + } if (IS_ERR(req)) { err = PTR_ERR(req); goto error; @@ -1278,19 +1591,21 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset, err = p9pdu_readf(req->rc, clnt->proto_version, "D", &count, &dataptr); if (err) { - p9pdu_dump(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); goto free_and_error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< RREAD count %d\n", count); + p9_debug(P9_DEBUG_9P, "<<< RREAD count %d\n", count); - if (data) { - memmove(data, dataptr, count); - } else { - err = copy_to_user(udata, dataptr, count); - if (err) { - err = -EFAULT; - goto free_and_error; + if (non_zc) { + if (data) { + memmove(data, dataptr, count); + } else { + err = copy_to_user(udata, dataptr, count); + if (err) { + err = -EFAULT; + goto free_and_error; + } } } p9_free_req(clnt, req); @@ -1307,15 +1622,15 @@ int p9_client_write(struct p9_fid *fid, char *data, const char __user *udata, u64 offset, u32 count) { - int err, rsize, total; + int err, rsize; + int kernel_buf = 0; struct p9_client *clnt; struct p9_req_t *req; - P9_DPRINTK(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu count %d\n", - fid->fid, (long long unsigned) offset, count); + p9_debug(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu count %d\n", + fid->fid, (unsigned long long) offset, count); err = 0; clnt = fid->clnt; - total = 0; rsize = fid->iounit; if (!rsize || rsize > clnt->msize-P9_IOHDRSZ) @@ -1323,12 +1638,26 @@ p9_client_write(struct p9_fid *fid, char *data, const char __user *udata, if (count < rsize) rsize = count; - if (data) - req = p9_client_rpc(clnt, P9_TWRITE, "dqD", fid->fid, offset, - rsize, data); - else - req = p9_client_rpc(clnt, P9_TWRITE, "dqU", fid->fid, offset, - rsize, udata); + + /* Don't bother zerocopy for small IO (< 1024) */ + if (clnt->trans_mod->zc_request && rsize > 1024) { + char *odata; + if (data) { + kernel_buf = 1; + odata = data; + } else + odata = (char *)udata; + req = p9_client_zc_rpc(clnt, P9_TWRITE, NULL, odata, 0, rsize, + P9_ZC_HDR_SZ, kernel_buf, "dqd", + fid->fid, offset, rsize); + } else { + if (data) + req = p9_client_rpc(clnt, P9_TWRITE, "dqD", fid->fid, + offset, rsize, data); + else + req = p9_client_rpc(clnt, P9_TWRITE, "dqU", fid->fid, + offset, rsize, udata); + } if (IS_ERR(req)) { err = PTR_ERR(req); goto error; @@ -1336,11 +1665,11 @@ p9_client_write(struct p9_fid *fid, char *data, const char __user *udata, err = p9pdu_readf(req->rc, clnt->proto_version, "d", &count); if (err) { - p9pdu_dump(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); goto free_and_error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< RWRITE count %d\n", count); + p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", count); p9_free_req(clnt, req); return count; @@ -1360,7 +1689,7 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid) struct p9_req_t *req; u16 ignored; - P9_DPRINTK(P9_DEBUG_9P, ">>> TSTAT fid %d\n", fid->fid); + p9_debug(P9_DEBUG_9P, ">>> TSTAT fid %d\n", fid->fid); if (!ret) return ERR_PTR(-ENOMEM); @@ -1376,12 +1705,12 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid) err = p9pdu_readf(req->rc, clnt->proto_version, "wS", &ignored, ret); if (err) { - p9pdu_dump(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); p9_free_req(clnt, req); goto error; } - P9_DPRINTK(P9_DEBUG_9P, + p9_debug(P9_DEBUG_9P, "<<< RSTAT sz=%x type=%x dev=%x qid=%x.%llx.%x\n" "<<< mode=%8.8x atime=%8.8x mtime=%8.8x length=%llx\n" "<<< name=%s uid=%s gid=%s muid=%s extension=(%s)\n" @@ -1390,7 +1719,9 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid) (unsigned long long)ret->qid.path, ret->qid.version, ret->mode, ret->atime, ret->mtime, (unsigned long long)ret->length, ret->name, ret->uid, ret->gid, ret->muid, ret->extension, - ret->n_uid, ret->n_gid, ret->n_muid); + from_kuid(&init_user_ns, ret->n_uid), + from_kgid(&init_user_ns, ret->n_gid), + from_kuid(&init_user_ns, ret->n_muid)); p9_free_req(clnt, req); return ret; @@ -1410,7 +1741,7 @@ struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid, GFP_KERNEL); struct p9_req_t *req; - P9_DPRINTK(P9_DEBUG_9P, ">>> TGETATTR fid %d, request_mask %lld\n", + p9_debug(P9_DEBUG_9P, ">>> TGETATTR fid %d, request_mask %lld\n", fid->fid, request_mask); if (!ret) @@ -1427,12 +1758,12 @@ struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid, err = p9pdu_readf(req->rc, clnt->proto_version, "A", ret); if (err) { - p9pdu_dump(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); p9_free_req(clnt, req); goto error; } - P9_DPRINTK(P9_DEBUG_9P, + p9_debug(P9_DEBUG_9P, "<<< RGETATTR st_result_mask=%lld\n" "<<< qid=%x.%llx.%x\n" "<<< st_mode=%8.8x st_nlink=%llu\n" @@ -1444,8 +1775,10 @@ struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid, "<<< st_btime_sec=%lld st_btime_nsec=%lld\n" "<<< st_gen=%lld st_data_version=%lld", ret->st_result_mask, ret->qid.type, ret->qid.path, - ret->qid.version, ret->st_mode, ret->st_nlink, ret->st_uid, - ret->st_gid, ret->st_rdev, ret->st_size, ret->st_blksize, + ret->qid.version, ret->st_mode, ret->st_nlink, + from_kuid(&init_user_ns, ret->st_uid), + from_kgid(&init_user_ns, ret->st_gid), + ret->st_rdev, ret->st_size, ret->st_blksize, ret->st_blocks, ret->st_atime_sec, ret->st_atime_nsec, ret->st_mtime_sec, ret->st_mtime_nsec, ret->st_ctime_sec, ret->st_ctime_nsec, ret->st_btime_sec, ret->st_btime_nsec, @@ -1498,8 +1831,8 @@ int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst) err = 0; clnt = fid->clnt; wst->size = p9_client_statsize(wst, clnt->proto_version); - P9_DPRINTK(P9_DEBUG_9P, ">>> TWSTAT fid %d\n", fid->fid); - P9_DPRINTK(P9_DEBUG_9P, + p9_debug(P9_DEBUG_9P, ">>> TWSTAT fid %d\n", fid->fid); + p9_debug(P9_DEBUG_9P, " sz=%x type=%x dev=%x qid=%x.%llx.%x\n" " mode=%8.8x atime=%8.8x mtime=%8.8x length=%llx\n" " name=%s uid=%s gid=%s muid=%s extension=(%s)\n" @@ -1508,7 +1841,9 @@ int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst) (unsigned long long)wst->qid.path, wst->qid.version, wst->mode, wst->atime, wst->mtime, (unsigned long long)wst->length, wst->name, wst->uid, wst->gid, wst->muid, wst->extension, - wst->n_uid, wst->n_gid, wst->n_muid); + from_kuid(&init_user_ns, wst->n_uid), + from_kgid(&init_user_ns, wst->n_gid), + from_kuid(&init_user_ns, wst->n_muid)); req = p9_client_rpc(clnt, P9_TWSTAT, "dwS", fid->fid, wst->size+2, wst); if (IS_ERR(req)) { @@ -1516,7 +1851,7 @@ int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst) goto error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< RWSTAT fid %d\n", fid->fid); + p9_debug(P9_DEBUG_9P, "<<< RWSTAT fid %d\n", fid->fid); p9_free_req(clnt, req); error: @@ -1532,12 +1867,14 @@ int p9_client_setattr(struct p9_fid *fid, struct p9_iattr_dotl *p9attr) err = 0; clnt = fid->clnt; - P9_DPRINTK(P9_DEBUG_9P, ">>> TSETATTR fid %d\n", fid->fid); - P9_DPRINTK(P9_DEBUG_9P, + p9_debug(P9_DEBUG_9P, ">>> TSETATTR fid %d\n", fid->fid); + p9_debug(P9_DEBUG_9P, " valid=%x mode=%x uid=%d gid=%d size=%lld\n" " atime_sec=%lld atime_nsec=%lld\n" " mtime_sec=%lld mtime_nsec=%lld\n", - p9attr->valid, p9attr->mode, p9attr->uid, p9attr->gid, + p9attr->valid, p9attr->mode, + from_kuid(&init_user_ns, p9attr->uid), + from_kgid(&init_user_ns, p9attr->gid), p9attr->size, p9attr->atime_sec, p9attr->atime_nsec, p9attr->mtime_sec, p9attr->mtime_nsec); @@ -1547,7 +1884,7 @@ int p9_client_setattr(struct p9_fid *fid, struct p9_iattr_dotl *p9attr) err = PTR_ERR(req); goto error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< RSETATTR fid %d\n", fid->fid); + p9_debug(P9_DEBUG_9P, "<<< RSETATTR fid %d\n", fid->fid); p9_free_req(clnt, req); error: return err; @@ -1563,7 +1900,7 @@ int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb) err = 0; clnt = fid->clnt; - P9_DPRINTK(P9_DEBUG_9P, ">>> TSTATFS fid %d\n", fid->fid); + p9_debug(P9_DEBUG_9P, ">>> TSTATFS fid %d\n", fid->fid); req = p9_client_rpc(clnt, P9_TSTATFS, "d", fid->fid); if (IS_ERR(req)) { @@ -1575,12 +1912,12 @@ int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb) &sb->bsize, &sb->blocks, &sb->bfree, &sb->bavail, &sb->files, &sb->ffree, &sb->fsid, &sb->namelen); if (err) { - p9pdu_dump(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); p9_free_req(clnt, req); goto error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< RSTATFS fid %d type 0x%lx bsize %ld " + p9_debug(P9_DEBUG_9P, "<<< RSTATFS fid %d type 0x%lx bsize %ld " "blocks %llu bfree %llu bavail %llu files %llu ffree %llu " "fsid %llu namelen %ld\n", fid->fid, (long unsigned int)sb->type, (long int)sb->bsize, @@ -1593,7 +1930,8 @@ error: } EXPORT_SYMBOL(p9_client_statfs); -int p9_client_rename(struct p9_fid *fid, struct p9_fid *newdirfid, char *name) +int p9_client_rename(struct p9_fid *fid, + struct p9_fid *newdirfid, const char *name) { int err; struct p9_req_t *req; @@ -1602,7 +1940,7 @@ int p9_client_rename(struct p9_fid *fid, struct p9_fid *newdirfid, char *name) err = 0; clnt = fid->clnt; - P9_DPRINTK(P9_DEBUG_9P, ">>> TRENAME fid %d newdirfid %d name %s\n", + p9_debug(P9_DEBUG_9P, ">>> TRENAME fid %d newdirfid %d name %s\n", fid->fid, newdirfid->fid, name); req = p9_client_rpc(clnt, P9_TRENAME, "dds", fid->fid, @@ -1612,7 +1950,7 @@ int p9_client_rename(struct p9_fid *fid, struct p9_fid *newdirfid, char *name) goto error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< RRENAME fid %d\n", fid->fid); + p9_debug(P9_DEBUG_9P, "<<< RRENAME fid %d\n", fid->fid); p9_free_req(clnt, req); error: @@ -1620,6 +1958,36 @@ error: } EXPORT_SYMBOL(p9_client_rename); +int p9_client_renameat(struct p9_fid *olddirfid, const char *old_name, + struct p9_fid *newdirfid, const char *new_name) +{ + int err; + struct p9_req_t *req; + struct p9_client *clnt; + + err = 0; + clnt = olddirfid->clnt; + + p9_debug(P9_DEBUG_9P, ">>> TRENAMEAT olddirfid %d old name %s" + " newdirfid %d new name %s\n", olddirfid->fid, old_name, + newdirfid->fid, new_name); + + req = p9_client_rpc(clnt, P9_TRENAMEAT, "dsds", olddirfid->fid, + old_name, newdirfid->fid, new_name); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; + } + + p9_debug(P9_DEBUG_9P, "<<< RRENAMEAT newdirfid %d new name %s\n", + newdirfid->fid, new_name); + + p9_free_req(clnt, req); +error: + return err; +} +EXPORT_SYMBOL(p9_client_renameat); + /* * An xattrwalk without @attr_name gives the fid for the lisxattr namespace */ @@ -1639,7 +2007,7 @@ struct p9_fid *p9_client_xattrwalk(struct p9_fid *file_fid, attr_fid = NULL; goto error; } - P9_DPRINTK(P9_DEBUG_9P, + p9_debug(P9_DEBUG_9P, ">>> TXATTRWALK file_fid %d, attr_fid %d name %s\n", file_fid->fid, attr_fid->fid, attr_name); @@ -1651,12 +2019,12 @@ struct p9_fid *p9_client_xattrwalk(struct p9_fid *file_fid, } err = p9pdu_readf(req->rc, clnt->proto_version, "q", attr_size); if (err) { - p9pdu_dump(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); p9_free_req(clnt, req); goto clunk_fid; } p9_free_req(clnt, req); - P9_DPRINTK(P9_DEBUG_9P, "<<< RXATTRWALK fid %d size %llu\n", + p9_debug(P9_DEBUG_9P, "<<< RXATTRWALK fid %d size %llu\n", attr_fid->fid, *attr_size); return attr_fid; clunk_fid: @@ -1677,7 +2045,7 @@ int p9_client_xattrcreate(struct p9_fid *fid, const char *name, struct p9_req_t *req; struct p9_client *clnt; - P9_DPRINTK(P9_DEBUG_9P, + p9_debug(P9_DEBUG_9P, ">>> TXATTRCREATE fid %d name %s size %lld flag %d\n", fid->fid, name, (long long)attr_size, flags); err = 0; @@ -1688,7 +2056,7 @@ int p9_client_xattrcreate(struct p9_fid *fid, const char *name, err = PTR_ERR(req); goto error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< RXATTRCREATE fid %d\n", fid->fid); + p9_debug(P9_DEBUG_9P, "<<< RXATTRCREATE fid %d\n", fid->fid); p9_free_req(clnt, req); error: return err; @@ -1697,17 +2065,16 @@ EXPORT_SYMBOL_GPL(p9_client_xattrcreate); int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) { - int err, rsize, total; + int err, rsize, non_zc = 0; struct p9_client *clnt; struct p9_req_t *req; char *dataptr; - P9_DPRINTK(P9_DEBUG_9P, ">>> TREADDIR fid %d offset %llu count %d\n", - fid->fid, (long long unsigned) offset, count); + p9_debug(P9_DEBUG_9P, ">>> TREADDIR fid %d offset %llu count %d\n", + fid->fid, (unsigned long long) offset, count); err = 0; clnt = fid->clnt; - total = 0; rsize = fid->iounit; if (!rsize || rsize > clnt->msize-P9_READDIRHDRSZ) @@ -1716,7 +2083,19 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) if (count < rsize) rsize = count; - req = p9_client_rpc(clnt, P9_TREADDIR, "dqd", fid->fid, offset, rsize); + /* Don't bother zerocopy for small IO (< 1024) */ + if (clnt->trans_mod->zc_request && rsize > 1024) { + /* + * response header len is 11 + * PDU Header(7) + IO Size (4) + */ + req = p9_client_zc_rpc(clnt, P9_TREADDIR, data, NULL, rsize, 0, + 11, 1, "dqd", fid->fid, offset, rsize); + } else { + non_zc = 1; + req = p9_client_rpc(clnt, P9_TREADDIR, "dqd", fid->fid, + offset, rsize); + } if (IS_ERR(req)) { err = PTR_ERR(req); goto error; @@ -1724,13 +2103,13 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) err = p9pdu_readf(req->rc, clnt->proto_version, "D", &count, &dataptr); if (err) { - p9pdu_dump(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); goto free_and_error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< RREADDIR count %d\n", count); + p9_debug(P9_DEBUG_9P, "<<< RREADDIR count %d\n", count); - if (data) + if (non_zc) memmove(data, dataptr, count); p9_free_req(clnt, req); @@ -1744,7 +2123,7 @@ error: EXPORT_SYMBOL(p9_client_readdir); int p9_client_mknod_dotl(struct p9_fid *fid, char *name, int mode, - dev_t rdev, gid_t gid, struct p9_qid *qid) + dev_t rdev, kgid_t gid, struct p9_qid *qid) { int err; struct p9_client *clnt; @@ -1752,19 +2131,19 @@ int p9_client_mknod_dotl(struct p9_fid *fid, char *name, int mode, err = 0; clnt = fid->clnt; - P9_DPRINTK(P9_DEBUG_9P, ">>> TMKNOD fid %d name %s mode %d major %d " + p9_debug(P9_DEBUG_9P, ">>> TMKNOD fid %d name %s mode %d major %d " "minor %d\n", fid->fid, name, mode, MAJOR(rdev), MINOR(rdev)); - req = p9_client_rpc(clnt, P9_TMKNOD, "dsdddd", fid->fid, name, mode, + req = p9_client_rpc(clnt, P9_TMKNOD, "dsdddg", fid->fid, name, mode, MAJOR(rdev), MINOR(rdev), gid); if (IS_ERR(req)) return PTR_ERR(req); err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid); if (err) { - p9pdu_dump(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); goto error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< RMKNOD qid %x.%llx.%x\n", qid->type, + p9_debug(P9_DEBUG_9P, "<<< RMKNOD qid %x.%llx.%x\n", qid->type, (unsigned long long)qid->path, qid->version); error: @@ -1775,7 +2154,7 @@ error: EXPORT_SYMBOL(p9_client_mknod_dotl); int p9_client_mkdir_dotl(struct p9_fid *fid, char *name, int mode, - gid_t gid, struct p9_qid *qid) + kgid_t gid, struct p9_qid *qid) { int err; struct p9_client *clnt; @@ -1783,19 +2162,19 @@ int p9_client_mkdir_dotl(struct p9_fid *fid, char *name, int mode, err = 0; clnt = fid->clnt; - P9_DPRINTK(P9_DEBUG_9P, ">>> TMKDIR fid %d name %s mode %d gid %d\n", - fid->fid, name, mode, gid); - req = p9_client_rpc(clnt, P9_TMKDIR, "dsdd", fid->fid, name, mode, + p9_debug(P9_DEBUG_9P, ">>> TMKDIR fid %d name %s mode %d gid %d\n", + fid->fid, name, mode, from_kgid(&init_user_ns, gid)); + req = p9_client_rpc(clnt, P9_TMKDIR, "dsdg", fid->fid, name, mode, gid); if (IS_ERR(req)) return PTR_ERR(req); err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid); if (err) { - p9pdu_dump(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); goto error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< RMKDIR qid %x.%llx.%x\n", qid->type, + p9_debug(P9_DEBUG_9P, "<<< RMKDIR qid %x.%llx.%x\n", qid->type, (unsigned long long)qid->path, qid->version); error: @@ -1813,7 +2192,7 @@ int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status) err = 0; clnt = fid->clnt; - P9_DPRINTK(P9_DEBUG_9P, ">>> TLOCK fid %d type %i flags %d " + p9_debug(P9_DEBUG_9P, ">>> TLOCK fid %d type %i flags %d " "start %lld length %lld proc_id %d client_id %s\n", fid->fid, flock->type, flock->flags, flock->start, flock->length, flock->proc_id, flock->client_id); @@ -1827,10 +2206,10 @@ int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status) err = p9pdu_readf(req->rc, clnt->proto_version, "b", status); if (err) { - p9pdu_dump(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); goto error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< RLOCK status %i\n", *status); + p9_debug(P9_DEBUG_9P, "<<< RLOCK status %i\n", *status); error: p9_free_req(clnt, req); return err; @@ -1846,7 +2225,7 @@ int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *glock) err = 0; clnt = fid->clnt; - P9_DPRINTK(P9_DEBUG_9P, ">>> TGETLOCK fid %d, type %i start %lld " + p9_debug(P9_DEBUG_9P, ">>> TGETLOCK fid %d, type %i start %lld " "length %lld proc_id %d client_id %s\n", fid->fid, glock->type, glock->start, glock->length, glock->proc_id, glock->client_id); @@ -1860,10 +2239,10 @@ int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *glock) &glock->start, &glock->length, &glock->proc_id, &glock->client_id); if (err) { - p9pdu_dump(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); goto error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< RGETLOCK type %i start %lld length %lld " + p9_debug(P9_DEBUG_9P, "<<< RGETLOCK type %i start %lld length %lld " "proc_id %d client_id %s\n", glock->type, glock->start, glock->length, glock->proc_id, glock->client_id); error: @@ -1880,7 +2259,7 @@ int p9_client_readlink(struct p9_fid *fid, char **target) err = 0; clnt = fid->clnt; - P9_DPRINTK(P9_DEBUG_9P, ">>> TREADLINK fid %d\n", fid->fid); + p9_debug(P9_DEBUG_9P, ">>> TREADLINK fid %d\n", fid->fid); req = p9_client_rpc(clnt, P9_TREADLINK, "d", fid->fid); if (IS_ERR(req)) @@ -1888,10 +2267,10 @@ int p9_client_readlink(struct p9_fid *fid, char **target) err = p9pdu_readf(req->rc, clnt->proto_version, "s", target); if (err) { - p9pdu_dump(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); goto error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< RREADLINK target %s\n", *target); + p9_debug(P9_DEBUG_9P, "<<< RREADLINK target %s\n", *target); error: p9_free_req(clnt, req); return err; diff --git a/net/9p/error.c b/net/9p/error.c index 52518512a93..126fd0dceea 100644 --- a/net/9p/error.c +++ b/net/9p/error.c @@ -27,6 +27,8 @@ * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/module.h> #include <linux/list.h> #include <linux/jhash.h> @@ -219,15 +221,13 @@ EXPORT_SYMBOL(p9_error_init); int p9_errstr2errno(char *errstr, int len) { int errno; - struct hlist_node *p; struct errormap *c; int bucket; errno = 0; - p = NULL; c = NULL; bucket = jhash(errstr, len, 0) % ERRHASHSZ; - hlist_for_each_entry(c, p, &hash_errmap[bucket], list) { + hlist_for_each_entry(c, &hash_errmap[bucket], list) { if (c->namelen == len && !memcmp(c->name, errstr, len)) { errno = c->val; break; @@ -237,8 +237,8 @@ int p9_errstr2errno(char *errstr, int len) if (errno == 0) { /* TODO: if error isn't found, add it dynamically */ errstr[len] = 0; - printk(KERN_ERR "%s: server reported unknown error %s\n", - __func__, errstr); + pr_err("%s: server reported unknown error %s\n", + __func__, errstr); errno = ESERVERFAULT; } diff --git a/net/9p/mod.c b/net/9p/mod.c index cf8a4128cd5..6ab36aea772 100644 --- a/net/9p/mod.c +++ b/net/9p/mod.c @@ -24,7 +24,11 @@ * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/module.h> +#include <linux/errno.h> +#include <linux/sched.h> #include <linux/moduleparam.h> #include <net/9p/9p.h> #include <linux/fs.h> @@ -39,6 +43,29 @@ unsigned int p9_debug_level = 0; /* feature-rific global debug level */ EXPORT_SYMBOL(p9_debug_level); module_param_named(debug, p9_debug_level, uint, 0); MODULE_PARM_DESC(debug, "9P debugging level"); + +void _p9_debug(enum p9_debug_flags level, const char *func, + const char *fmt, ...) +{ + struct va_format vaf; + va_list args; + + if ((p9_debug_level & level) != level) + return; + + va_start(args, fmt); + + vaf.fmt = fmt; + vaf.va = &args; + + if (level == P9_DEBUG_9P) + pr_notice("(%8.8d) %pV", task_pid_nr(current), &vaf); + else + pr_notice("-- %s (%d): %pV", func, task_pid_nr(current), &vaf); + + va_end(args); +} +EXPORT_SYMBOL(_p9_debug); #endif /* @@ -80,14 +107,14 @@ EXPORT_SYMBOL(v9fs_unregister_trans); * @name: string identifying transport * */ -struct p9_trans_module *v9fs_get_trans_by_name(const substring_t *name) +struct p9_trans_module *v9fs_get_trans_by_name(char *s) { struct p9_trans_module *t, *found = NULL; spin_lock(&v9fs_trans_lock); list_for_each_entry(t, &v9fs_trans_list, list) - if (strncmp(t->name, name->from, name->to-name->from) == 0 && + if (strcmp(t->name, s) == 0 && try_module_get(t->owner)) { found = t; break; @@ -139,7 +166,7 @@ void v9fs_put_trans(struct p9_trans_module *m) } /** - * v9fs_init - Initialize module + * init_p9 - Initialize module * */ static int __init init_p9(void) @@ -147,20 +174,20 @@ static int __init init_p9(void) int ret = 0; p9_error_init(); - printk(KERN_INFO "Installing 9P2000 support\n"); + pr_info("Installing 9P2000 support\n"); p9_trans_fd_init(); return ret; } /** - * v9fs_init - shutdown module + * exit_p9 - shutdown module * */ static void __exit exit_p9(void) { - printk(KERN_INFO "Unloading 9P2000 support\n"); + pr_info("Unloading 9P2000 support\n"); p9_trans_fd_exit(); } diff --git a/net/9p/protocol.c b/net/9p/protocol.c index 1e308f21092..ab9127ec5b7 100644 --- a/net/9p/protocol.c +++ b/net/9p/protocol.c @@ -37,46 +37,11 @@ #include <net/9p/client.h> #include "protocol.h" +#include <trace/events/9p.h> + static int p9pdu_writef(struct p9_fcall *pdu, int proto_version, const char *fmt, ...); -#ifdef CONFIG_NET_9P_DEBUG -void -p9pdu_dump(int way, struct p9_fcall *pdu) -{ - int i, n; - u8 *data = pdu->sdata; - int datalen = pdu->size; - char buf[255]; - int buflen = 255; - - i = n = 0; - if (datalen > (buflen-16)) - datalen = buflen-16; - while (i < datalen) { - n += scnprintf(buf + n, buflen - n, "%02x ", data[i]); - if (i%4 == 3) - n += scnprintf(buf + n, buflen - n, " "); - if (i%32 == 31) - n += scnprintf(buf + n, buflen - n, "\n"); - - i++; - } - n += scnprintf(buf + n, buflen - n, "\n"); - - if (way) - P9_DPRINTK(P9_DEBUG_PKT, "[[[(%d) %s\n", datalen, buf); - else - P9_DPRINTK(P9_DEBUG_PKT, "]]](%d) %s\n", datalen, buf); -} -#else -void -p9pdu_dump(int way, struct p9_fcall *pdu) -{ -} -#endif -EXPORT_SYMBOL(p9pdu_dump); - void p9stat_free(struct p9_wstat *stbuf) { kfree(stbuf->name); @@ -87,7 +52,7 @@ void p9stat_free(struct p9_wstat *stbuf) } EXPORT_SYMBOL(p9stat_free); -static size_t pdu_read(struct p9_fcall *pdu, void *data, size_t size) +size_t pdu_read(struct p9_fcall *pdu, void *data, size_t size) { size_t len = min(pdu->size - pdu->offset, size); memcpy(data, &pdu->sdata[pdu->offset], len); @@ -120,6 +85,8 @@ pdu_write_u(struct p9_fcall *pdu, const char __user *udata, size_t size) d - int32_t q - int64_t s - string + u - numeric uid + g - numeric gid S - stat Q - qid D - data blob (int32_t size followed by void *, results are not freed) @@ -185,7 +152,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt, if (errcode) break; - *sptr = kmalloc(len + 1, GFP_KERNEL); + *sptr = kmalloc(len + 1, GFP_NOFS); if (*sptr == NULL) { errcode = -EFAULT; break; @@ -198,6 +165,26 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt, (*sptr)[len] = 0; } break; + case 'u': { + kuid_t *uid = va_arg(ap, kuid_t *); + __le32 le_val; + if (pdu_read(pdu, &le_val, sizeof(le_val))) { + errcode = -EFAULT; + break; + } + *uid = make_kuid(&init_user_ns, + le32_to_cpu(le_val)); + } break; + case 'g': { + kgid_t *gid = va_arg(ap, kgid_t *); + __le32 le_val; + if (pdu_read(pdu, &le_val, sizeof(le_val))) { + errcode = -EFAULT; + break; + } + *gid = make_kgid(&init_user_ns, + le32_to_cpu(le_val)); + } break; case 'Q':{ struct p9_qid *qid = va_arg(ap, struct p9_qid *); @@ -212,11 +199,12 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt, va_arg(ap, struct p9_wstat *); memset(stbuf, 0, sizeof(struct p9_wstat)); - stbuf->n_uid = stbuf->n_gid = stbuf->n_muid = - -1; + stbuf->n_uid = stbuf->n_muid = INVALID_UID; + stbuf->n_gid = INVALID_GID; + errcode = p9pdu_readf(pdu, proto_version, - "wwdQdddqssss?sddd", + "wwdQdddqssss?sugu", &stbuf->size, &stbuf->type, &stbuf->dev, &stbuf->qid, &stbuf->mode, &stbuf->atime, @@ -245,7 +233,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt, } break; case 'T':{ - int16_t *nwname = va_arg(ap, int16_t *); + uint16_t *nwname = va_arg(ap, uint16_t *); char ***wnames = va_arg(ap, char ***); errcode = p9pdu_readf(pdu, proto_version, @@ -253,7 +241,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt, if (!errcode) { *wnames = kmalloc(sizeof(char *) * *nwname, - GFP_KERNEL); + GFP_NOFS); if (!*wnames) errcode = -ENOMEM; } @@ -297,7 +285,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt, *wqids = kmalloc(*nwqid * sizeof(struct p9_qid), - GFP_KERNEL); + GFP_NOFS); if (*wqids == NULL) errcode = -ENOMEM; } @@ -329,7 +317,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt, memset(stbuf, 0, sizeof(struct p9_stat_dotl)); errcode = p9pdu_readf(pdu, proto_version, - "qQdddqqqqqqqqqqqqqqq", + "qQdugqqqqqqqqqqqqqqq", &stbuf->st_result_mask, &stbuf->qid, &stbuf->st_mode, @@ -403,7 +391,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt, const char *sptr = va_arg(ap, const char *); uint16_t len = 0; if (sptr) - len = min_t(uint16_t, strlen(sptr), + len = min_t(size_t, strlen(sptr), USHRT_MAX); errcode = p9pdu_writef(pdu, proto_version, @@ -412,6 +400,20 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt, errcode = -EFAULT; } break; + case 'u': { + kuid_t uid = va_arg(ap, kuid_t); + __le32 val = cpu_to_le32( + from_kuid(&init_user_ns, uid)); + if (pdu_write(pdu, &val, sizeof(val))) + errcode = -EFAULT; + } break; + case 'g': { + kgid_t gid = va_arg(ap, kgid_t); + __le32 val = cpu_to_le32( + from_kgid(&init_user_ns, gid)); + if (pdu_write(pdu, &val, sizeof(val))) + errcode = -EFAULT; + } break; case 'Q':{ const struct p9_qid *qid = va_arg(ap, const struct p9_qid *); @@ -425,7 +427,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt, va_arg(ap, const struct p9_wstat *); errcode = p9pdu_writef(pdu, proto_version, - "wwdQdddqssss?sddd", + "wwdQdddqssss?sugu", stbuf->size, stbuf->type, stbuf->dev, &stbuf->qid, stbuf->mode, stbuf->atime, @@ -456,7 +458,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt, } break; case 'T':{ - int16_t nwname = va_arg(ap, int); + uint16_t nwname = va_arg(ap, int); const char **wnames = va_arg(ap, const char **); errcode = p9pdu_writef(pdu, proto_version, "w", @@ -503,7 +505,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt, struct p9_iattr_dotl *); errcode = p9pdu_writef(pdu, proto_version, - "ddddqqqqq", + "ddugqqqqq", p9attr->valid, p9attr->mode, p9attr->uid, @@ -557,7 +559,7 @@ p9pdu_writef(struct p9_fcall *pdu, int proto_version, const char *fmt, ...) return ret; } -int p9stat_read(char *buf, int len, struct p9_wstat *st, int proto_version) +int p9stat_read(struct p9_client *clnt, char *buf, int len, struct p9_wstat *st) { struct p9_fcall fake_pdu; int ret; @@ -567,10 +569,10 @@ int p9stat_read(char *buf, int len, struct p9_wstat *st, int proto_version) fake_pdu.sdata = buf; fake_pdu.offset = 0; - ret = p9pdu_readf(&fake_pdu, proto_version, "S", st); + ret = p9pdu_readf(&fake_pdu, clnt->proto_version, "S", st); if (ret) { - P9_DPRINTK(P9_DEBUG_9P, "<<< p9stat_read failed: %d\n", ret); - p9pdu_dump(1, &fake_pdu); + p9_debug(P9_DEBUG_9P, "<<< p9stat_read failed: %d\n", ret); + trace_9p_protocol_dump(clnt, &fake_pdu); } return ret; @@ -579,10 +581,11 @@ EXPORT_SYMBOL(p9stat_read); int p9pdu_prepare(struct p9_fcall *pdu, int16_t tag, int8_t type) { + pdu->id = type; return p9pdu_writef(pdu, 0, "dbw", 0, type, tag); } -int p9pdu_finalize(struct p9_fcall *pdu) +int p9pdu_finalize(struct p9_client *clnt, struct p9_fcall *pdu) { int size = pdu->size; int err; @@ -591,13 +594,9 @@ int p9pdu_finalize(struct p9_fcall *pdu) err = p9pdu_writef(pdu, 0, "d", size); pdu->size = size; -#ifdef CONFIG_NET_9P_DEBUG - if ((p9_debug_level & P9_DEBUG_PKT) == P9_DEBUG_PKT) - p9pdu_dump(0, pdu); -#endif - - P9_DPRINTK(P9_DEBUG_9P, ">>> size=%d type: %d tag: %d\n", pdu->size, - pdu->id, pdu->tag); + trace_9p_protocol_dump(clnt, pdu); + p9_debug(P9_DEBUG_9P, ">>> size=%d type: %d tag: %d\n", + pdu->size, pdu->id, pdu->tag); return err; } @@ -608,8 +607,8 @@ void p9pdu_reset(struct p9_fcall *pdu) pdu->size = 0; } -int p9dirent_read(char *buf, int len, struct p9_dirent *dirent, - int proto_version) +int p9dirent_read(struct p9_client *clnt, char *buf, int len, + struct p9_dirent *dirent) { struct p9_fcall fake_pdu; int ret; @@ -620,15 +619,16 @@ int p9dirent_read(char *buf, int len, struct p9_dirent *dirent, fake_pdu.sdata = buf; fake_pdu.offset = 0; - ret = p9pdu_readf(&fake_pdu, proto_version, "Qqbs", &dirent->qid, - &dirent->d_off, &dirent->d_type, &nameptr); + ret = p9pdu_readf(&fake_pdu, clnt->proto_version, "Qqbs", &dirent->qid, + &dirent->d_off, &dirent->d_type, &nameptr); if (ret) { - P9_DPRINTK(P9_DEBUG_9P, "<<< p9dirent_read failed: %d\n", ret); - p9pdu_dump(1, &fake_pdu); + p9_debug(P9_DEBUG_9P, "<<< p9dirent_read failed: %d\n", ret); + trace_9p_protocol_dump(clnt, &fake_pdu); goto out; } strcpy(dirent->d_name, nameptr); + kfree(nameptr); out: return fake_pdu.offset; diff --git a/net/9p/protocol.h b/net/9p/protocol.h index 2431c0f38d5..2cc525fa49f 100644 --- a/net/9p/protocol.h +++ b/net/9p/protocol.h @@ -29,6 +29,6 @@ int p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt, va_list ap); int p9pdu_readf(struct p9_fcall *pdu, int proto_version, const char *fmt, ...); int p9pdu_prepare(struct p9_fcall *pdu, int16_t tag, int8_t type); -int p9pdu_finalize(struct p9_fcall *pdu); -void p9pdu_dump(int, struct p9_fcall *); +int p9pdu_finalize(struct p9_client *clnt, struct p9_fcall *pdu); void p9pdu_reset(struct p9_fcall *pdu); +size_t pdu_read(struct p9_fcall *pdu, void *data, size_t size); diff --git a/net/9p/trans_common.c b/net/9p/trans_common.c new file mode 100644 index 00000000000..2ee3879161b --- /dev/null +++ b/net/9p/trans_common.c @@ -0,0 +1,69 @@ +/* + * Copyright IBM Corporation, 2010 + * Author Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2.1 of the GNU Lesser General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + */ + +#include <linux/slab.h> +#include <linux/module.h> +#include <net/9p/9p.h> +#include <net/9p/client.h> +#include <linux/scatterlist.h> +#include "trans_common.h" + +/** + * p9_release_req_pages - Release pages after the transaction. + */ +void p9_release_pages(struct page **pages, int nr_pages) +{ + int i; + + for (i = 0; i < nr_pages; i++) + if (pages[i]) + put_page(pages[i]); +} +EXPORT_SYMBOL(p9_release_pages); + +/** + * p9_nr_pages - Return number of pages needed to accommodate the payload. + */ +int p9_nr_pages(char *data, int len) +{ + unsigned long start_page, end_page; + start_page = (unsigned long)data >> PAGE_SHIFT; + end_page = ((unsigned long)data + len + PAGE_SIZE - 1) >> PAGE_SHIFT; + return end_page - start_page; +} +EXPORT_SYMBOL(p9_nr_pages); + +/** + * payload_gup - Translates user buffer into kernel pages and + * pins them either for read/write through get_user_pages_fast(). + * @req: Request to be sent to server. + * @pdata_off: data offset into the first page after translation (gup). + * @pdata_len: Total length of the IO. gup may not return requested # of pages. + * @nr_pages: number of pages to accommodate the payload + * @rw: Indicates if the pages are for read or write. + */ + +int p9_payload_gup(char *data, int *nr_pages, struct page **pages, int write) +{ + int nr_mapped_pages; + + nr_mapped_pages = get_user_pages_fast((unsigned long)data, + *nr_pages, write, pages); + if (nr_mapped_pages <= 0) + return nr_mapped_pages; + + *nr_pages = nr_mapped_pages; + return 0; +} +EXPORT_SYMBOL(p9_payload_gup); diff --git a/net/9p/trans_common.h b/net/9p/trans_common.h new file mode 100644 index 00000000000..173bb550a9e --- /dev/null +++ b/net/9p/trans_common.h @@ -0,0 +1,17 @@ +/* + * Copyright IBM Corporation, 2010 + * Author Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2.1 of the GNU Lesser General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + */ + +void p9_release_pages(struct page **, int); +int p9_payload_gup(char *, int *, struct page **, int); +int p9_nr_pages(char *, int); diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 078eb162d9b..80d08f6664c 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -25,6 +25,8 @@ * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/in.h> #include <linux/module.h> #include <linux/net.h> @@ -61,20 +63,7 @@ struct p9_fd_opts { int rfd; int wfd; u16 port; -}; - -/** - * struct p9_trans_fd - transport state - * @rd: reference to file to read from - * @wr: reference of file to write to - * @conn: connection state reference - * - */ - -struct p9_trans_fd { - struct file *rd; - struct file *wr; - struct p9_conn *conn; + int privport; }; /* @@ -85,12 +74,15 @@ struct p9_trans_fd { enum { /* Options that take integer arguments */ Opt_port, Opt_rfdno, Opt_wfdno, Opt_err, + /* Options that take no arguments */ + Opt_privport, }; static const match_table_t tokens = { {Opt_port, "port=%u"}, {Opt_rfdno, "rfdno=%u"}, {Opt_wfdno, "wfdno=%u"}, + {Opt_privport, "privport"}, {Opt_err, NULL}, }; @@ -153,10 +145,28 @@ struct p9_conn { unsigned long wsched; }; +/** + * struct p9_trans_fd - transport state + * @rd: reference to file to read from + * @wr: reference of file to write to + * @conn: connection state reference + * + */ + +struct p9_trans_fd { + struct file *rd; + struct file *wr; + struct p9_conn conn; +}; + +static void p9_poll_workfn(struct work_struct *work); + static DEFINE_SPINLOCK(p9_poll_lock); static LIST_HEAD(p9_poll_pending_list); -static struct workqueue_struct *p9_mux_wq; -static struct task_struct *p9_poll_task; +static DECLARE_WORK(p9_poll_work, p9_poll_workfn); + +static unsigned int p9_ipport_resv_min = P9_DEF_MIN_RESVPORT; +static unsigned int p9_ipport_resv_max = P9_DEF_MAX_RESVPORT; static void p9_mux_poll_stop(struct p9_conn *m) { @@ -190,7 +200,7 @@ static void p9_conn_cancel(struct p9_conn *m, int err) unsigned long flags; LIST_HEAD(cancel_list); - P9_DPRINTK(P9_DEBUG_ERROR, "mux %p err %d\n", m, err); + p9_debug(P9_DEBUG_ERROR, "mux %p err %d\n", m, err); spin_lock_irqsave(&m->client->lock, flags); @@ -202,23 +212,19 @@ static void p9_conn_cancel(struct p9_conn *m, int err) m->err = err; list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) { - req->status = REQ_STATUS_ERROR; - if (!req->t_err) - req->t_err = err; list_move(&req->req_list, &cancel_list); } list_for_each_entry_safe(req, rtmp, &m->unsent_req_list, req_list) { - req->status = REQ_STATUS_ERROR; - if (!req->t_err) - req->t_err = err; list_move(&req->req_list, &cancel_list); } spin_unlock_irqrestore(&m->client->lock, flags); list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) { - P9_DPRINTK(P9_DEBUG_ERROR, "call back req %p\n", req); + p9_debug(P9_DEBUG_ERROR, "call back req %p\n", req); list_del(&req->req_list); - p9_client_cb(m->client, req); + if (!req->t_err) + req->t_err = err; + p9_client_cb(m->client, req, REQ_STATUS_ERROR); } } @@ -234,10 +240,10 @@ p9_fd_poll(struct p9_client *client, struct poll_table_struct *pt) if (!ts) return -EREMOTEIO; - if (!ts->rd->f_op || !ts->rd->f_op->poll) + if (!ts->rd->f_op->poll) return -EIO; - if (!ts->wr->f_op || !ts->wr->f_op->poll) + if (!ts->wr->f_op->poll) return -EIO; ret = ts->rd->f_op->poll(ts->rd, pt); @@ -274,7 +280,7 @@ static int p9_fd_read(struct p9_client *client, void *v, int len) return -EREMOTEIO; if (!(ts->rd->f_flags & O_NONBLOCK)) - P9_DPRINTK(P9_DEBUG_ERROR, "blocking read ...\n"); + p9_debug(P9_DEBUG_ERROR, "blocking read ...\n"); ret = kernel_read(ts->rd, ts->rd->f_pos, v, len); if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN) @@ -292,13 +298,14 @@ static void p9_read_work(struct work_struct *work) { int n, err; struct p9_conn *m; + int status = REQ_STATUS_ERROR; m = container_of(work, struct p9_conn, rq); if (m->err < 0) return; - P9_DPRINTK(P9_DEBUG_TRANS, "start mux %p pos %d\n", m, m->rpos); + p9_debug(P9_DEBUG_TRANS, "start mux %p pos %d\n", m, m->rpos); if (!m->rbuf) { m->rbuf = m->tmp_buf; @@ -307,14 +314,13 @@ static void p9_read_work(struct work_struct *work) } clear_bit(Rpending, &m->wsched); - P9_DPRINTK(P9_DEBUG_TRANS, "read mux %p pos %d size: %d = %d\n", m, - m->rpos, m->rsize, m->rsize-m->rpos); + p9_debug(P9_DEBUG_TRANS, "read mux %p pos %d size: %d = %d\n", + m, m->rpos, m->rsize, m->rsize-m->rpos); err = p9_fd_read(m->client, m->rbuf + m->rpos, m->rsize - m->rpos); - P9_DPRINTK(P9_DEBUG_TRANS, "mux %p got %d bytes\n", m, err); + p9_debug(P9_DEBUG_TRANS, "mux %p got %d bytes\n", m, err); if (err == -EAGAIN) { - clear_bit(Rworksched, &m->wsched); - return; + goto end_clear; } if (err <= 0) @@ -324,32 +330,31 @@ static void p9_read_work(struct work_struct *work) if ((!m->req) && (m->rpos == m->rsize)) { /* header read in */ u16 tag; - P9_DPRINTK(P9_DEBUG_TRANS, "got new header\n"); + p9_debug(P9_DEBUG_TRANS, "got new header\n"); n = le32_to_cpu(*(__le32 *) m->rbuf); /* read packet size */ if (n >= m->client->msize) { - P9_DPRINTK(P9_DEBUG_ERROR, - "requested packet size too big: %d\n", n); + p9_debug(P9_DEBUG_ERROR, + "requested packet size too big: %d\n", n); err = -EIO; goto error; } tag = le16_to_cpu(*(__le16 *) (m->rbuf+5)); /* read tag */ - P9_DPRINTK(P9_DEBUG_TRANS, - "mux %p pkt: size: %d bytes tag: %d\n", m, n, tag); + p9_debug(P9_DEBUG_TRANS, + "mux %p pkt: size: %d bytes tag: %d\n", m, n, tag); m->req = p9_tag_lookup(m->client, tag); - if (!m->req || (m->req->status != REQ_STATUS_SENT && - m->req->status != REQ_STATUS_FLSH)) { - P9_DPRINTK(P9_DEBUG_ERROR, "Unexpected packet tag %d\n", - tag); + if (!m->req || (m->req->status != REQ_STATUS_SENT)) { + p9_debug(P9_DEBUG_ERROR, "Unexpected packet tag %d\n", + tag); err = -EIO; goto error; } if (m->req->rc == NULL) { m->req->rc = kmalloc(sizeof(struct p9_fcall) + - m->client->msize, GFP_KERNEL); + m->client->msize, GFP_NOFS); if (!m->req->rc) { m->req = NULL; err = -ENOMEM; @@ -363,32 +368,33 @@ static void p9_read_work(struct work_struct *work) /* not an else because some packets (like clunk) have no payload */ if ((m->req) && (m->rpos == m->rsize)) { /* packet is read in */ - P9_DPRINTK(P9_DEBUG_TRANS, "got new packet\n"); + p9_debug(P9_DEBUG_TRANS, "got new packet\n"); spin_lock(&m->client->lock); if (m->req->status != REQ_STATUS_ERROR) - m->req->status = REQ_STATUS_RCVD; + status = REQ_STATUS_RCVD; list_del(&m->req->req_list); spin_unlock(&m->client->lock); - p9_client_cb(m->client, m->req); + p9_client_cb(m->client, m->req, status); m->rbuf = NULL; m->rpos = 0; m->rsize = 0; m->req = NULL; } +end_clear: + clear_bit(Rworksched, &m->wsched); + if (!list_empty(&m->req_list)) { if (test_and_clear_bit(Rpending, &m->wsched)) n = POLLIN; else n = p9_fd_poll(m->client, NULL); - if (n & POLLIN) { - P9_DPRINTK(P9_DEBUG_TRANS, "sched read work %p\n", m); - queue_work(p9_mux_wq, &m->rq); - } else - clear_bit(Rworksched, &m->wsched); - } else - clear_bit(Rworksched, &m->wsched); + if ((n & POLLIN) && !test_and_set_bit(Rworksched, &m->wsched)) { + p9_debug(P9_DEBUG_TRANS, "sched read work %p\n", m); + schedule_work(&m->rq); + } + } return; error: @@ -417,7 +423,7 @@ static int p9_fd_write(struct p9_client *client, void *v, int len) return -EREMOTEIO; if (!(ts->wr->f_flags & O_NONBLOCK)) - P9_DPRINTK(P9_DEBUG_ERROR, "blocking write ...\n"); + p9_debug(P9_DEBUG_ERROR, "blocking write ...\n"); oldfs = get_fs(); set_fs(get_ds()); @@ -450,16 +456,17 @@ static void p9_write_work(struct work_struct *work) } if (!m->wsize) { + spin_lock(&m->client->lock); if (list_empty(&m->unsent_req_list)) { clear_bit(Wworksched, &m->wsched); + spin_unlock(&m->client->lock); return; } - spin_lock(&m->client->lock); req = list_entry(m->unsent_req_list.next, struct p9_req_t, req_list); req->status = REQ_STATUS_SENT; - P9_DPRINTK(P9_DEBUG_TRANS, "move req %p\n", req); + p9_debug(P9_DEBUG_TRANS, "move req %p\n", req); list_move_tail(&req->req_list, &m->req_list); m->wbuf = req->tc->sdata; @@ -468,15 +475,14 @@ static void p9_write_work(struct work_struct *work) spin_unlock(&m->client->lock); } - P9_DPRINTK(P9_DEBUG_TRANS, "mux %p pos %d size %d\n", m, m->wpos, - m->wsize); + p9_debug(P9_DEBUG_TRANS, "mux %p pos %d size %d\n", + m, m->wpos, m->wsize); clear_bit(Wpending, &m->wsched); err = p9_fd_write(m->client, m->wbuf + m->wpos, m->wsize - m->wpos); - P9_DPRINTK(P9_DEBUG_TRANS, "mux %p sent %d bytes\n", m, err); - if (err == -EAGAIN) { - clear_bit(Wworksched, &m->wsched); - return; - } + p9_debug(P9_DEBUG_TRANS, "mux %p sent %d bytes\n", m, err); + if (err == -EAGAIN) + goto end_clear; + if (err < 0) goto error; @@ -489,19 +495,21 @@ static void p9_write_work(struct work_struct *work) if (m->wpos == m->wsize) m->wpos = m->wsize = 0; - if (m->wsize == 0 && !list_empty(&m->unsent_req_list)) { +end_clear: + clear_bit(Wworksched, &m->wsched); + + if (m->wsize || !list_empty(&m->unsent_req_list)) { if (test_and_clear_bit(Wpending, &m->wsched)) n = POLLOUT; else n = p9_fd_poll(m->client, NULL); - if (n & POLLOUT) { - P9_DPRINTK(P9_DEBUG_TRANS, "sched write work %p\n", m); - queue_work(p9_mux_wq, &m->wq); - } else - clear_bit(Wworksched, &m->wsched); - } else - clear_bit(Wworksched, &m->wsched); + if ((n & POLLOUT) && + !test_and_set_bit(Wworksched, &m->wsched)) { + p9_debug(P9_DEBUG_TRANS, "sched write work %p\n", m); + schedule_work(&m->wq); + } + } return; @@ -510,21 +518,20 @@ error: clear_bit(Wworksched, &m->wsched); } -static int p9_pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key) +static int p9_pollwake(wait_queue_t *wait, unsigned int mode, int sync, void *key) { struct p9_poll_wait *pwait = container_of(wait, struct p9_poll_wait, wait); struct p9_conn *m = pwait->conn; unsigned long flags; - DECLARE_WAITQUEUE(dummy_wait, p9_poll_task); spin_lock_irqsave(&p9_poll_lock, flags); if (list_empty(&m->poll_pending_link)) list_add_tail(&m->poll_pending_link, &p9_poll_pending_list); spin_unlock_irqrestore(&p9_poll_lock, flags); - /* perform the default wake up operation */ - return default_wake_function(&dummy_wait, mode, sync, key); + schedule_work(&p9_poll_work); + return 1; } /** @@ -551,7 +558,7 @@ p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p) } if (!pwait) { - P9_DPRINTK(P9_DEBUG_ERROR, "not enough wait_address slots\n"); + p9_debug(P9_DEBUG_ERROR, "not enough wait_address slots\n"); return; } @@ -562,22 +569,19 @@ p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p) } /** - * p9_conn_create - allocate and initialize the per-session mux data + * p9_conn_create - initialize the per-session mux data * @client: client instance * * Note: Creates the polling task if this is the first session. */ -static struct p9_conn *p9_conn_create(struct p9_client *client) +static void p9_conn_create(struct p9_client *client) { int n; - struct p9_conn *m; + struct p9_trans_fd *ts = client->trans; + struct p9_conn *m = &ts->conn; - P9_DPRINTK(P9_DEBUG_TRANS, "client %p msize %d\n", client, - client->msize); - m = kzalloc(sizeof(struct p9_conn), GFP_KERNEL); - if (!m) - return ERR_PTR(-ENOMEM); + p9_debug(P9_DEBUG_TRANS, "client %p msize %d\n", client, client->msize); INIT_LIST_HEAD(&m->mux_list); m->client = client; @@ -591,16 +595,14 @@ static struct p9_conn *p9_conn_create(struct p9_client *client) n = p9_fd_poll(client, &m->pt); if (n & POLLIN) { - P9_DPRINTK(P9_DEBUG_TRANS, "mux %p can read\n", m); + p9_debug(P9_DEBUG_TRANS, "mux %p can read\n", m); set_bit(Rpending, &m->wsched); } if (n & POLLOUT) { - P9_DPRINTK(P9_DEBUG_TRANS, "mux %p can write\n", m); + p9_debug(P9_DEBUG_TRANS, "mux %p can write\n", m); set_bit(Wpending, &m->wsched); } - - return m; } /** @@ -618,7 +620,7 @@ static void p9_poll_mux(struct p9_conn *m) n = p9_fd_poll(m->client, NULL); if (n < 0 || n & (POLLERR | POLLHUP | POLLNVAL)) { - P9_DPRINTK(P9_DEBUG_TRANS, "error mux %p err %d\n", m, n); + p9_debug(P9_DEBUG_TRANS, "error mux %p err %d\n", m, n); if (n >= 0) n = -ECONNRESET; p9_conn_cancel(m, n); @@ -626,20 +628,20 @@ static void p9_poll_mux(struct p9_conn *m) if (n & POLLIN) { set_bit(Rpending, &m->wsched); - P9_DPRINTK(P9_DEBUG_TRANS, "mux %p can read\n", m); + p9_debug(P9_DEBUG_TRANS, "mux %p can read\n", m); if (!test_and_set_bit(Rworksched, &m->wsched)) { - P9_DPRINTK(P9_DEBUG_TRANS, "sched read work %p\n", m); - queue_work(p9_mux_wq, &m->rq); + p9_debug(P9_DEBUG_TRANS, "sched read work %p\n", m); + schedule_work(&m->rq); } } if (n & POLLOUT) { set_bit(Wpending, &m->wsched); - P9_DPRINTK(P9_DEBUG_TRANS, "mux %p can write\n", m); + p9_debug(P9_DEBUG_TRANS, "mux %p can write\n", m); if ((m->wsize || !list_empty(&m->unsent_req_list)) && !test_and_set_bit(Wworksched, &m->wsched)) { - P9_DPRINTK(P9_DEBUG_TRANS, "sched write work %p\n", m); - queue_work(p9_mux_wq, &m->wq); + p9_debug(P9_DEBUG_TRANS, "sched write work %p\n", m); + schedule_work(&m->wq); } } } @@ -659,10 +661,10 @@ static int p9_fd_request(struct p9_client *client, struct p9_req_t *req) { int n; struct p9_trans_fd *ts = client->trans; - struct p9_conn *m = ts->conn; + struct p9_conn *m = &ts->conn; - P9_DPRINTK(P9_DEBUG_TRANS, "mux %p task %p tcall %p id %d\n", m, - current, req->tc, req->tc->id); + p9_debug(P9_DEBUG_TRANS, "mux %p task %p tcall %p id %d\n", + m, current, req->tc, req->tc->id); if (m->err < 0) return m->err; @@ -677,7 +679,7 @@ static int p9_fd_request(struct p9_client *client, struct p9_req_t *req) n = p9_fd_poll(m->client, NULL); if (n & POLLOUT && !test_and_set_bit(Wworksched, &m->wsched)) - queue_work(p9_mux_wq, &m->wq); + schedule_work(&m->wq); return 0; } @@ -686,7 +688,7 @@ static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req) { int ret = 1; - P9_DPRINTK(P9_DEBUG_TRANS, "client %p req %p\n", client, req); + p9_debug(P9_DEBUG_TRANS, "client %p req %p\n", client, req); spin_lock(&client->lock); @@ -694,14 +696,26 @@ static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req) list_del(&req->req_list); req->status = REQ_STATUS_FLSHD; ret = 0; - } else if (req->status == REQ_STATUS_SENT) - req->status = REQ_STATUS_FLSH; - + } spin_unlock(&client->lock); return ret; } +static int p9_fd_cancelled(struct p9_client *client, struct p9_req_t *req) +{ + p9_debug(P9_DEBUG_TRANS, "client %p req %p\n", client, req); + + /* we haven't received a response for oldreq, + * remove it from the list. + */ + spin_lock(&client->lock); + list_del(&req->req_list); + spin_unlock(&client->lock); + + return 0; +} + /** * parse_opts - parse mount options into p9_fd_opts structure * @params: options string passed from mount @@ -716,7 +730,6 @@ static int parse_opts(char *params, struct p9_fd_opts *opts) substring_t args[MAX_OPT_ARGS]; int option; char *options, *tmp_options; - int ret; opts->port = P9_PORT; opts->rfd = ~0; @@ -727,8 +740,8 @@ static int parse_opts(char *params, struct p9_fd_opts *opts) tmp_options = kstrdup(params, GFP_KERNEL); if (!tmp_options) { - P9_DPRINTK(P9_DEBUG_ERROR, - "failed to allocate copy of option string\n"); + p9_debug(P9_DEBUG_ERROR, + "failed to allocate copy of option string\n"); return -ENOMEM; } options = tmp_options; @@ -739,12 +752,11 @@ static int parse_opts(char *params, struct p9_fd_opts *opts) if (!*p) continue; token = match_token(p, tokens, args); - if (token != Opt_err) { + if ((token != Opt_err) && (token != Opt_privport)) { r = match_int(&args[0], &option); if (r < 0) { - P9_DPRINTK(P9_DEBUG_ERROR, - "integer field, but no integer?\n"); - ret = r; + p9_debug(P9_DEBUG_ERROR, + "integer field, but no integer?\n"); continue; } } @@ -758,6 +770,9 @@ static int parse_opts(char *params, struct p9_fd_opts *opts) case Opt_wfdno: opts->wfd = option; break; + case Opt_privport: + opts->privport = 1; + break; default: continue; } @@ -769,7 +784,7 @@ static int parse_opts(char *params, struct p9_fd_opts *opts) static int p9_fd_open(struct p9_client *client, int rfd, int wfd) { - struct p9_trans_fd *ts = kmalloc(sizeof(struct p9_trans_fd), + struct p9_trans_fd *ts = kzalloc(sizeof(struct p9_trans_fd), GFP_KERNEL); if (!ts) return -ENOMEM; @@ -794,53 +809,43 @@ static int p9_fd_open(struct p9_client *client, int rfd, int wfd) static int p9_socket_open(struct p9_client *client, struct socket *csocket) { struct p9_trans_fd *p; - int ret, fd; + struct file *file; - p = kmalloc(sizeof(struct p9_trans_fd), GFP_KERNEL); + p = kzalloc(sizeof(struct p9_trans_fd), GFP_KERNEL); if (!p) return -ENOMEM; csocket->sk->sk_allocation = GFP_NOIO; - fd = sock_map_fd(csocket, 0); - if (fd < 0) { - P9_EPRINTK(KERN_ERR, "p9_socket_open: failed to map fd\n"); + file = sock_alloc_file(csocket, 0, NULL); + if (IS_ERR(file)) { + pr_err("%s (%d): failed to map fd\n", + __func__, task_pid_nr(current)); sock_release(csocket); kfree(p); - return fd; + return PTR_ERR(file); } - get_file(csocket->file); - get_file(csocket->file); - p->wr = p->rd = csocket->file; + get_file(file); + p->wr = p->rd = file; client->trans = p; client->status = Connected; - sys_close(fd); /* still racy */ - p->rd->f_flags |= O_NONBLOCK; - p->conn = p9_conn_create(client); - if (IS_ERR(p->conn)) { - ret = PTR_ERR(p->conn); - p->conn = NULL; - kfree(p); - sockfd_put(csocket); - sockfd_put(csocket); - return ret; - } + p9_conn_create(client); return 0; } /** - * p9_mux_destroy - cancels all pending requests and frees mux resources + * p9_mux_destroy - cancels all pending requests of mux * @m: mux to destroy * */ static void p9_conn_destroy(struct p9_conn *m) { - P9_DPRINTK(P9_DEBUG_TRANS, "mux %p prev %p next %p\n", m, - m->mux_list.prev, m->mux_list.next); + p9_debug(P9_DEBUG_TRANS, "mux %p prev %p next %p\n", + m, m->mux_list.prev, m->mux_list.next); p9_mux_poll_stop(m); cancel_work_sync(&m->rq); @@ -849,7 +854,6 @@ static void p9_conn_destroy(struct p9_conn *m) p9_conn_cancel(m, -ECONNRESET); m->client = NULL; - kfree(m); } /** @@ -871,7 +875,7 @@ static void p9_fd_close(struct p9_client *client) client->status = Disconnected; - p9_conn_destroy(ts->conn); + p9_conn_destroy(&ts->conn); if (ts->rd) fput(ts->rd); @@ -898,6 +902,24 @@ static inline int valid_ipaddr4(const char *buf) return 0; } +static int p9_bind_privport(struct socket *sock) +{ + struct sockaddr_in cl; + int port, err = -EINVAL; + + memset(&cl, 0, sizeof(cl)); + cl.sin_family = AF_INET; + cl.sin_addr.s_addr = INADDR_ANY; + for (port = p9_ipport_resv_max; port >= p9_ipport_resv_min; port--) { + cl.sin_port = htons((ushort)port); + err = kernel_bind(sock, (struct sockaddr *)&cl, sizeof(cl)); + if (err != -EADDRINUSE) + break; + } + return err; +} + + static int p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args) { @@ -918,20 +940,30 @@ p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args) sin_server.sin_family = AF_INET; sin_server.sin_addr.s_addr = in_aton(addr); sin_server.sin_port = htons(opts.port); - err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &csocket); - + err = __sock_create(read_pnet(¤t->nsproxy->net_ns), PF_INET, + SOCK_STREAM, IPPROTO_TCP, &csocket, 1); if (err) { - P9_EPRINTK(KERN_ERR, "p9_trans_tcp: problem creating socket\n"); + pr_err("%s (%d): problem creating socket\n", + __func__, task_pid_nr(current)); return err; } + if (opts.privport) { + err = p9_bind_privport(csocket); + if (err < 0) { + pr_err("%s (%d): problem binding to privport\n", + __func__, task_pid_nr(current)); + sock_release(csocket); + return err; + } + } + err = csocket->ops->connect(csocket, (struct sockaddr *)&sin_server, sizeof(struct sockaddr_in), 0); if (err < 0) { - P9_EPRINTK(KERN_ERR, - "p9_trans_tcp: problem connecting socket to %s\n", - addr); + pr_err("%s (%d): problem connecting socket to %s\n", + __func__, task_pid_nr(current), addr); sock_release(csocket); return err; } @@ -949,24 +981,26 @@ p9_fd_create_unix(struct p9_client *client, const char *addr, char *args) csocket = NULL; if (strlen(addr) >= UNIX_PATH_MAX) { - P9_EPRINTK(KERN_ERR, "p9_trans_unix: address too long: %s\n", - addr); + pr_err("%s (%d): address too long: %s\n", + __func__, task_pid_nr(current), addr); return -ENAMETOOLONG; } sun_server.sun_family = PF_UNIX; strcpy(sun_server.sun_path, addr); - err = sock_create_kern(PF_UNIX, SOCK_STREAM, 0, &csocket); + err = __sock_create(read_pnet(¤t->nsproxy->net_ns), PF_UNIX, + SOCK_STREAM, 0, &csocket, 1); if (err < 0) { - P9_EPRINTK(KERN_ERR, "p9_trans_unix: problem creating socket\n"); + pr_err("%s (%d): problem creating socket\n", + __func__, task_pid_nr(current)); + return err; } err = csocket->ops->connect(csocket, (struct sockaddr *)&sun_server, sizeof(struct sockaddr_un) - 1, 0); if (err < 0) { - P9_EPRINTK(KERN_ERR, - "p9_trans_unix: problem connecting socket: %s: %d\n", - addr, err); + pr_err("%s (%d): problem connecting socket: %s: %d\n", + __func__, task_pid_nr(current), addr, err); sock_release(csocket); return err; } @@ -984,7 +1018,7 @@ p9_fd_create(struct p9_client *client, const char *addr, char *args) parse_opts(args, &opts); if (opts.rfd == ~0 || opts.wfd == ~0) { - printk(KERN_ERR "v9fs: Insufficient options for proto=fd\n"); + pr_err("Insufficient options for proto=fd\n"); return -ENOPROTOOPT; } @@ -993,14 +1027,7 @@ p9_fd_create(struct p9_client *client, const char *addr, char *args) return err; p = (struct p9_trans_fd *) client->trans; - p->conn = p9_conn_create(client); - if (IS_ERR(p->conn)) { - err = PTR_ERR(p->conn); - p->conn = NULL; - fput(p->rd); - fput(p->wr); - return err; - } + p9_conn_create(client); return 0; } @@ -1008,11 +1035,12 @@ p9_fd_create(struct p9_client *client, const char *addr, char *args) static struct p9_trans_module p9_tcp_trans = { .name = "tcp", .maxsize = MAX_SOCK_BUF, - .def = 1, + .def = 0, .create = p9_fd_create_tcp, .close = p9_fd_close, .request = p9_fd_request, .cancel = p9_fd_cancel, + .cancelled = p9_fd_cancelled, .owner = THIS_MODULE, }; @@ -1024,6 +1052,7 @@ static struct p9_trans_module p9_unix_trans = { .close = p9_fd_close, .request = p9_fd_request, .cancel = p9_fd_cancel, + .cancelled = p9_fd_cancelled, .owner = THIS_MODULE, }; @@ -1035,6 +1064,7 @@ static struct p9_trans_module p9_fd_trans = { .close = p9_fd_close, .request = p9_fd_request, .cancel = p9_fd_cancel, + .cancelled = p9_fd_cancelled, .owner = THIS_MODULE, }; @@ -1047,12 +1077,12 @@ static struct p9_trans_module p9_fd_trans = { * */ -static int p9_poll_proc(void *a) +static void p9_poll_workfn(struct work_struct *work) { unsigned long flags; - P9_DPRINTK(P9_DEBUG_TRANS, "start %p\n", current); - repeat: + p9_debug(P9_DEBUG_TRANS, "start %p\n", current); + spin_lock_irqsave(&p9_poll_lock, flags); while (!list_empty(&p9_poll_pending_list)) { struct p9_conn *conn = list_first_entry(&p9_poll_pending_list, @@ -1067,35 +1097,11 @@ static int p9_poll_proc(void *a) } spin_unlock_irqrestore(&p9_poll_lock, flags); - set_current_state(TASK_INTERRUPTIBLE); - if (list_empty(&p9_poll_pending_list)) { - P9_DPRINTK(P9_DEBUG_TRANS, "sleeping...\n"); - schedule(); - } - __set_current_state(TASK_RUNNING); - - if (!kthread_should_stop()) - goto repeat; - - P9_DPRINTK(P9_DEBUG_TRANS, "finish\n"); - return 0; + p9_debug(P9_DEBUG_TRANS, "finish\n"); } int p9_trans_fd_init(void) { - p9_mux_wq = create_workqueue("v9fs"); - if (!p9_mux_wq) { - printk(KERN_WARNING "v9fs: mux: creating workqueue failed\n"); - return -ENOMEM; - } - - p9_poll_task = kthread_run(p9_poll_proc, NULL, "v9fs-poll"); - if (IS_ERR(p9_poll_task)) { - destroy_workqueue(p9_mux_wq); - printk(KERN_WARNING "v9fs: mux: creating poll task failed\n"); - return PTR_ERR(p9_poll_task); - } - v9fs_register_trans(&p9_tcp_trans); v9fs_register_trans(&p9_unix_trans); v9fs_register_trans(&p9_fd_trans); @@ -1105,10 +1111,8 @@ int p9_trans_fd_init(void) void p9_trans_fd_exit(void) { - kthread_stop(p9_poll_task); + flush_work(&p9_poll_work); v9fs_unregister_trans(&p9_tcp_trans); v9fs_unregister_trans(&p9_unix_trans); v9fs_unregister_trans(&p9_fd_trans); - - destroy_workqueue(p9_mux_wq); } diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index 17c5ba7551a..14ad43b5cf8 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -26,6 +26,8 @@ * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/in.h> #include <linux/module.h> #include <linux/net.h> @@ -55,11 +57,8 @@ #define P9_RDMA_IRD 0 #define P9_RDMA_ORD 0 #define P9_RDMA_TIMEOUT 30000 /* 30 seconds */ -#define P9_RDMA_MAXSIZE (4*4096) /* Min SGE is 4, so we can - * safely advertise a maxsize - * of 64k */ +#define P9_RDMA_MAXSIZE (1024*1024) /* 1MB */ -#define P9_RDMA_MAX_SGE (P9_RDMA_MAXSIZE >> PAGE_SHIFT) /** * struct p9_trans_rdma - RDMA transport instance * @@ -74,7 +73,9 @@ * @sq_depth: The depth of the Send Queue * @sq_sem: Semaphore for the SQ * @rq_depth: The depth of the Receive Queue. - * @rq_count: Count of requests in the Receive Queue. + * @rq_sem: Semaphore for the RQ + * @excess_rc : Amount of posted Receive Contexts without a pending request. + * See rdma_request() * @addr: The remote peer's address * @req_lock: Protects the active request list * @cm_done: Completion event for connection management tracking @@ -99,7 +100,8 @@ struct p9_trans_rdma { int sq_depth; struct semaphore sq_sem; int rq_depth; - atomic_t rq_count; + struct semaphore rq_sem; + atomic_t excess_rc; struct sockaddr_in addr; spinlock_t req_lock; @@ -168,7 +170,6 @@ static int parse_opts(char *params, struct p9_rdma_opts *opts) substring_t args[MAX_OPT_ARGS]; int option; char *options, *tmp_options; - int ret; opts->port = P9_PORT; opts->sq_depth = P9_RDMA_SQ_DEPTH; @@ -180,8 +181,8 @@ static int parse_opts(char *params, struct p9_rdma_opts *opts) tmp_options = kstrdup(params, GFP_KERNEL); if (!tmp_options) { - P9_DPRINTK(P9_DEBUG_ERROR, - "failed to allocate copy of option string\n"); + p9_debug(P9_DEBUG_ERROR, + "failed to allocate copy of option string\n"); return -ENOMEM; } options = tmp_options; @@ -192,11 +193,12 @@ static int parse_opts(char *params, struct p9_rdma_opts *opts) if (!*p) continue; token = match_token(p, tokens, args); + if (token == Opt_err) + continue; r = match_int(&args[0], &option); if (r < 0) { - P9_DPRINTK(P9_DEBUG_ERROR, - "integer field, but no integer?\n"); - ret = r; + p9_debug(P9_DEBUG_ERROR, + "integer field, but no integer?\n"); continue; } switch (token) { @@ -297,15 +299,20 @@ handle_recv(struct p9_client *client, struct p9_trans_rdma *rdma, if (!req) goto err_out; + /* Check that we have not yet received a reply for this request. + */ + if (unlikely(req->rc)) { + pr_err("Duplicate reply for request %d", tag); + goto err_out; + } + req->rc = c->rc; - req->status = REQ_STATUS_RCVD; - p9_client_cb(client, req); + p9_client_cb(client, req, REQ_STATUS_RCVD); return; err_out: - P9_DPRINTK(P9_DEBUG_ERROR, "req %p err %d status %d\n", - req, err, status); + p9_debug(P9_DEBUG_ERROR, "req %p err %d status %d\n", req, err, status); rdma->state = P9_RDMA_FLUSHING; client->status = Disconnected; } @@ -321,8 +328,8 @@ handle_send(struct p9_client *client, struct p9_trans_rdma *rdma, static void qp_event_handler(struct ib_event *event, void *context) { - P9_DPRINTK(P9_DEBUG_ERROR, "QP event %d context %p\n", event->event, - context); + p9_debug(P9_DEBUG_ERROR, "QP event %d context %p\n", + event->event, context); } static void cq_comp_handler(struct ib_cq *cq, void *cq_context) @@ -338,8 +345,8 @@ static void cq_comp_handler(struct ib_cq *cq, void *cq_context) switch (c->wc_op) { case IB_WC_RECV: - atomic_dec(&rdma->rq_count); handle_recv(client, rdma, c, wc.status, wc.byte_len); + up(&rdma->rq_sem); break; case IB_WC_SEND: @@ -348,8 +355,7 @@ static void cq_comp_handler(struct ib_cq *cq, void *cq_context) break; default: - printk(KERN_ERR "9prdma: unexpected completion type, " - "c->wc_op=%d, wc.opcode=%d, status=%d\n", + pr_err("unexpected completion type, c->wc_op=%d, wc.opcode=%d, status=%d\n", c->wc_op, wc.opcode, wc.status); break; } @@ -359,7 +365,7 @@ static void cq_comp_handler(struct ib_cq *cq, void *cq_context) static void cq_event_handler(struct ib_event *e, void *v) { - P9_DPRINTK(P9_DEBUG_ERROR, "CQ event %d context %p\n", e->event, v); + p9_debug(P9_DEBUG_ERROR, "CQ event %d context %p\n", e->event, v); } static void rdma_destroy_trans(struct p9_trans_rdma *rdma) @@ -410,7 +416,7 @@ post_recv(struct p9_client *client, struct p9_rdma_context *c) return ib_post_recv(rdma->qp, &wr, &bad_wr); error: - P9_DPRINTK(P9_DEBUG_ERROR, "EIO\n"); + p9_debug(P9_DEBUG_ERROR, "EIO\n"); return -EIO; } @@ -424,32 +430,33 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req) struct p9_rdma_context *c = NULL; struct p9_rdma_context *rpl_context = NULL; + /* When an error occurs between posting the recv and the send, + * there will be a receive context posted without a pending request. + * Since there is no way to "un-post" it, we remember it and skip + * post_recv() for the next request. + * So here, + * see if we are this `next request' and need to absorb an excess rc. + * If yes, then drop and free our own, and do not recv_post(). + **/ + if (unlikely(atomic_read(&rdma->excess_rc) > 0)) { + if ((atomic_sub_return(1, &rdma->excess_rc) >= 0)) { + /* Got one ! */ + kfree(req->rc); + req->rc = NULL; + goto dont_need_post_recv; + } else { + /* We raced and lost. */ + atomic_inc(&rdma->excess_rc); + } + } + /* Allocate an fcall for the reply */ - rpl_context = kmalloc(sizeof *rpl_context, GFP_KERNEL); + rpl_context = kmalloc(sizeof *rpl_context, GFP_NOFS); if (!rpl_context) { err = -ENOMEM; - goto err_close; - } - - /* - * If the request has a buffer, steal it, otherwise - * allocate a new one. Typically, requests should already - * have receive buffers allocated and just swap them around - */ - if (!req->rc) { - req->rc = kmalloc(sizeof(struct p9_fcall)+client->msize, - GFP_KERNEL); - if (req->rc) { - req->rc->sdata = (char *) req->rc + - sizeof(struct p9_fcall); - req->rc->capacity = client->msize; - } + goto recv_error; } rpl_context->rc = req->rc; - if (!rpl_context->rc) { - err = -ENOMEM; - goto err_free2; - } /* * Post a receive buffer for this request. We need to ensure @@ -458,29 +465,35 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req) * outstanding request, so we must keep a count to avoid * overflowing the RQ. */ - if (atomic_inc_return(&rdma->rq_count) <= rdma->rq_depth) { - err = post_recv(client, rpl_context); - if (err) - goto err_free1; - } else - atomic_dec(&rdma->rq_count); + if (down_interruptible(&rdma->rq_sem)) { + err = -EINTR; + goto recv_error; + } + err = post_recv(client, rpl_context); + if (err) { + p9_debug(P9_DEBUG_FCALL, "POST RECV failed\n"); + goto recv_error; + } /* remove posted receive buffer from request structure */ req->rc = NULL; +dont_need_post_recv: /* Post the request */ - c = kmalloc(sizeof *c, GFP_KERNEL); + c = kmalloc(sizeof *c, GFP_NOFS); if (!c) { err = -ENOMEM; - goto err_free1; + goto send_error; } c->req = req; c->busa = ib_dma_map_single(rdma->cm_id->device, c->req->tc->sdata, c->req->tc->size, DMA_TO_DEVICE); - if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) - goto error; + if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) { + err = -EIO; + goto send_error; + } sge.addr = c->busa; sge.length = c->req->tc->size; @@ -494,22 +507,38 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req) wr.sg_list = &sge; wr.num_sge = 1; - if (down_interruptible(&rdma->sq_sem)) - goto error; + if (down_interruptible(&rdma->sq_sem)) { + err = -EINTR; + goto send_error; + } - return ib_post_send(rdma->qp, &wr, &bad_wr); + /* Mark request as `sent' *before* we actually send it, + * because doing if after could erase the REQ_STATUS_RCVD + * status in case of a very fast reply. + */ + req->status = REQ_STATUS_SENT; + err = ib_post_send(rdma->qp, &wr, &bad_wr); + if (err) + goto send_error; - error: + /* Success */ + return 0; + + /* Handle errors that happened during or while preparing the send: */ + send_error: + req->status = REQ_STATUS_ERROR; kfree(c); - kfree(rpl_context->rc); - kfree(rpl_context); - P9_DPRINTK(P9_DEBUG_ERROR, "EIO\n"); - return -EIO; - err_free1: - kfree(rpl_context->rc); - err_free2: + p9_debug(P9_DEBUG_ERROR, "Error %d in rdma_request()\n", err); + + /* Ach. + * We did recv_post(), but not send. We have one recv_post in excess. + */ + atomic_inc(&rdma->excess_rc); + return err; + + /* Handle errors that happened during or while preparing post_recv(): */ + recv_error: kfree(rpl_context); - err_close: spin_lock_irqsave(&rdma->req_lock, flags); if (rdma->state < P9_RDMA_CLOSING) { rdma->state = P9_RDMA_CLOSING; @@ -554,17 +583,30 @@ static struct p9_trans_rdma *alloc_rdma(struct p9_rdma_opts *opts) spin_lock_init(&rdma->req_lock); init_completion(&rdma->cm_done); sema_init(&rdma->sq_sem, rdma->sq_depth); - atomic_set(&rdma->rq_count, 0); + sema_init(&rdma->rq_sem, rdma->rq_depth); + atomic_set(&rdma->excess_rc, 0); return rdma; } -/* its not clear to me we can do anything after send has been posted */ static int rdma_cancel(struct p9_client *client, struct p9_req_t *req) { + /* Nothing to do here. + * We will take care of it (if we have to) in rdma_cancelled() + */ return 1; } +/* A request has been fully flushed without a reply. + * That means we have posted one buffer in excess. + */ +static int rdma_cancelled(struct p9_client *client, struct p9_req_t *req) +{ + struct p9_trans_rdma *rdma = client->trans; + atomic_inc(&rdma->excess_rc); + return 0; +} + /** * trans_create_rdma - Transport method for creating atransport instance * @client: client instance @@ -592,7 +634,8 @@ rdma_create_trans(struct p9_client *client, const char *addr, char *args) return -ENOMEM; /* Create the RDMA CM ID */ - rdma->cm_id = rdma_create_id(p9_cm_event_handler, client, RDMA_PS_TCP); + rdma->cm_id = rdma_create_id(p9_cm_event_handler, client, RDMA_PS_TCP, + IB_QPT_RC); if (IS_ERR(rdma->cm_id)) goto error; @@ -697,6 +740,7 @@ static struct p9_trans_module p9_rdma_trans = { .close = rdma_close, .request = rdma_request, .cancel = rdma_cancel, + .cancelled = rdma_cancelled, }; /** diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index c8f3f72ab20..6940d8fe897 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -26,6 +26,8 @@ * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/in.h> #include <linux/module.h> #include <linux/net.h> @@ -37,19 +39,24 @@ #include <linux/inet.h> #include <linux/idr.h> #include <linux/file.h> +#include <linux/highmem.h> #include <linux/slab.h> #include <net/9p/9p.h> #include <linux/parser.h> #include <net/9p/client.h> #include <net/9p/transport.h> #include <linux/scatterlist.h> +#include <linux/swap.h> #include <linux/virtio.h> #include <linux/virtio_9p.h> +#include "trans_common.h" #define VIRTQUEUE_NUM 128 /* a single mutex to manage channel initialization and attachment */ static DEFINE_MUTEX(virtio_9p_lock); +static DECLARE_WAIT_QUEUE_HEAD(vp_wq); +static atomic_t vp_pinned = ATOMIC_INIT(0); /** * struct virtio_chan - per-instance transport information @@ -77,7 +84,10 @@ struct virtio_chan { struct virtqueue *vq; int ring_bufs_avail; wait_queue_head_t *vc_wq; - + /* This is global limit. Since we don't have a global structure, + * will be placing it in each channel. + */ + unsigned long p9_max_pages; /* Scatterlist: can be too big for stack. */ struct scatterlist sg[VIRTQUEUE_NUM]; @@ -138,28 +148,24 @@ static void req_done(struct virtqueue *vq) struct p9_req_t *req; unsigned long flags; - P9_DPRINTK(P9_DEBUG_TRANS, ": request done\n"); + p9_debug(P9_DEBUG_TRANS, ": request done\n"); - do { + while (1) { spin_lock_irqsave(&chan->lock, flags); rc = virtqueue_get_buf(chan->vq, &len); - - if (rc != NULL) { - if (!chan->ring_bufs_avail) { - chan->ring_bufs_avail = 1; - wake_up(chan->vc_wq); - } - spin_unlock_irqrestore(&chan->lock, flags); - P9_DPRINTK(P9_DEBUG_TRANS, ": rc %p\n", rc); - P9_DPRINTK(P9_DEBUG_TRANS, ": lookup tag %d\n", - rc->tag); - req = p9_tag_lookup(chan->client, rc->tag); - req->status = REQ_STATUS_RCVD; - p9_client_cb(chan->client, req); - } else { + if (rc == NULL) { spin_unlock_irqrestore(&chan->lock, flags); + break; } - } while (rc != NULL); + chan->ring_bufs_avail = 1; + spin_unlock_irqrestore(&chan->lock, flags); + /* Wakeup if anyone waiting for VirtIO ring space. */ + wake_up(chan->vc_wq); + p9_debug(P9_DEBUG_TRANS, ": rc %p\n", rc); + p9_debug(P9_DEBUG_TRANS, ": lookup tag %d\n", rc->tag); + req = p9_tag_lookup(chan->client, rc->tag); + p9_client_cb(chan->client, req, REQ_STATUS_RCVD); + } } /** @@ -176,9 +182,8 @@ static void req_done(struct virtqueue *vq) * */ -static int -pack_sg_list(struct scatterlist *sg, int start, int limit, char *data, - int count) +static int pack_sg_list(struct scatterlist *sg, int start, + int limit, char *data, int count) { int s; int index = start; @@ -187,12 +192,15 @@ pack_sg_list(struct scatterlist *sg, int start, int limit, char *data, s = rest_of_page(data); if (s > count) s = count; + BUG_ON(index > limit); + /* Make sure we don't terminate early. */ + sg_unmark_end(&sg[index]); sg_set_buf(&sg[index++], data, s); count -= s; data += s; - BUG_ON(index > limit); } - + if (index-start) + sg_mark_end(&sg[index - 1]); return index-start; } @@ -203,6 +211,48 @@ static int p9_virtio_cancel(struct p9_client *client, struct p9_req_t *req) } /** + * pack_sg_list_p - Just like pack_sg_list. Instead of taking a buffer, + * this takes a list of pages. + * @sg: scatter/gather list to pack into + * @start: which segment of the sg_list to start at + * @pdata: a list of pages to add into sg. + * @nr_pages: number of pages to pack into the scatter/gather list + * @data: data to pack into scatter/gather list + * @count: amount of data to pack into the scatter/gather list + */ +static int +pack_sg_list_p(struct scatterlist *sg, int start, int limit, + struct page **pdata, int nr_pages, char *data, int count) +{ + int i = 0, s; + int data_off; + int index = start; + + BUG_ON(nr_pages > (limit - start)); + /* + * if the first page doesn't start at + * page boundary find the offset + */ + data_off = offset_in_page(data); + while (nr_pages) { + s = rest_of_page(data); + if (s > count) + s = count; + /* Make sure we don't terminate early. */ + sg_unmark_end(&sg[index]); + sg_set_page(&sg[index++], pdata[i++], s, data_off); + data_off = 0; + data += s; + count -= s; + nr_pages--; + } + + if (index-start) + sg_mark_end(&sg[index - 1]); + return index - start; +} + +/** * p9_virtio_request - issue a request * @client: client instance issuing the request * @req: request to be issued @@ -212,24 +262,32 @@ static int p9_virtio_cancel(struct p9_client *client, struct p9_req_t *req) static int p9_virtio_request(struct p9_client *client, struct p9_req_t *req) { - int in, out; - struct virtio_chan *chan = client->trans; - char *rdata = (char *)req->rc+sizeof(struct p9_fcall); - unsigned long flags; int err; + int in, out, out_sgs, in_sgs; + unsigned long flags; + struct virtio_chan *chan = client->trans; + struct scatterlist *sgs[2]; - P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request\n"); + p9_debug(P9_DEBUG_TRANS, "9p debug: virtio request\n"); -req_retry: req->status = REQ_STATUS_SENT; - +req_retry: spin_lock_irqsave(&chan->lock, flags); - out = pack_sg_list(chan->sg, 0, VIRTQUEUE_NUM, req->tc->sdata, - req->tc->size); - in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM-out, rdata, - client->msize); - err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc); + out_sgs = in_sgs = 0; + /* Handle out VirtIO ring buffers */ + out = pack_sg_list(chan->sg, 0, + VIRTQUEUE_NUM, req->tc->sdata, req->tc->size); + if (out) + sgs[out_sgs++] = chan->sg; + + in = pack_sg_list(chan->sg, out, + VIRTQUEUE_NUM, req->rc->sdata, req->rc->capacity); + if (in) + sgs[out_sgs + in_sgs++] = chan->sg + out; + + err = virtqueue_add_sgs(chan->vq, sgs, out_sgs, in_sgs, req->tc, + GFP_ATOMIC); if (err < 0) { if (err == -ENOSPC) { chan->ring_bufs_avail = 0; @@ -239,24 +297,204 @@ req_retry: if (err == -ERESTARTSYS) return err; - P9_DPRINTK(P9_DEBUG_TRANS, "9p:Retry virtio request\n"); + p9_debug(P9_DEBUG_TRANS, "Retry virtio request\n"); goto req_retry; } else { spin_unlock_irqrestore(&chan->lock, flags); - P9_DPRINTK(P9_DEBUG_TRANS, - "9p debug: " - "virtio rpc add_buf returned failure"); + p9_debug(P9_DEBUG_TRANS, + "virtio rpc add_sgs returned failure\n"); return -EIO; } } - virtqueue_kick(chan->vq); spin_unlock_irqrestore(&chan->lock, flags); - P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request kicked\n"); + p9_debug(P9_DEBUG_TRANS, "virtio request kicked\n"); return 0; } +static int p9_get_mapped_pages(struct virtio_chan *chan, + struct page **pages, char *data, + int nr_pages, int write, int kern_buf) +{ + int err; + if (!kern_buf) { + /* + * We allow only p9_max_pages pinned. We wait for the + * Other zc request to finish here + */ + if (atomic_read(&vp_pinned) >= chan->p9_max_pages) { + err = wait_event_interruptible(vp_wq, + (atomic_read(&vp_pinned) < chan->p9_max_pages)); + if (err == -ERESTARTSYS) + return err; + } + err = p9_payload_gup(data, &nr_pages, pages, write); + if (err < 0) + return err; + atomic_add(nr_pages, &vp_pinned); + } else { + /* kernel buffer, no need to pin pages */ + int s, index = 0; + int count = nr_pages; + while (nr_pages) { + s = rest_of_page(data); + if (is_vmalloc_addr(data)) + pages[index++] = vmalloc_to_page(data); + else + pages[index++] = kmap_to_page(data); + data += s; + nr_pages--; + } + nr_pages = count; + } + return nr_pages; +} + +/** + * p9_virtio_zc_request - issue a zero copy request + * @client: client instance issuing the request + * @req: request to be issued + * @uidata: user bffer that should be ued for zero copy read + * @uodata: user buffer that shoud be user for zero copy write + * @inlen: read buffer size + * @olen: write buffer size + * @hdrlen: reader header size, This is the size of response protocol data + * + */ +static int +p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req, + char *uidata, char *uodata, int inlen, + int outlen, int in_hdr_len, int kern_buf) +{ + int in, out, err, out_sgs, in_sgs; + unsigned long flags; + int in_nr_pages = 0, out_nr_pages = 0; + struct page **in_pages = NULL, **out_pages = NULL; + struct virtio_chan *chan = client->trans; + struct scatterlist *sgs[4]; + + p9_debug(P9_DEBUG_TRANS, "virtio request\n"); + + if (uodata) { + out_nr_pages = p9_nr_pages(uodata, outlen); + out_pages = kmalloc(sizeof(struct page *) * out_nr_pages, + GFP_NOFS); + if (!out_pages) { + err = -ENOMEM; + goto err_out; + } + out_nr_pages = p9_get_mapped_pages(chan, out_pages, uodata, + out_nr_pages, 0, kern_buf); + if (out_nr_pages < 0) { + err = out_nr_pages; + kfree(out_pages); + out_pages = NULL; + goto err_out; + } + } + if (uidata) { + in_nr_pages = p9_nr_pages(uidata, inlen); + in_pages = kmalloc(sizeof(struct page *) * in_nr_pages, + GFP_NOFS); + if (!in_pages) { + err = -ENOMEM; + goto err_out; + } + in_nr_pages = p9_get_mapped_pages(chan, in_pages, uidata, + in_nr_pages, 1, kern_buf); + if (in_nr_pages < 0) { + err = in_nr_pages; + kfree(in_pages); + in_pages = NULL; + goto err_out; + } + } + req->status = REQ_STATUS_SENT; +req_retry_pinned: + spin_lock_irqsave(&chan->lock, flags); + + out_sgs = in_sgs = 0; + + /* out data */ + out = pack_sg_list(chan->sg, 0, + VIRTQUEUE_NUM, req->tc->sdata, req->tc->size); + + if (out) + sgs[out_sgs++] = chan->sg; + + if (out_pages) { + sgs[out_sgs++] = chan->sg + out; + out += pack_sg_list_p(chan->sg, out, VIRTQUEUE_NUM, + out_pages, out_nr_pages, uodata, outlen); + } + + /* + * Take care of in data + * For example TREAD have 11. + * 11 is the read/write header = PDU Header(7) + IO Size (4). + * Arrange in such a way that server places header in the + * alloced memory and payload onto the user buffer. + */ + in = pack_sg_list(chan->sg, out, + VIRTQUEUE_NUM, req->rc->sdata, in_hdr_len); + if (in) + sgs[out_sgs + in_sgs++] = chan->sg + out; + + if (in_pages) { + sgs[out_sgs + in_sgs++] = chan->sg + out + in; + in += pack_sg_list_p(chan->sg, out + in, VIRTQUEUE_NUM, + in_pages, in_nr_pages, uidata, inlen); + } + + BUG_ON(out_sgs + in_sgs > ARRAY_SIZE(sgs)); + err = virtqueue_add_sgs(chan->vq, sgs, out_sgs, in_sgs, req->tc, + GFP_ATOMIC); + if (err < 0) { + if (err == -ENOSPC) { + chan->ring_bufs_avail = 0; + spin_unlock_irqrestore(&chan->lock, flags); + err = wait_event_interruptible(*chan->vc_wq, + chan->ring_bufs_avail); + if (err == -ERESTARTSYS) + goto err_out; + + p9_debug(P9_DEBUG_TRANS, "Retry virtio request\n"); + goto req_retry_pinned; + } else { + spin_unlock_irqrestore(&chan->lock, flags); + p9_debug(P9_DEBUG_TRANS, + "virtio rpc add_sgs returned failure\n"); + err = -EIO; + goto err_out; + } + } + virtqueue_kick(chan->vq); + spin_unlock_irqrestore(&chan->lock, flags); + p9_debug(P9_DEBUG_TRANS, "virtio request kicked\n"); + err = wait_event_interruptible(*req->wq, + req->status >= REQ_STATUS_RCVD); + /* + * Non kernel buffers are pinned, unpin them + */ +err_out: + if (!kern_buf) { + if (in_pages) { + p9_release_pages(in_pages, in_nr_pages); + atomic_sub(in_nr_pages, &vp_pinned); + } + if (out_pages) { + p9_release_pages(out_pages, out_nr_pages); + atomic_sub(out_nr_pages, &vp_pinned); + } + /* wakeup anybody waiting for slots to pin pages */ + wake_up(&vp_wq); + } + kfree(in_pages); + kfree(out_pages); + return err; +} + static ssize_t p9_mount_tag_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -288,7 +526,7 @@ static int p9_virtio_probe(struct virtio_device *vdev) chan = kmalloc(sizeof(struct virtio_chan), GFP_KERNEL); if (!chan) { - printk(KERN_ERR "9p: Failed to allocate virtio 9P channel\n"); + pr_err("Failed to allocate virtio 9P channel\n"); err = -ENOMEM; goto fail; } @@ -308,9 +546,7 @@ static int p9_virtio_probe(struct virtio_device *vdev) chan->inuse = false; if (virtio_has_feature(vdev, VIRTIO_9P_MOUNT_TAG)) { - vdev->config->get(vdev, - offsetof(struct virtio_9p_config, tag_len), - &tag_len, sizeof(tag_len)); + virtio_cread(vdev, struct virtio_9p_config, tag_len, &tag_len); } else { err = -EINVAL; goto out_free_vq; @@ -320,8 +556,9 @@ static int p9_virtio_probe(struct virtio_device *vdev) err = -ENOMEM; goto out_free_vq; } - vdev->config->get(vdev, offsetof(struct virtio_9p_config, tag), - tag, tag_len); + + virtio_cread_bytes(vdev, offsetof(struct virtio_9p_config, tag), + tag, tag_len); chan->tag = tag; chan->tag_len = tag_len; err = sysfs_create_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr); @@ -335,10 +572,16 @@ static int p9_virtio_probe(struct virtio_device *vdev) } init_waitqueue_head(chan->vc_wq); chan->ring_bufs_avail = 1; + /* Ceiling limit to avoid denial of service attacks */ + chan->p9_max_pages = nr_free_buffer_pages()/4; mutex_lock(&virtio_9p_lock); list_add_tail(&chan->chan_list, &virtio_chan_list); mutex_unlock(&virtio_9p_lock); + + /* Let udev rules use the new mount_tag attribute. */ + kobject_uevent(&(vdev->dev.kobj), KOBJ_CHANGE); + return 0; out_free_tag: @@ -387,7 +630,7 @@ p9_virtio_create(struct p9_client *client, const char *devname, char *args) mutex_unlock(&virtio_9p_lock); if (!found) { - printk(KERN_ERR "9p: no channels available\n"); + pr_err("no channels available\n"); return ret; } @@ -408,13 +651,15 @@ static void p9_virtio_remove(struct virtio_device *vdev) { struct virtio_chan *chan = vdev->priv; - BUG_ON(chan->inuse); + if (chan->inuse) + p9_virtio_close(chan->client); vdev->config->del_vqs(vdev); mutex_lock(&virtio_9p_lock); list_del(&chan->chan_list); mutex_unlock(&virtio_9p_lock); sysfs_remove_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr); + kobject_uevent(&(vdev->dev.kobj), KOBJ_CHANGE); kfree(chan->tag); kfree(chan->vc_wq); kfree(chan); @@ -446,9 +691,16 @@ static struct p9_trans_module p9_virtio_trans = { .create = p9_virtio_create, .close = p9_virtio_close, .request = p9_virtio_request, + .zc_request = p9_virtio_zc_request, .cancel = p9_virtio_cancel, - .maxsize = PAGE_SIZE*16, - .def = 0, + /* + * We leave one entry for input and one entry for response + * headers. We also skip one more entry to accomodate, address + * that are not at page boundary, that can result in an extra + * page in zero copy. + */ + .maxsize = PAGE_SIZE * (VIRTQUEUE_NUM - 3), + .def = 1, .owner = THIS_MODULE, }; diff --git a/net/9p/util.c b/net/9p/util.c index e048701a72d..59f278e64f5 100644 --- a/net/9p/util.c +++ b/net/9p/util.c @@ -67,7 +67,7 @@ EXPORT_SYMBOL(p9_idpool_create); /** * p9_idpool_destroy - create a new per-connection id pool - * @p: idpool to destory + * @p: idpool to destroy */ void p9_idpool_destroy(struct p9_idpool *p) @@ -87,26 +87,21 @@ EXPORT_SYMBOL(p9_idpool_destroy); int p9_idpool_get(struct p9_idpool *p) { - int i = 0; - int error; + int i; unsigned long flags; -retry: - if (idr_pre_get(&p->pool, GFP_KERNEL) == 0) - return 0; - + idr_preload(GFP_NOFS); spin_lock_irqsave(&p->lock, flags); /* no need to store exactly p, we just need something non-null */ - error = idr_get_new(&p->pool, p, &i); - spin_unlock_irqrestore(&p->lock, flags); + i = idr_alloc(&p->pool, p, 0, 0, GFP_NOWAIT); - if (error == -EAGAIN) - goto retry; - else if (error) + spin_unlock_irqrestore(&p->lock, flags); + idr_preload_end(); + if (i < 0) return -1; - P9_DPRINTK(P9_DEBUG_MUX, " id %d pool %p\n", i, p); + p9_debug(P9_DEBUG_MUX, " id %d pool %p\n", i, p); return i; } EXPORT_SYMBOL(p9_idpool_get); @@ -124,7 +119,7 @@ void p9_idpool_put(int id, struct p9_idpool *p) { unsigned long flags; - P9_DPRINTK(P9_DEBUG_MUX, " id %d pool %p\n", id, p); + p9_debug(P9_DEBUG_MUX, " id %d pool %p\n", id, p); spin_lock_irqsave(&p->lock, flags); idr_remove(&p->pool, id); |
